Bharadwaj Yadavalli 2016-04-19 15:26:59 -04:00
commit d61df5babe
205 changed files with 13672 additions and 10504 deletions

View file

@ -83,6 +83,21 @@ suite = {
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.code.test" : {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
"jdk.vm.ci.amd64",
"jdk.vm.ci.sparc",
"jdk.vm.ci.code",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "1.8",
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.runtime" : {
"subDir" : "src/jdk.vm.ci/share/classes",
"sourceDirs" : ["src"],
@ -164,7 +179,7 @@ suite = {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:TESTNG",
"TESTNG",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",

View file

@ -3346,6 +3346,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -4190,55 +4194,6 @@ encode %{
}
%}
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
MacroAssembler _masm(&cbuf);
Register cnt_reg = as_Register($cnt$$reg);
Register base_reg = as_Register($base$$reg);
// base is word aligned
// cnt is count of words
Label loop;
Label entry;
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = 0;
// case 7:
// p[-7] = 0;
// case 6:
// p[-6] = 0;
// // ...
// case 1:
// p[-1] = 0;
// case 0:
// p += 8;
// } while (cnt);
// }
const int unroll = 8; // Number of str(zr) instructions we'll unroll
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= unroll
// base_reg always points to the end of the region we're about to zero
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
__ adr(rscratch2, entry);
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
__ br(rscratch2);
__ bind(loop);
__ sub(cnt_reg, cnt_reg, unroll);
for (int i = -unroll; i < 0; i++)
__ str(zr, Address(base_reg, i * wordSize));
__ bind(entry);
__ add(base_reg, base_reg, unroll * wordSize);
__ cbnz(cnt_reg, loop);
%}
/// mov encodings
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
@ -13363,7 +13318,23 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
ins_encode %{
__ zero_words($base$$Register, $cnt$$Register);
%}
ins_pipe(pipe_class_memory);
%}
instruct clearArray_imm_reg(immL cnt, iRegP base, Universe dummy, rFlagsReg cr)
%{
match(Set dummy (ClearArray cnt base));
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode %{
__ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
%}
ins_pipe(pipe_class_memory);
%}

View file

@ -2942,6 +2942,10 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
__ mov(result_reg->as_register(), rthread);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -1127,7 +1127,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);
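For context: the three-argument form pre-fills the list, so its length (not just its capacity) covers every index written back-to-front by at_put() in the loop above. A minimal, hypothetical sketch of that distinction, with std::vector standing in for LIRItemList (this is not HotSpot's actual GrowableArray API):

#include <cassert>
#include <vector>

int main() {
  int i = 3;                              // plays the role of dims->length()
  std::vector<void*> reserved;
  reserved.reserve(i);                    // capacity 3, length 0: index 2 not yet valid
  std::vector<void*> filled(i, nullptr);  // length 3, pre-filled: index 2 valid now
  while (i-- > 0) {
    filled[i] = &filled;                  // analogous to items->at_put(i, size)
  }
  assert(filled.size() == 3 && reserved.empty());
  return 0;
}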

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -32,22 +32,6 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View file

@ -76,7 +76,8 @@ define_pd_global(bool, CompactStrings, false);
// avoid biased locking while we are bootstrapping the aarch64 build
define_pd_global(bool, UseBiasedLocking, false);
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
// Clear short arrays bigger than one word in an arch-specific way
define_pd_global(intx, InitArrayShortSize, BytesPerLong);
#if defined(COMPILER1) || defined(COMPILER2)
define_pd_global(intx, InlineSmallCode, 1000);

View file

@ -4670,6 +4670,94 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Count in 8-byte unit.
void MacroAssembler::zero_words(Register base, Register cnt)
{
fill_words(base, cnt, zr);
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Immediate count in 8-byte unit.
#define ShortArraySize (18 * BytesPerLong)
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
{
int i = cnt & 1; // store any odd word to start
if (i) str(zr, Address(base));
if (cnt <= ShortArraySize / BytesPerLong) {
for (; i < (int)cnt; i += 2)
stp(zr, zr, Address(base, i * wordSize));
} else {
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
int remainder = cnt % (2 * unroll);
for (; i < remainder; i += 2)
stp(zr, zr, Address(base, i * wordSize));
Label loop;
Register cnt_reg = rscratch1;
Register loop_base = rscratch2;
cnt = cnt - remainder;
mov(cnt_reg, cnt);
// adjust base and prebias by -2 * wordSize so we can pre-increment
add(loop_base, base, (remainder - 2) * wordSize);
bind(loop);
sub(cnt_reg, cnt_reg, 2 * unroll);
for (i = 1; i < unroll; i++)
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
cbnz(cnt_reg, loop);
}
}
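A plain C++ model of the store pattern this routine emits for an immediate count (a readability sketch following the comments above, not the instruction sequence itself): one odd word first, straight-line pair stores for short buffers, otherwise peel the remainder and loop over unrolled pairs.

#include <cstdint>

// Illustrative model of zero_words(base, cnt) with an immediate word count.
static void zero_words_model(uint64_t* p, uint64_t cnt) {
  const uint64_t short_words = 18;           // ShortArraySize / BytesPerLong
  const int unroll = 4;                      // stp pairs per loop iteration
  uint64_t i = cnt & 1;                      // store any odd word to start
  if (i) p[0] = 0;
  if (cnt <= short_words) {
    for (; i < cnt; i += 2) { p[i] = 0; p[i + 1] = 0; }    // straight-line stp pairs
  } else {
    uint64_t remainder = cnt % (2 * unroll);
    for (; i < remainder; i += 2) { p[i] = 0; p[i + 1] = 0; }
    uint64_t* q = p + remainder;             // loop zeroes 2*unroll words per pass
    for (uint64_t left = cnt - remainder; left != 0; left -= 2 * unroll) {
      for (int k = 0; k < 2 * unroll; k++) q[k] = 0;
      q += 2 * unroll;
    }
  }
}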
// base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// value: Value to be filled with.
// base will point to the end of the buffer after filling.
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
{
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = v;
// case 7:
// p[-7] = v;
// case 6:
// p[-6] = v;
// // ...
// case 1:
// p[-1] = v;
// case 0:
// p += 8;
// } while (cnt);
// }
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
Label entry, loop;
const int unroll = 8; // Number of str instructions we'll unroll
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
cbz(rscratch1, entry);
sub(cnt, cnt, rscratch1); // cnt -= tmp1
// base always points to the end of the region we're about to fill
add(base, base, rscratch1, Assembler::LSL, 3);
adr(rscratch2, entry);
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
br(rscratch2);
bind(loop);
add(base, base, unroll * 8);
sub(cnt, cnt, unroll);
for (int i = -unroll; i < 0; i++)
str(value, Address(base, i * 8));
bind(entry);
cbnz(cnt, loop);
}
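The same algorithm in plain C++, for readability (the generated code enters the unrolled store block part-way through via a computed branch instead of using a separate head loop):

#include <cstdint>

// Illustrative model of fill_words(base, cnt, value); cnt is a word count.
static void fill_words_model(uint64_t* p, uint64_t cnt, uint64_t v) {
  const int unroll = 8;                 // str instructions per loop iteration
  uint64_t head = cnt & (unroll - 1);   // cnt % unroll
  for (uint64_t k = 0; k < head; k++)   // models jumping into the tail of the
    p[k] = v;                           // unrolled block for the first words
  p   += head;
  cnt -= head;                          // now a multiple of 'unroll'
  while (cnt != 0) {
    for (int k = 0; k < unroll; k++) p[k] = v;
    p   += unroll;
    cnt -= unroll;
  }
}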
// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst,

View file

@ -1184,6 +1184,10 @@ public:
Register result, Register cnt1,
int elem_size, bool is_string);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, Register cnt);
void zero_words(Register base, u_int64_t cnt);
void encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,

View file

@ -2021,6 +2021,136 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
//
// Generate stub for array fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
// to: c_rarg0
// value: c_rarg1
// count: c_rarg2 treated as signed
//
address generate_fill(BasicType t, bool aligned, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
BLOCK_COMMENT("Entry:");
const Register to = c_rarg0; // destination array address
const Register value = c_rarg1; // value
const Register count = c_rarg2; // elements count
const Register cnt_words = c_rarg3; // temp register
__ enter();
Label L_fill_elements, L_exit1;
int shift = -1;
switch (t) {
case T_BYTE:
shift = 0;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ bfi(value, value, 8, 8); // 8 bit -> 16 bit
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
__ br(Assembler::LO, L_fill_elements);
break;
case T_SHORT:
shift = 1;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
__ br(Assembler::LO, L_fill_elements);
break;
case T_INT:
shift = 2;
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ br(Assembler::LO, L_fill_elements);
break;
default: ShouldNotReachHere();
}
// Align source address at 8 bytes address boundary.
Label L_skip_align1, L_skip_align2, L_skip_align4;
if (!aligned) {
switch (t) {
case T_BYTE:
// One byte misalignment happens only for byte arrays.
__ tbz(to, 0, L_skip_align1);
__ strb(value, Address(__ post(to, 1)));
__ subw(count, count, 1);
__ bind(L_skip_align1);
// Fallthrough
case T_SHORT:
// Two bytes misalignment happens only for byte and short (char) arrays.
__ tbz(to, 1, L_skip_align2);
__ strh(value, Address(__ post(to, 2)));
__ subw(count, count, 2 >> shift);
__ bind(L_skip_align2);
// Fallthrough
case T_INT:
// Align to 8 bytes, we know we are 4 byte aligned to start.
__ tbz(to, 2, L_skip_align4);
__ strw(value, Address(__ post(to, 4)));
__ subw(count, count, 4 >> shift);
__ bind(L_skip_align4);
break;
default: ShouldNotReachHere();
}
}
//
// Fill large chunks
//
__ lsrw(cnt_words, count, 3 - shift); // number of words
__ bfi(value, value, 32, 32); // 32 bit -> 64 bit
__ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
__ fill_words(to, cnt_words, value);
// Remaining count is less than 8 bytes. Fill it by a single store.
// Note that the total length is no less than 8 bytes.
if (t == T_BYTE || t == T_SHORT) {
Label L_exit1;
__ cbzw(count, L_exit1);
__ add(to, to, count, Assembler::LSL, shift); // points to the end
__ str(value, Address(to, -8)); // overwrite some elements
__ bind(L_exit1);
__ leave();
__ ret(lr);
}
// Handle copies less than 8 bytes.
Label L_fill_2, L_fill_4, L_exit2;
__ bind(L_fill_elements);
switch (t) {
case T_BYTE:
__ tbz(count, 0, L_fill_2);
__ strb(value, Address(__ post(to, 1)));
__ bind(L_fill_2);
__ tbz(count, 1, L_fill_4);
__ strh(value, Address(__ post(to, 2)));
__ bind(L_fill_4);
__ tbz(count, 2, L_exit2);
__ strw(value, Address(to));
break;
case T_SHORT:
__ tbz(count, 0, L_fill_4);
__ strh(value, Address(__ post(to, 2)));
__ bind(L_fill_4);
__ tbz(count, 1, L_exit2);
__ strw(value, Address(to));
break;
case T_INT:
__ cbzw(count, L_exit2);
__ strw(value, Address(to));
break;
default: ShouldNotReachHere();
}
__ bind(L_exit2);
__ leave();
__ ret(lr);
return start;
}
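Conceptually, the bfi chain above widens the element value into a full 64-bit pattern before the word-wise fill, and the final store for T_BYTE/T_SHORT deliberately overlaps the already-filled region so the tail needs no per-element loop. A hedged C++ sketch of that idea (alignment peeling and the short-array path are omitted; the names are illustrative, not HotSpot APIs):

#include <cstdint>
#include <cstring>

// Replicate a 1-, 2- or 4-byte element value across 64 bits (the bfi widening).
static uint64_t replicate64(uint64_t value, int elem_size) {
  if (elem_size == 1) return (value & 0xff)   * 0x0101010101010101ULL;
  if (elem_size == 2) return (value & 0xffff) * 0x0001000100010001ULL;
  return (value & 0xffffffffULL) * 0x0000000100000001ULL;   // elem_size == 4
}

// Whole 8-byte words first, then one overlapping 8-byte store that ends exactly
// at the last element (safe because it rewrites the same pattern).
static void fill_model(uint8_t* to, uint64_t value, size_t count, int elem_size) {
  size_t   bytes = count * elem_size;
  uint64_t wide  = replicate64(value, elem_size);
  for (size_t i = 0; i + 8 <= bytes; i += 8)
    std::memcpy(to + i, &wide, 8);                 // fill_words() part
  if (bytes % 8 != 0 && bytes >= 8)
    std::memcpy(to + bytes - 8, &wide, 8);         // overlapping tail store
}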
void generate_arraycopy_stubs() {
address entry;
address entry_jbyte_arraycopy;
@ -2124,6 +2254,12 @@ class StubGenerator: public StubCodeGenerator {
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
}
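The entries registered above are consumed elsewhere as plain (to, value, count) fill routines, matching the argument comment in the stub itself. A short, hedged sketch of the contract such a stub is expected to satisfy (the typedef and reference function are illustrative, not HotSpot declarations):

#include <cstdint>

// Illustrative signature matching the stub's documented arguments
// (to = c_rarg0, value = c_rarg1, count = c_rarg2).
typedef void (*jint_fill_fn)(int32_t* to, int32_t value, int count);

// Portable reference behaviour a generated jint_fill stub should match.
static void jint_fill_reference(int32_t* to, int32_t value, int count) {
  for (int i = 0; i < count; i++) to[i] = value;
}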
void generate_math_stubs() { Unimplemented(); }

View file

@ -2845,6 +2845,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::StoreLoad);
}
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
LIR_Address* addr = addr_opr->as_address_ptr();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -1055,7 +1055,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -35,22 +35,6 @@
#include "opto/matcher.hpp"
#endif
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
// A PPC CompiledStaticCall looks like this:

View file

@ -2047,6 +2047,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}

View file

@ -3313,6 +3313,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
// Pack two sequential registers containing 32 bit values
// into a single 64 bit register.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1033,7 +1033,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,22 +34,6 @@
#include "opto/matcher.hpp"
#endif
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View file

@ -3257,12 +3257,12 @@ void MacroAssembler::eden_allocate(
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
brx(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go to the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of

View file

@ -1904,6 +1904,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}

View file

@ -1240,6 +1240,7 @@ void Assembler::addr_nop_8() {
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -1250,6 +1251,7 @@ void Assembler::addsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -1599,6 +1601,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2F);
emit_operand(dst, src);
@ -1607,6 +1610,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2F);
emit_int8((unsigned char)(0xC0 | encode));
@ -1733,6 +1737,7 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5A);
emit_int8((unsigned char)(0xC0 | encode));
@ -1743,6 +1748,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5A);
emit_operand(dst, src);
@ -1827,6 +1833,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE6);
emit_int8((unsigned char)(0xC0 | encode));
}
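For reference, cvttpd2dq performs a truncating packed double-to-int32 conversion; its 128-bit form corresponds to the SSE2 intrinsic _mm_cvttpd_epi32, as in this standalone sketch (illustration only, not part of the change):

#include <emmintrin.h>   // SSE2 intrinsics
#include <cstdio>

int main() {
  __m128d v = _mm_set_pd(-2.9, 3.7);       // lanes: low = 3.7, high = -2.9
  __m128i r = _mm_cvttpd_epi32(v);         // truncate toward zero -> {3, -2, 0, 0}
  int out[4];
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), r);
  std::printf("%d %d\n", out[0], out[1]);  // prints "3 -2"
  return 0;
}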
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@ -1840,6 +1855,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -1848,6 +1864,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -2122,6 +2139,7 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x28);
emit_int8((unsigned char)(0xC0 | encode));
@ -2156,6 +2174,7 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
emit_int8(0xC0 | encode);
@ -2193,6 +2212,15 @@ void Assembler::kmovwl(Register dst, KRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovwl(KRegister dst, Address src) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x90);
emit_operand((Register)dst, src);
}
void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@ -2251,6 +2279,14 @@ void Assembler::kmovql(Register dst, KRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::knotwl(KRegister dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x44);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
assert(VM_Version::supports_avx512dq(), "");
@ -2423,6 +2459,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2435,6 +2472,7 @@ void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
@ -2447,6 +2485,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2455,6 +2494,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2466,6 +2506,7 @@ void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2478,6 +2519,7 @@ void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@ -2509,8 +2551,8 @@ void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2529,8 +2571,8 @@ void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
@ -2541,8 +2583,8 @@ void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
@ -2592,6 +2634,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
emit_operand(dst, src);
@ -2622,6 +2665,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7E);
emit_operand(dst, src);
@ -2632,6 +2676,7 @@ void Assembler::movq(Address dst, XMMRegister src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD6);
emit_operand(src, dst);
@ -2656,6 +2701,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_int8((unsigned char)(0xC0 | encode));
@ -2666,6 +2712,7 @@ void Assembler::movsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_operand(dst, src);
@ -2676,6 +2723,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x11);
emit_operand(src, dst);
@ -2799,6 +2847,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -2807,6 +2856,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -3786,6 +3836,7 @@ void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x6C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4081,6 +4132,7 @@ void Assembler::smovl() {
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4091,6 +4143,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_operand(dst, src);
@ -4166,6 +4219,7 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4176,6 +4230,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4263,6 +4318,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2E);
emit_operand(dst, src);
@ -4271,6 +4327,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x2E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4382,6 +4439,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -4390,6 +4448,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4418,6 +4477,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -4426,6 +4486,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4454,6 +4515,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4462,6 +4524,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4490,6 +4553,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4498,6 +4562,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4528,6 +4593,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4537,6 +4603,7 @@ void Assembler::addpd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
@ -4555,6 +4622,7 @@ void Assembler::addps(XMMRegister dst, XMMRegister src) {
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_int8((unsigned char)(0xC0 | encode));
@ -4573,6 +4641,7 @@ void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
emit_operand(dst, src);
@ -4591,6 +4660,7 @@ void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4607,6 +4677,7 @@ void Assembler::subps(XMMRegister dst, XMMRegister src) {
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4625,6 +4696,7 @@ void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_operand(dst, src);
@ -4643,6 +4715,7 @@ void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4653,6 +4726,7 @@ void Assembler::mulpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4669,6 +4743,7 @@ void Assembler::mulps(XMMRegister dst, XMMRegister src) {
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -4687,6 +4762,7 @@ void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_operand(dst, src);
@ -4705,6 +4781,7 @@ void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4721,6 +4798,7 @@ void Assembler::divps(XMMRegister dst, XMMRegister src) {
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_int8((unsigned char)(0xC0 | encode));
@ -4739,6 +4817,7 @@ void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
emit_operand(dst, src);
@ -4757,6 +4836,7 @@ void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4767,6 +4847,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_operand(dst, src);
@ -4775,6 +4856,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_int8((unsigned char)(0xC0 | encode));
@ -4803,6 +4885,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_operand(dst, src);
@ -4811,6 +4894,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_int8((unsigned char)(0xC0 | encode));
@ -4829,6 +4913,7 @@ void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x54);
emit_operand(dst, src);
@ -4847,6 +4932,7 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x15);
emit_int8((unsigned char)(0xC0 | encode));
@ -4855,6 +4941,7 @@ void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x14);
emit_int8((unsigned char)(0xC0 | encode));
@ -4863,6 +4950,7 @@ void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_int8((unsigned char)(0xC0 | encode));
@ -4881,6 +4969,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_operand(dst, src);
@ -4899,6 +4988,7 @@ void Assembler::xorps(XMMRegister dst, Address src) {
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_int8((unsigned char)(0xC0 | encode));
@ -4917,6 +5007,7 @@ void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x57);
emit_operand(dst, src);
@ -4987,13 +5078,14 @@ void Assembler::paddd(XMMRegister dst, Address src) {
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
@ -5001,7 +5093,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);
@ -5035,6 +5127,7 @@ void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_int8((unsigned char)(0xC0 | encode));
@ -5075,6 +5168,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD4);
emit_operand(dst, src);
@ -5106,6 +5200,7 @@ void Assembler::psubd(XMMRegister dst, XMMRegister src) {
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_int8((unsigned char)(0xC0 | encode));
@ -5138,6 +5233,7 @@ void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_int8((unsigned char)(0xC0 | encode));
@ -5178,6 +5274,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFB);
emit_operand(dst, src);
@ -5216,8 +5313,9 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
}
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 2, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
assert(UseAVX > 2, "requires some form of EVEX");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x40);
emit_int8((unsigned char)(0xC0 | encode));
@ -5244,10 +5342,11 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vecto
}
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
assert(UseAVX > 2, "requires some form of EVEX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x40);
emit_operand(dst, src);
@ -5303,6 +5402,7 @@ void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5332,6 +5432,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5358,6 +5459,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5389,6 +5491,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
// shifts 128 bit value in xmm register by number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5415,6 +5518,7 @@ void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5443,6 +5547,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x73);
@ -5469,6 +5574,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD3);
emit_int8((unsigned char)(0xC0 | encode));
@ -5578,6 +5684,7 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xDF);
emit_int8((unsigned char)(0xC0 | encode));
@ -5867,9 +5974,9 @@ void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
}
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
@ -5957,9 +6064,9 @@ void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
}
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(VM_Version::supports_avx512dq(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@ -6084,7 +6191,8 @@ void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -6094,7 +6202,8 @@ void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
// swap src<->dst for encoding
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@ -6129,7 +6238,8 @@ void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
@ -6139,8 +6249,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
// swap src<->dst for encoding
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x19);
@ -6154,12 +6265,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7A);
} else {
emit_int8(0x78);
}
emit_int8(0x7A);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6167,12 +6275,9 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7B);
} else {
emit_int8(0x79);
}
emit_int8(0x7B);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6180,12 +6285,9 @@ void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7C);
} else {
emit_int8(0x58);
}
emit_int8(0x7C);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6193,12 +6295,9 @@ void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
if (attributes.is_evex_instruction()) {
emit_int8(0x7C);
} else {
emit_int8(0x59);
}
emit_int8(0x7C);
emit_int8((unsigned char)(0xC0 | encode));
}
@ -6853,6 +6952,9 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
attributes->set_is_evex_instruction();
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
}
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
}
}
@ -6912,6 +7014,9 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
attributes->set_is_evex_instruction();
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
} else {
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
attributes->set_rex_vex_w(false);
}
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
}
@ -6966,6 +7071,21 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int8((unsigned char)(0xF0 & src2_enc<<4));
}
void Assembler::shlxl(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::shlxq(Register dst, Register src1, Register src2) {
assert(VM_Version::supports_bmi2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
#ifndef _LP64
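The two helpers added above emit the BMI2 SHLX forms, which shift src1 left by the count held in src2 without clobbering EFLAGS and without requiring the count to live in CL. A minimal behavioural sketch of what the encoded instructions compute (the flag-preserving property is the reason to prefer them; the count masking follows the architectural definition):

#include <cstdint>
#include <cstdio>

// Behavioural sketch only; SHLX additionally leaves the flags untouched,
// which is the point of using it in generated code.
static uint32_t shlx32(uint32_t value, uint32_t count) { return value << (count & 31); }
static uint64_t shlx64(uint64_t value, uint64_t count) { return value << (count & 63); }

int main() {
  std::printf("%u\n", shlx32(1u, 5));                            // 32
  std::printf("%llu\n", (unsigned long long)shlx64(1ull, 40));   // 1099511627776
  return 0;
}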

View file

@ -1048,6 +1048,8 @@ private:
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvttpd2dq(XMMRegister dst, XMMRegister src);
// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);
@ -1335,6 +1337,7 @@ private:
void kmovbl(KRegister dst, Register src);
void kmovbl(Register dst, KRegister src);
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
@ -1344,6 +1347,8 @@ private:
void kmovql(KRegister dst, Register src);
void kmovql(Register dst, KRegister src);
void knotwl(KRegister dst, KRegister src);
void kortestbl(KRegister dst, KRegister src);
void kortestwl(KRegister dst, KRegister src);
void kortestdl(KRegister dst, KRegister src);
@ -2050,6 +2055,8 @@ private:
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
@ -2075,6 +2082,7 @@ public:
:
_avx_vector_len(vector_len),
_rex_vex_w(rex_vex_w),
_rex_vex_w_reverted(false),
_legacy_mode(legacy_mode),
_no_reg_mask(no_reg_mask),
_uses_vl(uses_vl),
@ -2098,6 +2106,7 @@ public:
private:
int _avx_vector_len;
bool _rex_vex_w;
bool _rex_vex_w_reverted;
bool _legacy_mode;
bool _no_reg_mask;
bool _uses_vl;
@ -2114,6 +2123,7 @@ public:
// query functions for field accessors
int get_vector_len(void) const { return _avx_vector_len; }
bool is_rex_vex_w(void) const { return _rex_vex_w; }
bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
bool is_legacy_mode(void) const { return _legacy_mode; }
bool is_no_reg_mask(void) const { return _no_reg_mask; }
bool uses_vl(void) const { return _uses_vl; }
@ -2127,6 +2137,12 @@ public:
// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
// Set revert rex_vex_w for avx encoding
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
// Set rex_vex_w based on state
void set_rex_vex_w(bool state) { _rex_vex_w = state; }
// Set the instruction to be encoded in AVX mode
void set_is_legacy_mode(void) { _legacy_mode = true; }
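Taken together with the vex_prefix/vex_prefix_and_encode hunks earlier in this file's diff, the intended use of the new _rex_vex_w_reverted flag appears to be: an instruction that wants W=1 only in its EVEX form marks the attribute as reverted, and when the encoder ends up emitting a plain VEX prefix it clears the W bit. The sketch below is illustrative only; the struct and function names are invented for the example and are not the VM's encoder.

#include <cstdio>

// Stripped-down attribute object and prefix chooser mirroring the pattern above.
struct Attr {
  bool rex_vex_w = true;            // W bit requested by the instruction
  bool rex_vex_w_reverted = false;  // "drop W if we do not end up using EVEX"
  bool uses_evex = false;
};

static void emit_prefix(Attr& a, bool evex_supported) {
  if (evex_supported) {
    a.uses_evex = true;             // EVEX path keeps W as requested
  } else if (a.rex_vex_w_reverted) {
    a.rex_vex_w = false;            // VEX fallback: revert W to 0
  }
  std::printf("prefix=%s W=%d\n", a.uses_evex ? "EVEX" : "VEX", a.rex_vex_w ? 1 : 0);
}

int main() {
  Attr a; a.rex_vex_w_reverted = true;
  emit_prefix(a, /*evex_supported=*/false);   // prefix=VEX W=0
  Attr b; b.rex_vex_w_reverted = true;
  emit_prefix(b, /*evex_supported=*/true);    // prefix=EVEX W=1
  return 0;
}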

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -169,18 +169,18 @@ void FpuStackSim::clear() {
intArray* FpuStackSim::write_state() {
intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
(*res)[0] = stack_size();
res->append(stack_size());
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
(*res)[1 + i] = regs_at(i);
res->append(regs_at(i));
}
return res;
}
void FpuStackSim::read_state(intArray* fpu_stack_state) {
_stack_size = (*fpu_stack_state)[0];
_stack_size = fpu_stack_state->at(0);
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
set_regs_at(i, (*fpu_stack_state)[1 + i]);
set_regs_at(i, fpu_stack_state->at(1 + i));
}
}

View file

@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
} else if (value->is_double_fpu()) {
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
switch(code) {
case lir_log10 : __ flog10() ; break;
case lir_abs : __ fabs() ; break;
case lir_sqrt : __ fsqrt(); break;
case lir_tan :
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
default : ShouldNotReachHere();
}
} else {
@ -3886,6 +3881,10 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
void LIR_Assembler::on_spin_wait() {
__ pause ();
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
assert(result_reg->is_register(), "check");
#ifdef _LP64
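The new on_spin_wait() hook above lowers C1's spin-wait node to the x86 pause instruction. As a rough user-level analogue, and assuming an x86 toolchain that provides the _mm_pause intrinsic, a polite busy-wait looks like the sketch below; the ready flag and helper thread are invented for the illustration:

#include <immintrin.h>   // _mm_pause
#include <atomic>
#include <thread>

std::atomic<bool> ready{false};

// Busy-wait politely: PAUSE hints that this is a spin loop, saving power and
// avoiding a pipeline flush when the awaited store finally arrives.
void spin_until_ready() {
  while (!ready.load(std::memory_order_acquire)) {
    _mm_pause();
  }
}

int main() {
  std::thread t([] { ready.store(true, std::memory_order_release); });
  spin_until_ready();
  t.join();
  return 0;
}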

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -812,7 +812,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin) {
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10) {
do_LibmIntrinsic(x);
return;
}
@ -820,58 +821,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
if (UseSSE >= 2) {
switch(x->id()) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog10:
use_fpu = true;
break;
}
} else {
if (UseSSE < 2) {
value.set_destroys_register();
}
value.load_item();
LIR_Opr calc_input = value.result();
LIR_Opr calc_input2 = NULL;
if (x->id() == vmIntrinsics::_dpow) {
LIRItem extra_arg(x->argument_at(1), this);
if (UseSSE < 2) {
extra_arg.set_destroys_register();
}
extra_arg.load_item();
calc_input2 = extra_arg.result();
}
LIR_Opr calc_result = rlock_result(x);
// sin, cos, pow and exp need two free fpu stack slots, so register
// two temporary operands
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
if (use_fpu) {
LIR_Opr tmp = FrameMap::fpu0_double_opr;
int tmp_start = 1;
if (calc_input2 != NULL) {
__ move(calc_input2, tmp);
tmp_start = 2;
calc_input2 = tmp;
}
__ move(calc_input, tmp);
calc_input = tmp;
calc_result = tmp;
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
}
switch(x->id()) {
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
default: ShouldNotReachHere();
}
@ -912,21 +872,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
result_reg = tmp;
switch(x->id()) {
case vmIntrinsics::_dexp:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
@ -946,18 +913,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#else
switch (x->id()) {
case vmIntrinsics::_dexp:
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dsin:
if (StubRoutines::dsin() != NULL) {
@ -973,6 +966,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#endif // _LP64
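Both the 64-bit and 32-bit arms of this switch now follow the same shape for every transcendental: call the generated stub when the stub generator produced one, otherwise fall back to the portable SharedRuntime entry. A hedged sketch of that dispatch pattern in isolation; the names below (portable_dtan, optimized_dtan_stub, call_dtan) are placeholders for the example, not VM symbols:

#include <cmath>
#include <cstdio>

typedef double (*unary_fn)(double);

// In the VM the "stub" slot would be StubRoutines::dtan(), which may be null
// when no optimized stub was generated for the current CPU.
static double portable_dtan(double x) { return std::tan(x); }
static unary_fn optimized_dtan_stub = nullptr;   // pretend none was generated

static double call_dtan(double x) {
  unary_fn f = (optimized_dtan_stub != nullptr) ? optimized_dtan_stub : portable_dtan;
  return f(x);
}

int main() {
  std::printf("%f\n", call_dtan(0.5));   // falls back to the portable version here
  return 0;
}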
@ -1260,7 +1260,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
break;
}
case lir_log10: {
// log and log10 need one temporary fpu stack slot, so
// there is one temporary registers stored in temp of the
// operation. the stack allocator must guarantee that the stack
// slots are really free, otherwise there might be a stack
// overflow.
assert(right->is_illegal(), "must be");
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
break;
}
case lir_tan: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
// assert(left->is_last_use(), "old value gets destroyed");
assert(right->is_fpu_register(), "right is used as the first temporary register");
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(right);
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
break;
}
default: {
assert(false, "missed a fpu-operation");
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,22 +31,6 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
#define __ _masm.

View file

@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseBMI2Instructions, false, \
"Use BMI2 instructions") \
\
diagnostic(bool, UseLibmSinIntrinsic, true, \
"Use Libm Sin Intrinsic") \
\
diagnostic(bool, UseLibmCosIntrinsic, true, \
"Use Libm Cos Intrinsic")
diagnostic(bool, UseLibmIntrinsic, true, \
"Use Libm Intrinsics")
#endif // CPU_X86_VM_GLOBALS_X86_HPP

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -3399,6 +3399,18 @@ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
}
}
void MacroAssembler::setvectmask(Register dst, Register src) {
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
Assembler::kmovdl(k1, dst);
Assembler::movl(dst, src);
}
void MacroAssembler::restorevectmask() {
Assembler::knotwl(k1, k0);
}
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
if (UseXmmLoadAndClearUpper) {
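setvectmask() above computes (1 << n) - 1, a mask with the low n bits set, via movl(dst, 1); shlxl(dst, dst, src); decl(dst), and loads it into k1 so that subsequent k1-masked AVX-512 operations touch only the first n lanes; restorevectmask() rebuilds the default k1 as the complement of k0. A scalar sketch of the mask arithmetic only (the kmovdl/knotwl register moves have no scalar analogue), assuming n < 32:

#include <cstdint>
#include <cstdio>

// Same arithmetic as movl(dst,1); shlxl(dst,dst,src); decl(dst):
// a mask with the low n bits set.
static uint32_t low_bits_mask(uint32_t n) {
  return (1u << n) - 1u;
}

int main() {
  std::printf("0x%x\n", low_bits_mask(5));    // 0x1f
  std::printf("0x%x\n", low_bits_mask(16));   // 0xffff
  return 0;
}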

View file

@ -156,6 +156,10 @@ class MacroAssembler: public Assembler {
void incrementq(Register reg, int value = 1);
void incrementq(Address dst, int value = 1);
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
@ -928,6 +932,10 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
@ -941,11 +949,19 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
#else
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp);
@ -964,6 +980,14 @@ class MacroAssembler: public Assembler {
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
Register esi, Register edi, Register ebp, Register esp);
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
Register edx, Register ebx, Register esi, Register edi,
Register ebp, Register esp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
#endif
void increase_precision();

View file

@ -0,0 +1,889 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - COS()
// ---------------------
//
// 1. RANGE REDUCTION
//
// We perform an initial range reduction from X to r with
//
// X =~= N * pi/32 + r
//
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
// where |N| <= 932560. Beyond this, the range reduction is
// insufficiently accurate. For extremely small inputs,
// denormalization can occur internally, impacting performance.
// This means that the main path is actually only taken for
// 2^-252 <= |X| < 90112.
//
// To avoid branches, we perform the range reduction to full
// accuracy each time.
//
// X - N * (P_1 + P_2 + P_3)
//
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
// is exact) and P_3 is a 53-bit number. Together, these
// approximate pi well enough for all cases in the restricted
// range.
//
// The main reduction sequence is:
//
// y = 32/pi * x
// N = integer(y)
// (computed by adding and subtracting off SHIFTER)
//
// m_1 = N * P_1
// m_2 = N * P_2
// r_1 = x - m_1
// r = r_1 - m_2
// (this r can be used for most of the calculation)
//
// c_1 = r_1 - r
// m_3 = N * P_3
// c_2 = c_1 - m_2
// c = c_2 - m_3
//
// 2. MAIN ALGORITHM
//
// The algorithm uses a table lookup based on B = M * pi / 32
// where M = N mod 64. The stored values are:
// sigma closest power of 2 to cos(B)
// C_hl 53-bit cos(B) - sigma
// S_hi + S_lo 2 * 53-bit sin(B)
//
// The computation is organized as follows:
//
// sin(B + r + c) = [sin(B) + sigma * r] +
// r * (cos(B) - sigma) +
// sin(B) * [cos(r + c) - 1] +
// cos(B) * [sin(r + c) - r]
//
// which is approximately:
//
// [S_hi + sigma * r] +
// C_hl * r +
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
// (C_hl + sigma) * [(sin(r) - r) + c]
//
// and this is what is actually computed. We separate this sum
// into four parts:
//
// hi + med + pols + corr
//
// where
//
// hi = S_hi + sigma r
// med = C_hl * r
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
// 3. POLYNOMIAL
//
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
// (sin(r) - r) can be rearranged freely, since it is quite
// small, so we exploit parallelism to the fullest.
//
// psc4 = SC_4 * r_1
// msc4 = psc4 * r
// r2 = r * r
// msc2 = SC_2 * r2
// r4 = r2 * r2
// psc3 = SC_3 + msc4
// psc1 = SC_1 + msc2
// msc3 = r4 * psc3
// sincospols = psc1 + msc3
// pols = sincospols *
// <S_hi * r^2 | (C_hl + sigma) * r^3>
//
// 4. CORRECTION TERM
//
// This is where the "c" component of the range reduction is
// taken into account; recall that just "r" is used for most of
// the calculation.
//
// -c = m_3 - c_2
// -d = S_hi * r - (C_hl + sigma)
// corr = -c * -d + S_lo
//
// 5. COMPENSATED SUMMATIONS
//
// The two successive compensated summations add up the high
// and medium parts, leaving just the low parts to add up at
// the end.
//
// rs = sigma * r
// res_int = S_hi + rs
// k_0 = S_hi - res_int
// k_2 = k_0 + rs
// med = C_hl * r
// res_hi = res_int + med
// k_1 = res_int - res_hi
// k_3 = k_1 + med
//
// 6. FINAL SUMMATION
//
// We now add up all the small parts:
//
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
//
// Now the overall result is just:
//
// res_hi + res_lo
//
// 7. SMALL ARGUMENTS
//
// Inputs with |X| < 2^-252 are treated specially as
// 1 - |x|.
//
// Special cases:
// cos(NaN) = quiet NaN, and raise invalid exception
// cos(INF) = NaN and raise invalid exception
// cos(0) = 1
//
/******************************************************************************/
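The description above is easier to follow in scalar form. The sketch below is not the generated stub: it reuses the split constants quoted in the comments further down (PI32INV, P_1, P_2, P_3), stands in libm calls for the 64-entry Ctable and the SC_x polynomials, and skips the huge-argument and NaN/Inf paths entirely, purely to show step 1 (range reduction with a correction term) and the reconstruction around B = N*pi/32:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Rebuild a double from the (high word, low word) pairs used in the tables below.
static double from_words(uint32_t hi, uint32_t lo) {
  uint64_t bits = (uint64_t(hi) << 32) | lo;
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d;
}

int main() {
  const double PI32INV = from_words(0x40245f30u, 0x6dc9c883u);  // 32/pi
  const double P_1     = from_words(0x3fb921fbu, 0x54400000u);  // pi/32, high part
  const double P_2     = from_words(0x3d90b461u, 0x1a600000u);  // pi/32, middle part
  const double P_3     = from_words(0x3b63198au, 0x2e037073u);  // pi/32, low part
  const double PI      = 3.14159265358979323846;

  double x = 1.7;                              // a main-path input
  double N = std::floor(x * PI32INV + 0.5);    // the SHIFTER trick, simplified

  // Range reduction with the three-part split: r carries the working value,
  // c the accumulated low-order correction.
  double r1 = x - N * P_1;                     // N*P_1 is exact (P_1 is a short constant)
  double r  = r1 - N * P_2;
  double c  = (r1 - r) - N * P_2;              // c_2 in the description above
  c        -= N * P_3;                         // full correction term

  // Reconstruction around B = N*pi/32.  The stub reads sigma/C_hl/S_hi/S_lo from
  // Ctable and evaluates the SC_x polynomials; libm stands in for both here.
  double B = N * (PI / 32.0);
  double sketch = std::cos(B) * std::cos(r + c) - std::sin(B) * std::sin(r + c);

  std::printf("N=%g  r=% .17g  (|r| <= pi/64 = %.6f)\n", N, r, PI / 64.0);
  std::printf("libm cos(x)=% .17g  sketch=% .17g\n", std::cos(x), sketch);
  return 0;
}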
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(8) juint _ONE[] =
{
0x00000000UL, 0x3ff00000UL
};
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
address P_2 = StubRoutines::x86::_P_2_addr();
address SC_4 = StubRoutines::x86::_SC_4_addr();
address Ctable = StubRoutines::x86::_Ctable_addr();
address SC_2 = StubRoutines::x86::_SC_2_addr();
address SC_3 = StubRoutines::x86::_SC_3_addr();
address SC_1 = StubRoutines::x86::_SC_1_addr();
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
address P_1 = (address)StubRoutines::x86::_P_1_addr();
address P_3 = (address)StubRoutines::x86::_P_3_addr();
address ONE = (address)_ONE;
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
bind(start);
push(rbx);
subq(rsp, 16);
movsd(Address(rsp, 8), xmm0);
bind(B1_2);
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addq(rdx, 1865232);
movdqu(xmm4, xmm0);
andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm0, xmm5);
addsd(xmm3, xmm7);
addsd(xmm0, xmm1);
addsd(xmm0, xmm3);
addsd(xmm0, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm0, xmm6);
addsd(xmm0, xmm4);
jmp(B1_4);
bind(L_2TAG_PACKET_0_0_1);
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0);
movdqu(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32752);
cmpl(eax, 32752);
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
subl(ecx, 16224);
shrl(ecx, 7);
andl(ecx, 65532);
lea(r11, ExternalAddress(PI_INV_TABLE));
addq(rcx, r11);
movdq(rax, xmm0);
movl(r10, Address(rcx, 20));
movl(r8, Address(rcx, 24));
movl(edx, eax);
shrq(rax, 21);
orl(eax, INT_MIN);
shrl(eax, 11);
movl(r9, r10);
imulq(r10, rdx);
imulq(r9, rax);
imulq(r8, rax);
movl(rsi, Address(rcx, 16));
movl(rdi, Address(rcx, 12));
movl(r11, r10);
shrq(r10, 32);
addq(r9, r10);
addq(r11, r8);
movl(r8, r11);
shrq(r11, 32);
addq(r9, r11);
movl(r10, rsi);
imulq(rsi, rdx);
imulq(r10, rax);
movl(r11, rdi);
imulq(rdi, rdx);
movl(rbx, rsi);
shrq(rsi, 32);
addq(r9, rbx);
movl(rbx, r9);
shrq(r9, 32);
addq(r10, rsi);
addq(r10, r9);
shlq(rbx, 32);
orq(r8, rbx);
imulq(r11, rax);
movl(r9, Address(rcx, 8));
movl(rsi, Address(rcx, 4));
movl(rbx, rdi);
shrq(rdi, 32);
addq(r10, rbx);
movl(rbx, r10);
shrq(r10, 32);
addq(r11, rdi);
addq(r11, r10);
movq(rdi, r9);
imulq(r9, rdx);
imulq(rdi, rax);
movl(r10, r9);
shrq(r9, 32);
addq(r11, r10);
movl(r10, r11);
shrq(r11, 32);
addq(rdi, r9);
addq(rdi, r11);
movq(r9, rsi);
imulq(rsi, rdx);
imulq(r9, rax);
shlq(r10, 32);
orq(r10, rbx);
movl(eax, Address(rcx, 0));
movl(r11, rsi);
shrq(rsi, 32);
addq(rdi, r11);
movl(r11, rdi);
shrq(rdi, 32);
addq(r9, rsi);
addq(r9, rdi);
imulq(rdx, rax);
pextrw(rbx, xmm0, 3);
lea(rdi, ExternalAddress(PI_INV_TABLE));
subq(rcx, rdi);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, 19);
movl(rsi, 32768);
andl(rsi, rbx);
shrl(rbx, 4);
andl(rbx, 2047);
subl(rbx, 1023);
subl(ecx, rbx);
addq(r9, rdx);
movl(edx, ecx);
addl(edx, 32);
cmpl(ecx, 1);
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
negl(ecx);
addl(ecx, 29);
shll(r9);
movl(rdi, r9);
andl(r9, 536870911);
testl(r9, 268435456);
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 0);
shlq(r9, 32);
orq(r9, r11);
bind(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_6_0_1);
cmpq(r9, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
bind(L_2TAG_PACKET_8_0_1);
bsrq(r11, r9);
movl(ecx, 29);
subl(ecx, r11);
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
shlq(r9);
movq(rax, r10);
shlq(r10);
addl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shrq(rax);
shrq(r8);
orq(r9, rax);
orq(r10, r8);
bind(L_2TAG_PACKET_10_0_1);
cvtsi2sdq(xmm0, r9);
shrq(r10, 1);
cvtsi2sdq(xmm3, r10);
xorpd(xmm4, xmm4);
shll(edx, 4);
negl(edx);
addl(edx, 16368);
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
mulsd(xmm0, xmm4);
shll(rsi, 16);
sarl(rsi, 31);
mulsd(xmm3, xmm5);
movdqu(xmm1, xmm0);
mulsd(xmm0, xmm2);
shrl(rdi, 29);
addsd(xmm1, xmm3);
mulsd(xmm3, xmm2);
addl(rdi, rsi);
xorl(rdi, rsi);
mulsd(xmm6, xmm1);
movl(eax, rdi);
addsd(xmm6, xmm3);
movdqu(xmm2, xmm0);
addsd(xmm0, xmm6);
subsd(xmm2, xmm0);
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1);
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
movdqu(xmm0, xmm4);
addsd(xmm1, xmm6);
addsd(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_7_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
movl(r8, 0);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
xorpd(xmm0, xmm0);
xorpd(xmm6, xmm6);
jmp(L_2TAG_PACKET_11_0_1);
bind(L_2TAG_PACKET_9_0_1);
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
negl(ecx);
shrq(r10);
movq(rax, r9);
shrq(r9);
subl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shlq(rax);
orq(r10, rax);
jmp(L_2TAG_PACKET_10_0_1);
bind(L_2TAG_PACKET_3_0_1);
negl(ecx);
shlq(r9, 32);
orq(r9, r11);
shlq(r9);
movq(rdi, r9);
testl(r9, INT_MIN);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
shrl(r9);
movl(rbx, 0);
shrq(rdi, 3);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 536870912);
shrl(rbx);
shlq(r9, 32);
orq(r9, r11);
shlq(rbx, 32);
addl(rdi, 536870912);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
jmp(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_12_0_1);
shrl(r9);
mov64(rbx, 0x100000000);
shrq(rbx);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
shrq(rdi, 3);
addl(rdi, 536870912);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_2_0_1);
movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1);
bind(B1_4);
addq(rsp, 16);
pop(rbx);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_cos[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
// Registers:
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_cos = (address)_static_const_table_cos;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 56), tmp);
lea(tmp, ExternalAddress(static_const_table_cos));
movsd(xmm0, Address(rsp, 128));
pextrw(eax, xmm0, 3);
andl(eax, 32767);
subl(eax, 12336);
cmpl(eax, 4293);
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
movsd(xmm1, Address(tmp, 2160));
mulsd(xmm1, xmm0);
movdqu(xmm5, Address(tmp, 2240));
movsd(xmm4, Address(tmp, 2224));
pand(xmm4, xmm0);
por(xmm5, xmm4);
movsd(xmm3, Address(tmp, 2128));
movdqu(xmm2, Address(tmp, 2112));
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
andl(edx, 63);
movdqu(xmm5, Address(tmp, 2096));
lea(eax, Address(tmp, 0));
shll(edx, 5);
addl(eax, edx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, Address(tmp, 2144));
subsd(xmm4, xmm3);
movsd(xmm7, Address(eax, 8));
unpcklpd(xmm0, xmm0);
movapd(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, Address(tmp, 2064));
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(eax, 0));
subsd(xmm1, xmm3);
movsd(xmm3, Address(eax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, Address(tmp, 2080));
mulsd(xmm4, Address(eax, 0));
addpd(xmm6, Address(tmp, 2048));
mulpd(xmm5, xmm0);
movapd(xmm0, xmm3);
addsd(xmm3, Address(eax, 8));
mulpd(xmm1, xmm7);
movapd(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movsd(xmm5, Address(eax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(eax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm1, xmm6);
addsd(xmm4, xmm1);
movsd(Address(rsp, 0), xmm4);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_0_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movsd(xmm1, Address(tmp, 2192));
subsd(xmm1, xmm0);
movsd(Address(rsp, 0), xmm1);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2146435072);
cmpl(eax, 2146435072);
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
subl(rsp, 32);
movsd(Address(rsp, 0), xmm0);
lea(eax, Address(rsp, 40));
movl(Address(rsp, 8), eax);
movl(eax, 1);
movl(Address(rsp, 12), eax);
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
addl(rsp, 32);
fld_d(Address(rsp, 8));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_3_0_2);
fld_d(Address(rsp, 128));
fmul_d(Address(tmp, 2208));
bind(L_2TAG_PACKET_1_0_2);
movl(tmp, Address(rsp, 56));
}
#endif

View file

@@ -0,0 +1,674 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - EXP()
// ---------------------
//
// Description:
// Let K = 64 (table size).
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m, n, j - signed integers, j in [-K/2..K/2]
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(y)-1
// on the small interval [-log(2)/K..log(2)/K] (coefficients computed with Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// the value of 2^n is safely computed as 2^n1 * 2^n2,
// where n1 in [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
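// As an aid to reading the algorithm above, here is a minimal, self-contained
// C++ sketch of the same reduction. It is only an illustration, not the
// generated intrinsic: K = 64, the cubic polynomial and the pow()-based
// stand-in for the tabulated 2^(j/K) values are assumptions made for clarity.
#include <math.h>
static inline double exp_sketch(double x) {
  const int    K   = 64;
  const double ln2 = 0.6931471805599453;
  long long m = llround(x * K / ln2);        // x = m*log(2)/K + y
  long long n = m / K, j = m % K;            // m = n*K + j
  if (j < 0) { j += K; n -= 1; }             // keep j in [0, K)
  double y  = x - (double)m * ln2 / K;       // reduced argument y
  double Tj = pow(2.0, (double)j / K);       // stands in for the T[j] table read
  double p  = y + y*y/2 + y*y*y/6;           // P(y) ~ exp(y) - 1 on the tiny interval
  return ldexp(Tj * (1.0 + p), (int)n);      // 2^n * T[j] * (1 + P(y))
}
// The generated code below performs the same steps with table reads and
// integer manipulation of the exponent field in SSE2 registers.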
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _cv[] =
{
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
};
ALIGNED_(16) juint _shifter[] =
{
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
ALIGNED_(16) juint _mmask[] =
{
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _bias[] =
{
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
ALIGNED_(16) juint _Tbl_addr[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL
};
ALIGNED_(16) juint _ALLONES[] =
{
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
ALIGNED_(16) juint _ebias[] =
{
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
ALIGNED_(4) juint _XMAX[] =
{
0xffffffffUL, 0x7fefffffUL
};
ALIGNED_(4) juint _XMIN[] =
{
0x00000000UL, 0x00100000UL
};
ALIGNED_(4) juint _INF[] =
{
0x00000000UL, 0x7ff00000UL
};
ALIGNED_(4) juint _ZERO[] =
{
0x00000000UL, 0x00000000UL
};
ALIGNED_(4) juint _ONE_val[] =
{
0x00000000UL, 0x3ff00000UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address cv = (address)_cv;
address Shifter = (address)_shifter;
address mmask = (address)_mmask;
address bias = (address)_bias;
address Tbl_addr = (address)_Tbl_addr;
address ALLONES = (address)_ALLONES;
address ebias = (address)_ebias;
address XMAX = (address)_XMAX;
address XMIN = (address)_XMIN;
address INF = (address)_INF;
address ZERO = (address)_ZERO;
address ONE_val = (address)_ONE_val;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
lea(tmp, ExternalAddress(Tbl_addr));
movdqu(xmm2, Address(ecx, tmp));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(B1_5);
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
psllq(xmm4, xmm5);
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
cmpl(edx, 52);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pand(xmm4, xmm2);
paddd(xmm3, xmm6);
subsd(xmm2, xmm4);
addsd(xmm0, xmm2);
cmpl(ecx, 1023);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
pextrw(ecx, xmm0, 3);
andl(ecx, 32768);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
movapd(xmm6, xmm0);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
mulsd(xmm6, xmm3);
mulsd(xmm4, xmm3);
movdqu(xmm0, xmm6);
pxor(xmm6, xmm4);
psrad(xmm6, 31);
pshufd(xmm6, xmm6, 85);
psllq(xmm0, 1);
psrlq(xmm0, 1);
pxor(xmm0, xmm6);
psrlq(xmm6, 63);
paddq(xmm0, xmm6);
paddq(xmm0, xmm4);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
jmp(B1_5);
bind(L_2TAG_PACKET_3_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_2_0_2);
paddd(xmm3, xmm6);
addpd(xmm0, xmm2);
mulsd(xmm0, xmm3);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_8_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
movl(Address(rsp, 0), 14);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_9_0_2);
movl(edx, Address(rsp, 8));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(rsp, 8));
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 12));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_6_0_2);
movq(Address(rsp, 16), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 16));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
// Registers:
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table;
bind(start);
subl(rsp, 120);
movl(Address(rsp, 64), tmp);
lea(tmp, ExternalAddress(static_const_table));
movdqu(xmm0, Address(rsp, 128));
unpcklpd(xmm0, xmm0);
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_1_0_2);
fnstcw(Address(rsp, 24));
movzwl(edx, Address(rsp, 24));
orl(edx, 768);
movw(Address(rsp, 28), edx);
fldcw(Address(rsp, 28));
movl(edx, eax);
sarl(eax, 1);
subl(edx, eax);
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
pandn(xmm6, xmm2);
addl(eax, 1023);
movdl(xmm3, eax);
psllq(xmm3, 52);
por(xmm6, xmm3);
addl(edx, 1023);
movdl(xmm4, edx);
psllq(xmm4, 52);
movsd(Address(rsp, 8), xmm0);
fld_d(Address(rsp, 8));
movsd(Address(rsp, 16), xmm6);
fld_d(Address(rsp, 16));
fmula(1);
faddp(1);
movsd(Address(rsp, 8), xmm4);
fld_d(Address(rsp, 8));
fmulp(1);
fstp_d(Address(rsp, 8));
movsd(xmm0, Address(rsp, 8));
fldcw(Address(rsp, 24));
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_2_0_2);
cmpl(ecx, INT_MIN);
jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
cmpl(ecx, -1064950997);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
movl(edx, Address(rsp, 128));
cmpl(edx, -17155601);
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
jmp(L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_3_0_2);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_4_0_2);
movl(edx, 15);
bind(L_2TAG_PACKET_5_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 128));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_7_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, INT_MIN);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_9_0_2);
movsd(xmm0, Address(tmp, 1216));
mulsd(xmm0, xmm0);
movl(edx, 15);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_8_0_2);
movl(edx, Address(rsp, 128));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
movl(eax, Address(rsp, 132));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 132));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 48), xmm0);
fld_d(Address(rsp, 48));
bind(L_2TAG_PACKET_6_0_2);
movl(tmp, Address(rsp, 64));
}
#endif

View file

@@ -0,0 +1,655 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG()
// ---------------------
//
// x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*2^7+0.5))/2^7
//
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log(NaN) = quiet NaN, and raise invalid exception
// log(+INF) = that INF
// log(0) = -INF with divide-by-zero exception raised
// log(1) = +0
// log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/
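// As a companion to the description above, a minimal, self-contained C++
// sketch of the same reduction; it is only an illustration, not the generated
// intrinsic. The real code reads -log(B) (split into high and low parts) from
// the table below, so the log(B) call and the cubic p(r) here are stand-ins.
#include <math.h>
static inline double log_sketch(double x) {
  int k;
  double mx = frexp(x, &k);                   // x = 2^k * mx, mx in [0.5, 1)
  mx *= 2.0; k -= 1;                          // renormalize so mx is in [1, 2)
  double B  = nearbyint(128.0 / mx) / 128.0;  // B ~ 1/mx rounded to 7 fractional bits
  double r  = B * mx - 1.0;                   // reduced argument r = B*mx - 1.0
  double p  = r - r*r/2 + r*r*r/3;            // p(r) ~ log(1 + r)
  const double ln2 = 0.6931471805599453;
  return k * ln2 - log(B) + p;                // k*log(2) - log(B) + p(r)
}
// In the generated code the reciprocal estimate comes from rcpss and the
// -log(B) lookup replaces the log(B) call, so no transcendental is needed.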
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _L_tbl[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL
};
ALIGNED_(16) juint _log2[] =
{
0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
};
ALIGNED_(16) juint _coeff[] =
{
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, r8, r11
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
jmp(start);
address L_tbl = (address)_L_tbl;
address log2 = (address)_log2;
address coeff = (address)_coeff;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
mov64(rax, 0x3ff0000000000000);
movdq(xmm2, rax);
mov64(rdx, 0x77f0000000000000);
movdq(xmm3, rdx);
movl(ecx, 32768);
movdl(xmm4, rcx);
mov64(tmp1, 0xffffe00000000000);
movdq(xmm5, tmp1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
lea(tmp2, ExternalAddress(L_tbl));
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp2, edx));
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
if (VM_Version::supports_sse3()) {
movddup(xmm5, xmm1);
}
else {
movdqu(xmm5, xmm1);
movlhps(xmm5, xmm5);
}
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
if (VM_Version::supports_sse3()) {
movddup(xmm6, xmm0);
}
else {
movdqu(xmm6, xmm0);
movlhps(xmm6, xmm6);
}
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psrlq(xmm0, 27);
movl(ecx, 18416);
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 3);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 2);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
0xffffe000UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
Label L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
jmp(start);
address static_const_table = (address)_static_const_table_log;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movapd(xmm1, xmm0);
movl(ecx, 32768);
movdl(xmm4, ecx);
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 16352);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
movdl(edx, xmm0);
psllq(xmm0, 29);
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx));
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
pshufd(xmm6, xmm0, 228);
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 3);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 2);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movapd(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 18416);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

View file

@@ -0,0 +1,687 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG10()
// ---------------------
//
// Let x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*LH*2^7+0.5))/2^7
// LH is a short approximation for log10(e)
//
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
//
// Result: k*log10(2) - log(B) + p(r)
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log10(0) = -INF with divide-by-zero exception raised
// log10(1) = +0
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
// log10(+INF) = +INF
//
/******************************************************************************/
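// Illustrative sketch only (not part of the generated stub): the reduction the
// comment above describes, written in plain C++ for readability. std::frexp
// stands in for the exponent/mantissa extraction done with integer ops in the
// assembly, and std::log10(mx) stands in for the table lookup of the -log(B)
// term plus the degree-7 polynomial p(r). The function name is hypothetical and
// the special cases listed above (0, negatives, INF, NaN) are ignored here.
#include <cmath>
static double log10_by_reduction(double x) {
  int k;
  double mx = std::frexp(x, &k);      // x = 2^k * mx, mx in [0.5, 1)
  mx *= 2.0;                          // renormalize so mx is in [1, 2)
  k -= 1;
  return k * std::log10(2.0) + std::log10(mx);  // tables + p(r) replace this term
}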
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
{
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
};
ALIGNED_(16) juint _LOG10_E[] =
{
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
ALIGNED_(16) juint _L_tbl_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL
};
ALIGNED_(16) juint _log2_log10[] =
{
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
};
ALIGNED_(16) juint _coeff_log10[] =
{
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r11, eax, ecx, edx);
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
address LOG10_E = (address)_LOG10_E;
address L_tbl = (address)_L_tbl_log10;
address log2 = (address)_log2_log10;
address coeff = (address)_coeff_log10;
bind(start);
subq(rsp, 24);
movsd(Address(rsp, 0), xmm0);
bind(B1_2);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
lea(r11, ExternalAddress(L_tbl));
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5);
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(B1_5);
bind(L_2TAG_PACKET_0_0_2);
movq(xmm0, Address(rsp, 0));
movq(xmm1, Address(rsp, 0));
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm0);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_3_0_2);
xorpd(xmm1, xmm1);
addsd(xmm1, xmm0);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psrlq(xmm0, 27);
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
psrld(xmm0, 2);
rcpps(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_6_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movl(Address(rsp, 16), 9);
jmp(L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(Address(rsp, 16), 8);
bind(L_2TAG_PACKET_8_0_2);
movq(Address(rsp, 8), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 8));
bind(L_2TAG_PACKET_9_0_2);
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
ALIGNED_(16) juint _static_const_table_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//          rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_log10 = (address)_static_const_table_log10;
bind(start);
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table_log10));
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movl(ecx, 1054736384);
movdl(xmm7, ecx);
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 16352);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL
subsd(xmm5, xmm2);
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
movdqu(xmm1, xmm0);
addl(eax, 16);
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
movl(edx, 9);
mulsd(xmm0, xmm1);
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 8);
jmp(L_2TAG_PACKET_9_0_2);
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -2092,25 +2092,6 @@ class StubGenerator: public StubCodeGenerator {
entry_checkcast_arraycopy);
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ flog10();
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('t');
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@@ -3533,6 +3514,31 @@ class StubGenerator: public StubCodeGenerator {
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmPow() {
address start = __ pc();
@@ -3627,6 +3633,44 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_libm_tan_cot_huge() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
BLOCK_COMMENT("Entry:");
__ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = rbx;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
// Safefetch stubs.
@@ -3852,24 +3896,25 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
}
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
StubRoutines::_dtan = generate_libmTan();
}
}
void generate_all() {
// Generates all stubs and initializes the entry points
@@ -3888,8 +3933,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

View file

@@ -2971,35 +2971,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ flog10();
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ trigfunc('t');
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@@ -4730,6 +4701,46 @@ class StubGenerator: public StubCodeGenerator {
#endif
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
@@ -4809,6 +4820,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@@ -4821,6 +4834,8 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -4852,6 +4867,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@@ -4864,6 +4881,55 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -5064,16 +5130,28 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
StubRoutines::_dtan = generate_libmTan();
StubRoutines::_dsin = generate_libmSin();
StubRoutines::_dcos = generate_libmCos();
}
}
@@ -5118,8 +5196,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others

View file

@@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
//tables common for sin and cos
address StubRoutines::x86::_ONEHALF_adr = NULL;
address StubRoutines::x86::_P_2_adr = NULL;
address StubRoutines::x86::_SC_4_adr = NULL;
address StubRoutines::x86::_Ctable_adr = NULL;
address StubRoutines::x86::_SC_2_adr = NULL;
address StubRoutines::x86::_SC_3_adr = NULL;
address StubRoutines::x86::_SC_1_adr = NULL;
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
address StubRoutines::x86::_PI_4_adr = NULL;
address StubRoutines::x86::_PI32INV_adr = NULL;
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
address StubRoutines::x86::_P_1_adr = NULL;
address StubRoutines::x86::_P_3_adr = NULL;
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
//tables common for sincos and tancot
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
address StubRoutines::x86::_Pi4Inv_adr = NULL;
address StubRoutines::x86::_Pi4x3_adr = NULL;
address StubRoutines::x86::_Pi4x4_adr = NULL;
address StubRoutines::x86::_ones_adr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
/* The fields in this structure are arranged so that they can be

View file

@@ -57,6 +57,48 @@
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
//tables common for LIBM sin and cos
static juint _ONEHALF[];
static address _ONEHALF_adr;
static juint _P_2[];
static address _P_2_adr;
static juint _SC_4[];
static address _SC_4_adr;
static juint _Ctable[];
static address _Ctable_adr;
static juint _SC_2[];
static address _SC_2_adr;
static juint _SC_3[];
static address _SC_3_adr;
static juint _SC_1[];
static address _SC_1_adr;
static juint _PI_INV_TABLE[];
static address _PI_INV_TABLE_adr;
static juint _PI_4[];
static address _PI_4_adr;
static juint _PI32INV[];
static address _PI32INV_adr;
static juint _SIGN_MASK[];
static address _SIGN_MASK_adr;
static juint _P_1[];
static address _P_1_adr;
static juint _P_3[];
static address _P_3_adr;
static juint _NEG_ZERO[];
static address _NEG_ZERO_adr;
//tables common for LIBM sincos and tancot
static juint _L_2il0floatpacket_0[];
static address _L_2il0floatpacket_0_adr;
static juint _Pi4Inv[];
static address _Pi4Inv_adr;
static juint _Pi4x3[];
static address _Pi4x3_adr;
static juint _Pi4x4[];
static address _Pi4x4_adr;
static juint _ones[];
static address _ones_adr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@@ -69,4 +111,24 @@
static address k256_addr() { return _k256_adr; }
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
static address _ONEHALF_addr() { return _ONEHALF_adr; }
static address _P_2_addr() { return _P_2_adr; }
static address _SC_4_addr() { return _SC_4_adr; }
static address _Ctable_addr() { return _Ctable_adr; }
static address _SC_2_addr() { return _SC_2_adr; }
static address _SC_3_addr() { return _SC_3_adr; }
static address _SC_1_addr() { return _SC_1_adr; }
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
static address _PI_4_addr() { return _PI_4_adr; }
static address _PI32INV_addr() { return _PI32INV_adr; }
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
static address _P_1_addr() { return _P_1_adr; }
static address _P_3_addr() { return _P_3_adr; }
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
static address _Pi4x3_addr() { return _Pi4x3_adr; }
static address _Pi4x4_addr() { return _Pi4x4_adr; }
static address _ones_addr() { return _ones_adr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP

View file

@@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ fld_d(Address(rsp, 1*wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_sqrt:
__ fsqrt();
@@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_log:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
}
else {
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_log10:
__ flog10();
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ subptr(rsp, 4 * wordSize);
__ fstp_d(Address(rsp, 0));
__ fstp_d(Address(rsp, 2 * wordSize));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
@@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_exp:
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));

View file

@@ -29,6 +29,7 @@
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "runtime/arguments.hpp"
#include "runtime/sharedRuntime.hpp"
#define __ _masm->
@@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
} else if (kind == Interpreter::java_lang_math_log) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
} else if (kind == Interpreter::java_lang_math_log10) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
} else if (kind == Interpreter::java_lang_math_sin) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
} else if (kind == Interpreter::java_lang_math_cos) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
} else if (kind == Interpreter::java_lang_math_pow) {
__ movdbl(xmm1, Address(rsp, wordSize));
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
}
} else if (kind == Interpreter::java_lang_math_tan) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
} else {
__ fld_d(Address(rsp, wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
break;
case Interpreter::java_lang_math_abs:
__ fabs();
break;
case Interpreter::java_lang_math_log10:
__ flog10();
break;
default :
ShouldNotReachHere();
}

View file

@@ -844,6 +844,11 @@ public:
static uint32_t get_xsave_header_upper_segment() {
return _cpuid_info.xem_xcr0_edx;
}
// SSE2 and later processors implement a 'pause' instruction
// that can be used for efficient implementation of
// the intrinsic for java.lang.Thread.onSpinWait()
static bool supports_on_spin_wait() { return supports_sse2(); }
};
#endif // CPU_X86_VM_VM_VERSION_X86_HPP
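// Illustrative sketch only (not part of this header): what a busy-wait loop that
// calls java.lang.Thread.onSpinWait() conceptually executes once the intrinsic is
// matched on SSE2-capable hardware -- a 'pause' inside the polling loop. The flag
// and function below are hypothetical and exist only to show the pattern.
#include <atomic>
#include <immintrin.h>                 // _mm_pause()
static void spin_until_set(std::atomic<bool>& flag) {
  while (!flag.load(std::memory_order_acquire)) {
    _mm_pause();                       // compiles to the x86 'pause' instruction
  }
}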

View file

@@ -1719,6 +1719,10 @@ const bool Matcher::match_rule_supported(int opcode) {
if (!(UseSSE > 4))
ret_value = false;
break;
case Op_OnSpinWait:
if (VM_Version::supports_on_spin_wait() == false)
ret_value = false;
break;
}
return ret_value; // Per default match rules are supported.
@@ -1754,6 +1758,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
return ret_value; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
bool ret_value = false;
if (UseAVX > 2) {
ret_value = VM_Version::supports_avx512vl();
}
return ret_value;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
@@ -1871,7 +1884,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
break;
case Op_VecZ:
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
break;
default:
ShouldNotReachHere();
@@ -1926,7 +1939,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
case Op_VecZ:
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
break;
default:
ShouldNotReachHere();
@@ -1946,7 +1959,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
break;
default:
ShouldNotReachHere();
@@ -2172,6 +2185,19 @@ instruct ShouldNotReachHere() %{
ins_pipe(pipe_slow);
%}
// =================================EVEX special===============================
instruct setMask(rRegI dst, rRegI src) %{
predicate(Matcher::has_predicated_vectors());
match(Set dst (SetVectMaskI src));
effect(TEMP dst);
format %{ "setvectmask $dst, $src" %}
ins_encode %{
__ setvectmask($dst$$Register, $src$$Register);
%}
ins_pipe(pipe_slow);
%}
// ============================================================================
instruct addF_reg(regF dst, regF src) %{
@@ -2996,6 +3022,24 @@ instruct sqrtD_imm(regD dst, immD con) %{
ins_pipe(pipe_slow);
%}
instruct onspinwait() %{
match(OnSpinWait);
ins_cost(200);
format %{
$$template
if (os::is_MP()) {
$$emit$$"pause\t! membar_onspinwait"
} else {
$$emit$$"MEMBAR-onspinwait ! (empty encoding)"
}
%}
ins_encode %{
__ pause();
%}
ins_pipe(pipe_slow);
%}
// ====================VECTOR INSTRUCTIONS=====================================
// Load vectors (4 bytes long)
@@ -3047,11 +3091,11 @@ instruct loadV32(vecY dst, memory mem) %{
%}
// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64);
instruct loadV64_dword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
@@ -3059,6 +3103,19 @@ instruct loadV64(vecZ dst, memory mem) %{
ins_pipe( pipe_slow );
%}
// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
match(Set dst (LoadVector mem));
ins_cost(125);
format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
// Store vectors
instruct storeV4(memory mem, vecS src) %{
predicate(n->as_StoreVector()->memory_size() == 4);
@@ -3104,11 +3161,11 @@ instruct storeV32(memory mem, vecY src) %{
ins_pipe( pipe_slow );
%}
instruct storeV64(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64);
instruct storeV64_dword(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
match(Set mem (StoreVector mem src));
ins_cost(145);
format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
@@ -3116,6 +3173,18 @@ instruct storeV64(memory mem, vecZ src) %{
ins_pipe( pipe_slow );
%}
instruct storeV64_qword(memory mem, vecZ src) %{
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
match(Set mem (StoreVector mem src));
ins_cost(145);
format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
// ====================LEGACY REPLICATE=======================================
instruct Repl4B_mem(vecS dst, memory mem) %{

View file

@@ -1021,10 +1021,10 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
__ vmovdqu(xmm0, Address(rsp, -32));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
__ evmovdquq(Address(rsp, -64), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();
@@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
ins_pipe( pipe_slow );
%}
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8)); // fstp st
ins_pipe( pipe_slow );
%}
instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
ins_pipe( pipe_slow );
%}
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fxch ; swap ST(0) with ST(1)
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FXCH \n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Opcode(0xD9), Opcode(0xC9), // fxch
Opcode(0xD9), Opcode(0xF1)); // fyl2x
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
//-------------Float Instructions-------------------------------
// Float Math
@ -12103,6 +12047,7 @@ instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
@ -12118,6 +12063,7 @@ instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -12132,6 +12078,7 @@ instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
%}
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -12145,6 +12092,60 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
ins_pipe( pipe_jcc );
%}
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
ins_cost(400);
format %{ "J$cop $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(400);
format %{ "J$cop,u $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(300);
format %{ "J$cop,u $labl\t# Loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe( pipe_jcc );
%}
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
match(If cop cmp);

View file

@ -1081,10 +1081,10 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
__ vmovdqu(xmm0, Address(rsp, -32));
break;
case Op_VecZ:
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
__ evmovdquq(Address(rsp, -64), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();
@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// -----------Trig and Transcendental Instructions------------------------------
instruct tanD_reg(regD dst) %{
match(Set dst (TanD dst));
format %{ "dtan $dst\n\t" %}
ins_encode( Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF2), //fptan
Opcode(0xDD), Opcode(0xD8), //fstp st
Push_ResultXD(dst) );
ins_pipe( pipe_slow );
%}
instruct log10D_reg(regD dst) %{
// The source and result Double operands in XMM registers
match(Set dst (Log10D dst));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
format %{ "fldlg2\t\t\t#Log10\n\t"
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
%}
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultXD(dst));
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst)
@ -11471,6 +11443,7 @@ instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
@ -11486,6 +11459,7 @@ instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -11500,6 +11474,7 @@ instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
%}
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
predicate(!n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
@ -11513,6 +11488,61 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
ins_pipe(pipe_jcc);
%}
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
%{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
ins_cost(400);
format %{ "j$cop $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(400);
format %{ "j$cop,u $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
ins_cost(300);
format %{ "j$cop,u $labl\t# loop end\n\t"
"restorevectmask \t# vector mask restore for loops" %}
size(10);
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
%}
ins_pipe(pipe_jcc);
%}
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
match(If cop cmp);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,22 +42,6 @@
#include "utilities/events.hpp"
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {

View file

@ -22,7 +22,7 @@
*/
package jdk.vm.ci.common;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Utilities for operating on raw memory with {@link Unsafe}.

View file

@ -39,7 +39,7 @@ import jdk.vm.ci.inittimer.InitTimer;
import jdk.vm.ci.meta.JavaType;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Calls from Java into HotSpot. The behavior of all the methods in this class that take a native

View file

@ -120,7 +120,9 @@ public class HotSpotCodeCacheProvider implements CodeCacheProvider {
resultInstalledCode = installedCode;
}
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, (HotSpotSpeculationLog) log);
HotSpotSpeculationLog speculationLog = (log != null && log.hasSpeculations()) ? (HotSpotSpeculationLog) log : null;
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, speculationLog);
if (result != config.codeInstallResultOk) {
String resultDesc = config.getCodeInstallResultDescription(result);
if (compiledCode instanceof HotSpotCompiledNmethod) {

View file

@ -25,7 +25,7 @@ package jdk.vm.ci.hotspot;
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
import jdk.vm.ci.code.InstalledCode;
import jdk.vm.ci.inittimer.SuppressFBWarnings;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Implementation of {@link InstalledCode} for HotSpot.

View file

@ -30,7 +30,7 @@ import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.JavaType;
import jdk.vm.ci.meta.ResolvedJavaType;
import jdk.vm.ci.runtime.JVMCIRuntime;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
//JaCoCo Exclude

View file

@ -39,7 +39,7 @@ import jdk.vm.ci.meta.JavaTypeProfile.ProfiledType;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;
import jdk.vm.ci.meta.TriState;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Access to a HotSpot MethodData structure (defined in methodData.hpp).

View file

@ -38,7 +38,7 @@ public class HotSpotSpeculationLog implements SpeculationLog {
/** All speculations that have been a deoptimization reason. */
private Set<SpeculationReason> failedSpeculations;
/** Strong references to all reasons embededded in the current nmethod. */
/** Strong references to all reasons embedded in the current nmethod. */
private volatile Collection<SpeculationReason> speculations;
@Override
@ -81,4 +81,9 @@ public class HotSpotSpeculationLog implements SpeculationLog {
return HotSpotObjectConstantImpl.forObject(reason);
}
@Override
public synchronized boolean hasSpeculations() {
return speculations != null && !speculations.isEmpty();
}
}

View file

@ -38,7 +38,7 @@ import jdk.vm.ci.hotspotvmconfig.HotSpotVMData;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMField;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMFlag;
import jdk.vm.ci.hotspotvmconfig.HotSpotVMType;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
//JaCoCo Exclude

View file

@ -40,7 +40,7 @@ import jdk.internal.org.objectweb.asm.Label;
import jdk.internal.org.objectweb.asm.MethodVisitor;
import jdk.internal.org.objectweb.asm.Opcodes;
import jdk.internal.org.objectweb.asm.Type;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* A {@link ClassVisitor} that verifies {@link HotSpotVMConfig} does not access {@link Unsafe} from

View file

@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.runtime;
import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Class to access the C++ {@code vmSymbols} table.

View file

@ -24,7 +24,7 @@ package jdk.vm.ci.hotspot;
import java.lang.reflect.Field;
import sun.misc.Unsafe;
import jdk.internal.misc.Unsafe;
/**
* Package private access to the {@link Unsafe} capability.

View file

@ -56,4 +56,11 @@ public interface SpeculationLog {
* argument to the deoptimization function.
*/
JavaConstant speculate(SpeculationReason reason);
/**
* Returns if this log has speculations.
*
* @return true if there are speculations, false otherwise
*/
boolean hasSpeculations();
}

View file

@ -24,9 +24,6 @@
*/
module jdk.vm.ci {
// 8153756
requires jdk.unsupported;
uses jdk.vm.ci.hotspot.HotSpotVMEventListener;
uses jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
uses jdk.vm.ci.runtime.JVMCICompilerFactory;

View file

@ -247,7 +247,7 @@ void Canonicalizer::do_ArrayLength (ArrayLength* x) {
} else if ((lf = x->array()->as_LoadField()) != NULL) {
ciField* field = lf->field();
if (field->is_constant() && field->is_static()) {
if (field->is_static_constant()) {
assert(PatchALot || ScavengeRootsInCode < 2, "Constant field loads are folded during parsing");
ciObject* c = field->constant_value().as_object();
if (!c->is_null_object()) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -77,17 +77,13 @@ class CodeStub: public CompilationResourceObj {
}
};
define_array(CodeStubArray, CodeStub*)
define_stack(_CodeStubList, CodeStubArray)
class CodeStubList: public _CodeStubList {
class CodeStubList: public GrowableArray<CodeStub*> {
public:
CodeStubList(): _CodeStubList() {}
CodeStubList(): GrowableArray<CodeStub*>() {}
void append(CodeStub* stub) {
if (!contains(stub)) {
_CodeStubList::append(stub);
GrowableArray<CodeStub*>::append(stub);
}
}
};
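Most of the C1 changes that follow repeat one mechanical pattern: the define_array/define_stack macro pairs become plain GrowableArray<T> typedefs or subclasses, element access moves from operator[] to at()/at_put(), index_of/truncate/push_all become find/trunc_to/appendAll, and the pre-filled constructors gain an explicit length argument, apparently (capacity, initial length, filler) as in intArray(_argcount, _argcount, -1). A minimal toy sketch of that constructor shape and the renamed accessors (a stand-in written for illustration, not the HotSpot GrowableArray itself):

    #include <cassert>
    #include <vector>

    // Toy stand-in for the growable-array idiom used in these hunks: the point
    // is the distinction between reserved capacity and pre-initialized length.
    template <typename E>
    class ToyGrowableArray {
      std::vector<E> _data;
    public:
      // Capacity only: length starts at 0, elements get appended later.
      explicit ToyGrowableArray(int capacity) { _data.reserve(capacity); }
      // Capacity + initial length + filler: at()/at_put() are valid immediately.
      ToyGrowableArray(int capacity, int length, const E& filler)
          : _data(length, filler) { _data.reserve(capacity); }

      E    at(int i) const           { return _data.at(i); }
      void at_put(int i, const E& v) { _data.at(i) = v; }
      void append(const E& v)        { _data.push_back(v); }
      int  length() const            { return (int)_data.size(); }
      int  find(const E& v) const {
        for (int i = 0; i < length(); i++) if (_data[i] == v) return i;
        return -1;
      }
      void trunc_to(int n)           { _data.resize(n); }
    };

    int main() {
      ToyGrowableArray<int> a(8, 8, -1);  // like intArray(_argcount, _argcount, -1)
      a.at_put(3, 42);
      assert(a.at(3) == 42 && a.find(42) == 3);
      a.trunc_to(4);
      assert(a.length() == 4);
      return 0;
    }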

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,12 +53,9 @@ class C1_MacroAssembler;
class CFGPrinter;
typedef LIR_OprDesc* LIR_Opr;
define_array(BasicTypeArray, BasicType)
define_stack(BasicTypeList, BasicTypeArray)
define_array(ExceptionInfoArray, ExceptionInfo*)
define_stack(ExceptionInfoList, ExceptionInfoArray)
typedef GrowableArray<BasicType> BasicTypeArray;
typedef GrowableArray<BasicType> BasicTypeList;
typedef GrowableArray<ExceptionInfo*> ExceptionInfoList;
class Compilation: public StackObj {
friend class CompilationResourceObj;

View file

@ -131,6 +131,9 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
if (!VM_Version::supports_atomic_getset4()) return false;
#endif
break;
case vmIntrinsics::_onSpinWait:
if (!VM_Version::supports_on_spin_wait()) return false;
break;
case vmIntrinsics::_arraycopy:
case vmIntrinsics::_currentTimeMillis:
case vmIntrinsics::_nanoTime:

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -164,7 +164,7 @@ FrameMap::FrameMap(ciMethod* method, int monitors, int reserved_argument_area_si
_reserved_argument_area_size = MAX2(4, reserved_argument_area_size) * BytesPerWord;
_argcount = method->arg_size();
_argument_locations = new intArray(_argcount, -1);
_argument_locations = new intArray(_argcount, _argcount, -1);
_incoming_arguments = java_calling_convention(signature_type_array_for(method), false);
_oop_map_arg_count = _incoming_arguments->reserved_stack_slots();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -36,8 +36,6 @@
class ciMethod;
class CallingConvention;
class BasicTypeArray;
class BasicTypeList;
//--------------------------------------------------------
// FrameMap

View file

@ -357,7 +357,7 @@ void BlockListBuilder::mark_loops() {
_active = BitMap(BlockBegin::number_of_blocks()); _active.clear();
_visited = BitMap(BlockBegin::number_of_blocks()); _visited.clear();
_loop_map = intArray(BlockBegin::number_of_blocks(), 0);
_loop_map = intArray(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), 0);
_next_loop_index = 0;
_next_block_number = _blocks.length();
@ -1354,7 +1354,7 @@ void GraphBuilder::lookup_switch() {
} else {
// collect successors & keys
BlockList* sux = new BlockList(l + 1, NULL);
intArray* keys = new intArray(l, 0);
intArray* keys = new intArray(l, l, 0);
int i;
bool has_bb = false;
for (i = 0; i < l; i++) {
@ -1521,6 +1521,8 @@ void GraphBuilder::method_return(Value x) {
}
Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) {
if (!field_value.is_valid()) return NULL;
BasicType field_type = field_value.basic_type();
ValueType* value = as_ValueType(field_value);
@ -1588,9 +1590,8 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
case Bytecodes::_getstatic: {
// check for compile-time constants, i.e., initialized static final fields
Value constant = NULL;
if (field->is_constant() && !PatchALot) {
if (field->is_static_constant() && !PatchALot) {
ciConstant field_value = field->constant_value();
// Stable static fields are checked for non-default values in ciField::initialize_from().
assert(!field->is_stable() || !field_value.is_null_or_zero(),
"stable static w/ default value shouldn't be a constant");
constant = make_constant(field_value, field);
@ -1619,31 +1620,18 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
Value constant = NULL;
obj = apop();
ObjectType* obj_type = obj->type()->as_ObjectType();
if (obj_type->is_constant() && !PatchALot) {
if (field->is_constant() && obj_type->is_constant() && !PatchALot) {
ciObject* const_oop = obj_type->constant_value();
if (!const_oop->is_null_object() && const_oop->is_loaded()) {
if (field->is_constant()) {
ciConstant field_value = field->constant_value_of(const_oop);
if (FoldStableValues && field->is_stable() && field_value.is_null_or_zero()) {
// Stable field with default value can't be constant.
constant = NULL;
} else {
constant = make_constant(field_value, field);
}
} else {
// For CallSite objects treat the target field as a compile time constant.
if (const_oop->is_call_site()) {
ciConstant field_value = field->constant_value_of(const_oop);
if (field_value.is_valid()) {
constant = make_constant(field_value, field);
// For CallSite objects add a dependency for invalidation of the optimization.
if (field->is_call_site_target()) {
ciCallSite* call_site = const_oop->as_call_site();
if (field->is_call_site_target()) {
ciMethodHandle* target = call_site->get_target();
if (target != NULL) { // just in case
ciConstant field_val(T_OBJECT, target);
constant = new Constant(as_ValueType(field_val));
// Add a dependence for invalidation of the optimization.
if (!call_site->is_constant_call_site()) {
dependency_recorder()->assert_call_site_target_value(call_site, target);
}
}
if (!call_site->is_constant_call_site()) {
ciMethodHandle* target = field_value.as_object()->as_method_handle();
dependency_recorder()->assert_call_site_target_value(call_site, target);
}
}
}
@ -1722,7 +1710,7 @@ void GraphBuilder::check_args_for_profiling(Values* obj_args, int expected) {
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* real_target = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
assert(expected == obj_args->length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
assert(expected == obj_args->max_length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
#endif
}
@ -1733,7 +1721,7 @@ Values* GraphBuilder::collect_args_for_profiling(Values* args, ciMethod* target,
if (obj_args == NULL) {
return NULL;
}
int s = obj_args->size();
int s = obj_args->max_length();
// if called through method handle invoke, some arguments may have been popped
for (int i = start, j = 0; j < s && i < args->length(); i++) {
if (args->at(i)->type()->is_object_kind()) {
@ -2170,7 +2158,7 @@ void GraphBuilder::new_multi_array(int dimensions) {
ciKlass* klass = stream()->get_klass(will_link);
ValueStack* state_before = !klass->is_loaded() || PatchALot ? copy_state_before() : copy_state_exhandling();
Values* dims = new Values(dimensions, NULL);
Values* dims = new Values(dimensions, dimensions, NULL);
// fill in all dimensions
int i = dimensions;
while (i-- > 0) dims->at_put(i, ipop());
@ -3773,9 +3761,9 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, Bytecode
int start = 0;
Values* obj_args = args_list_for_profiling(callee, start, has_receiver);
if (obj_args != NULL) {
int s = obj_args->size();
int s = obj_args->max_length();
// if called through method handle invoke, some arguments may have been popped
for (int i = args_base+start, j = 0; j < obj_args->size() && i < state()->stack_size(); ) {
for (int i = args_base+start, j = 0; j < obj_args->max_length() && i < state()->stack_size(); ) {
Value v = state()->stack_at_inc(i);
if (v->type()->is_object_kind()) {
obj_args->push(v);
@ -4092,7 +4080,7 @@ void GraphBuilder::push_scope_for_jsr(BlockBegin* jsr_continuation, int jsr_dest
// properly clone all blocks in jsr region as well as exception
// handlers containing rets
BlockList* new_bci2block = new BlockList(bci2block()->length());
new_bci2block->push_all(bci2block());
new_bci2block->appendAll(bci2block());
data->set_bci2block(new_bci2block);
data->set_scope(scope());
data->setup_jsr_xhandlers();

View file

@ -531,7 +531,7 @@ ComputeLinearScanOrder::ComputeLinearScanOrder(Compilation* c, BlockBegin* start
_visited_blocks(_max_block_id),
_active_blocks(_max_block_id),
_dominator_blocks(_max_block_id),
_forward_branches(_max_block_id, 0),
_forward_branches(_max_block_id, _max_block_id, 0),
_loop_end_blocks(8),
_work_list(8),
_linear_scan_order(NULL), // initialized later with correct size
@ -849,13 +849,13 @@ bool ComputeLinearScanOrder::ready_for_processing(BlockBegin* cur) {
return false;
}
assert(_linear_scan_order->index_of(cur) == -1, "block already processed (block can be ready only once)");
assert(_work_list.index_of(cur) == -1, "block already in work-list (block can be ready only once)");
assert(_linear_scan_order->find(cur) == -1, "block already processed (block can be ready only once)");
assert(_work_list.find(cur) == -1, "block already in work-list (block can be ready only once)");
return true;
}
void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
assert(_work_list.index_of(cur) == -1, "block already in work list");
assert(_work_list.find(cur) == -1, "block already in work list");
int cur_weight = compute_weight(cur);
@ -891,7 +891,7 @@ void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
void ComputeLinearScanOrder::append_block(BlockBegin* cur) {
TRACE_LINEAR_SCAN(3, tty->print_cr("appending block B%d (weight 0x%6x) to linear-scan order", cur->block_id(), cur->linear_scan_number()));
assert(_linear_scan_order->index_of(cur) == -1, "cannot add the same block twice");
assert(_linear_scan_order->find(cur) == -1, "cannot add the same block twice");
// currently, the linear scan order and code emit order are equal.
// therefore the linear_scan_number and the weight of a block must also
@ -1116,13 +1116,13 @@ void ComputeLinearScanOrder::verify() {
BlockBegin* cur = _linear_scan_order->at(i);
assert(cur->linear_scan_number() == i, "incorrect linear_scan_number");
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->index_of(cur), "incorrect linear_scan_number");
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->find(cur), "incorrect linear_scan_number");
int j;
for (j = cur->number_of_sux() - 1; j >= 0; j--) {
BlockBegin* sux = cur->sux_at(j);
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->index_of(sux), "incorrect linear_scan_number");
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->find(sux), "incorrect linear_scan_number");
if (!sux->is_set(BlockBegin::backward_branch_target_flag)) {
assert(cur->linear_scan_number() < sux->linear_scan_number(), "invalid order");
}
@ -1134,7 +1134,7 @@ void ComputeLinearScanOrder::verify() {
for (j = cur->number_of_preds() - 1; j >= 0; j--) {
BlockBegin* pred = cur->pred_at(j);
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->index_of(pred), "incorrect linear_scan_number");
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->find(pred), "incorrect linear_scan_number");
if (!cur->is_set(BlockBegin::backward_branch_target_flag)) {
assert(cur->linear_scan_number() > pred->linear_scan_number(), "invalid order");
}
@ -1256,8 +1256,7 @@ void IR::print(bool cfg_only, bool live_only) {
}
define_array(BlockListArray, BlockList*)
define_stack(BlockListList, BlockListArray)
typedef GrowableArray<BlockList*> BlockListList;
class PredecessorValidator : public BlockClosure {
private:
@ -1271,7 +1270,7 @@ class PredecessorValidator : public BlockClosure {
public:
PredecessorValidator(IR* hir) {
ResourceMark rm;
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), NULL);
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL);
_blocks = new BlockList();
int i;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -104,9 +104,7 @@ class XHandler: public CompilationResourceObj {
bool equals(XHandler* other) const;
};
define_array(_XHandlerArray, XHandler*)
define_stack(_XHandlerList, _XHandlerArray)
typedef GrowableArray<XHandler*> _XHandlerList;
// XHandlers is the C1 internal list of exception handlers for a method
class XHandlers: public CompilationResourceObj {
@ -132,8 +130,7 @@ class XHandlers: public CompilationResourceObj {
class IRScope;
define_array(IRScopeArray, IRScope*)
define_stack(IRScopeList, IRScopeArray)
typedef GrowableArray<IRScope*> IRScopeList;
class Compilation;
class IRScope: public CompilationResourceObj {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -564,7 +564,7 @@ void BlockBegin::disconnect_edge(BlockBegin* from, BlockBegin* to) {
for (int s = 0; s < from->number_of_sux();) {
BlockBegin* sux = from->sux_at(s);
if (sux == to) {
int index = sux->_predecessors.index_of(from);
int index = sux->_predecessors.find(from);
if (index >= 0) {
sux->_predecessors.remove_at(index);
}
@ -664,7 +664,7 @@ BlockBegin* BlockBegin::insert_block_between(BlockBegin* sux) {
void BlockBegin::remove_successor(BlockBegin* pred) {
int idx;
while ((idx = _successors.index_of(pred)) >= 0) {
while ((idx = _successors.find(pred)) >= 0) {
_successors.remove_at(idx);
}
}
@ -677,7 +677,7 @@ void BlockBegin::add_predecessor(BlockBegin* pred) {
void BlockBegin::remove_predecessor(BlockBegin* pred) {
int idx;
while ((idx = _predecessors.index_of(pred)) >= 0) {
while ((idx = _predecessors.find(pred)) >= 0) {
_predecessors.remove_at(idx);
}
}
@ -722,13 +722,15 @@ void BlockBegin::iterate_postorder(boolArray& mark, BlockClosure* closure) {
void BlockBegin::iterate_preorder(BlockClosure* closure) {
boolArray mark(number_of_blocks(), false);
int mark_len = number_of_blocks();
boolArray mark(mark_len, mark_len, false);
iterate_preorder(mark, closure);
}
void BlockBegin::iterate_postorder(BlockClosure* closure) {
boolArray mark(number_of_blocks(), false);
int mark_len = number_of_blocks();
boolArray mark(mark_len, mark_len, false);
iterate_postorder(mark, closure);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -115,11 +115,8 @@ class Assert;
// A Value is a reference to the instruction creating the value
typedef Instruction* Value;
define_array(ValueArray, Value)
define_stack(Values, ValueArray)
define_array(ValueStackArray, ValueStack*)
define_stack(ValueStackStack, ValueStackArray)
typedef GrowableArray<Value> Values;
typedef GrowableArray<ValueStack*> ValueStackStack;
// BlockClosure is the base class for block traversal/iteration.
@ -137,14 +134,13 @@ class ValueVisitor: public StackObj {
// Some array and list classes
define_array(BlockBeginArray, BlockBegin*)
define_stack(_BlockList, BlockBeginArray)
typedef GrowableArray<BlockBegin*> BlockBeginArray;
class BlockList: public _BlockList {
class BlockList: public GrowableArray<BlockBegin*> {
public:
BlockList(): _BlockList() {}
BlockList(const int size): _BlockList(size) {}
BlockList(const int size, BlockBegin* init): _BlockList(size, init) {}
BlockList(): GrowableArray<BlockBegin*>() {}
BlockList(const int size): GrowableArray<BlockBegin*>(size) {}
BlockList(const int size, BlockBegin* init): GrowableArray<BlockBegin*>(size, size, init) {}
void iterate_forward(BlockClosure* closure);
void iterate_backward(BlockClosure* closure);
@ -1744,7 +1740,7 @@ LEAF(BlockBegin, StateSplit)
void remove_predecessor(BlockBegin* pred);
bool is_predecessor(BlockBegin* pred) const { return _predecessors.contains(pred); }
int number_of_preds() const { return _predecessors.length(); }
BlockBegin* pred_at(int i) const { return _predecessors[i]; }
BlockBegin* pred_at(int i) const { return _predecessors.at(i); }
// exception handlers potentially invoked by this block
void add_exception_handler(BlockBegin* b);
@ -2609,10 +2605,7 @@ class BlockPair: public CompilationResourceObj {
void set_from(BlockBegin* b) { _from = b; }
};
define_array(BlockPairArray, BlockPair*)
define_stack(BlockPairList, BlockPairArray)
typedef GrowableArray<BlockPair*> BlockPairList;
inline int BlockBegin::number_of_sux() const { assert(_end == NULL || _end->number_of_sux() == _successors.length(), "mismatch"); return _successors.length(); }
inline BlockBegin* BlockBegin::sux_at(int i) const { assert(_end == NULL || _end->sux_at(i) == _successors.at(i), "mismatch"); return _successors.at(i); }

View file

@ -483,6 +483,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
case lir_membar_storestore: // result and info always invalid
case lir_membar_loadstore: // result and info always invalid
case lir_membar_storeload: // result and info always invalid
case lir_on_spin_wait:
{
assert(op->as_Op0() != NULL, "must be");
assert(op->_info == NULL, "info not used by this instruction");
@ -727,31 +728,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
break;
}
case lir_tan:
case lir_log10: {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
// On x86 tan/sin/cos need two temporary fpu stack slots and
// log/log10 need one so handle opr2 and tmp as temp inputs.
// Register input operand as temp to guarantee that it doesn't
// overlap with the input.
assert(op2->_info == NULL, "not used");
assert(op2->_tmp5->is_illegal(), "not used");
assert(op2->_opr1->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1);
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
if (op2->_result->is_valid()) do_output(op2->_result);
break;
}
// LIR_Op3
case lir_idiv:
case lir_irem: {
@ -1691,6 +1667,7 @@ const char * LIR_Op::name() const {
case lir_word_align: s = "word_align"; break;
case lir_label: s = "label"; break;
case lir_nop: s = "nop"; break;
case lir_on_spin_wait: s = "on_spin_wait"; break;
case lir_backwardbranch_target: s = "backbranch"; break;
case lir_std_entry: s = "std_entry"; break;
case lir_osr_entry: s = "osr_entry"; break;
@ -1738,8 +1715,6 @@ const char * LIR_Op::name() const {
case lir_rem: s = "rem"; break;
case lir_abs: s = "abs"; break;
case lir_sqrt: s = "sqrt"; break;
case lir_tan: s = "tan"; break;
case lir_log10: s = "log10"; break;
case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break;
case lir_logic_xor: s = "logic_xor"; break;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -59,17 +59,9 @@ class LIR_OprVisitor;
typedef LIR_OprDesc* LIR_Opr;
typedef int RegNr;
define_array(LIR_OprArray, LIR_Opr)
define_stack(LIR_OprList, LIR_OprArray)
define_array(LIR_OprRefArray, LIR_Opr*)
define_stack(LIR_OprRefList, LIR_OprRefArray)
define_array(CodeEmitInfoArray, CodeEmitInfo*)
define_stack(CodeEmitInfoList, CodeEmitInfoArray)
define_array(LIR_OpArray, LIR_Op*)
define_stack(LIR_OpList, LIR_OpArray)
typedef GrowableArray<LIR_Opr> LIR_OprList;
typedef GrowableArray<LIR_Op*> LIR_OpArray;
typedef GrowableArray<LIR_Op*> LIR_OpList;
// define LIR_OprPtr early so LIR_OprDesc can refer to it
class LIR_OprPtr: public CompilationResourceObj {
@ -920,6 +912,7 @@ enum LIR_Code {
, lir_membar_loadstore
, lir_membar_storeload
, lir_get_thread
, lir_on_spin_wait
, end_op0
, begin_op1
, lir_fxch
@ -2101,6 +2094,8 @@ class LIR_List: public CompilationResourceObj {
void std_entry(LIR_Opr receiver) { append(new LIR_Op0(lir_std_entry, receiver)); }
void osr_entry(LIR_Opr osrPointer) { append(new LIR_Op0(lir_osr_entry, osrPointer)); }
void on_spin_wait() { append(new LIR_Op0(lir_on_spin_wait)); }
void branch_destination(Label* lbl) { append(new LIR_OpLabel(lbl)); }
void negate(LIR_Opr from, LIR_Opr to) { append(new LIR_Op1(lir_neg, from, to)); }

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -127,7 +127,7 @@ void LIR_Assembler::append_code_stub(CodeStub* stub) {
void LIR_Assembler::emit_stubs(CodeStubList* stub_list) {
for (int m = 0; m < stub_list->length(); m++) {
CodeStub* s = (*stub_list)[m];
CodeStub* s = stub_list->at(m);
check_codespace();
CHECK_BAILOUT();
@ -678,6 +678,10 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) {
get_thread(op->result_opr());
break;
case lir_on_spin_wait:
on_spin_wait();
break;
default:
ShouldNotReachHere();
break;

View file

@ -250,6 +250,7 @@ class LIR_Assembler: public CompilationResourceObj {
void membar_storestore();
void membar_loadstore();
void membar_storeload();
void on_spin_wait();
void get_thread(LIR_Opr result);
void verify_oop_map(CodeEmitInfo* info);

View file

@ -150,7 +150,7 @@ PhiResolver::~PhiResolver() {
int i;
// resolve any cycles in moves from and to virtual registers
for (i = virtual_operands().length() - 1; i >= 0; i --) {
ResolveNode* node = virtual_operands()[i];
ResolveNode* node = virtual_operands().at(i);
if (!node->visited()) {
_loop = NULL;
move(NULL, node);
@ -161,7 +161,7 @@ PhiResolver::~PhiResolver() {
// generate move for move from non virtual register to arbitrary destination
for (i = other_operands().length() - 1; i >= 0; i --) {
ResolveNode* node = other_operands()[i];
ResolveNode* node = other_operands().at(i);
for (int j = node->no_of_destinations() - 1; j >= 0; j --) {
emit_move(node->operand(), node->destination_at(j)->operand());
}
@ -177,7 +177,7 @@ ResolveNode* PhiResolver::create_node(LIR_Opr opr, bool source) {
assert(node == NULL || node->operand() == opr, "");
if (node == NULL) {
node = new ResolveNode(opr);
vreg_table()[vreg_num] = node;
vreg_table().at_put(vreg_num, node);
}
// Make sure that all virtual operands show up in the list when
// they are used as the source of a move.
@ -3161,7 +3161,9 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_fullFence :
if (os::is_MP()) __ membar();
break;
case vmIntrinsics::_onSpinWait:
__ on_spin_wait();
break;
case vmIntrinsics::_Reference_get:
do_Reference_get(x);
break;
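The lir_on_spin_wait additions above give java.lang.Thread.onSpinWait() its own C1 LIR op: the generator emits it for vmIntrinsics::_onSpinWait, and each backend's LIR_Assembler::on_spin_wait() decides what to emit (the AArch64 port in this commit simply calls Unimplemented()). On x86 the intrinsic is conventionally lowered to a pause hint; that lowering is an assumption here, not shown in these hunks. A standalone C++ sketch of the effect the intrinsic is meant to expose:

    #include <immintrin.h>   // _mm_pause(): the x86 'pause' spin-loop hint
    #include <atomic>
    #include <thread>

    static std::atomic<bool> ready{false};

    // Busy-wait politely: the pause hint tells the core this is a spin loop so
    // it can relax speculation and power -- the effect onSpinWait() exposes.
    static void spin_until_ready() {
      while (!ready.load(std::memory_order_acquire)) {
        _mm_pause();   // other ISAs would use their own yield/spin hint here
      }
    }

    int main() {
      std::thread waiter(spin_until_ready);
      ready.store(true, std::memory_order_release);
      waiter.join();
      return 0;
    }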

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -39,8 +39,7 @@ class Invoke;
class SwitchRange;
class LIRItem;
define_array(LIRItemArray, LIRItem*)
define_stack(LIRItemList, LIRItemArray)
typedef GrowableArray<LIRItem*> LIRItemList;
class SwitchRange: public CompilationResourceObj {
private:
@ -56,15 +55,12 @@ class SwitchRange: public CompilationResourceObj {
BlockBegin* sux() const { return _sux; }
};
define_array(SwitchRangeArray, SwitchRange*)
define_stack(SwitchRangeList, SwitchRangeArray)
typedef GrowableArray<SwitchRange*> SwitchRangeArray;
typedef GrowableArray<SwitchRange*> SwitchRangeList;
class ResolveNode;
define_array(NodeArray, ResolveNode*);
define_stack(NodeList, NodeArray);
typedef GrowableArray<ResolveNode*> NodeList;
// Node objects form a directed graph of LIR_Opr
// Edges between Nodes represent moves from one Node to its destinations
@ -86,7 +82,7 @@ class ResolveNode: public CompilationResourceObj {
// accessors
LIR_Opr operand() const { return _operand; }
int no_of_destinations() const { return _destinations.length(); }
ResolveNode* destination_at(int i) { return _destinations[i]; }
ResolveNode* destination_at(int i) { return _destinations.at(i); }
bool assigned() const { return _assigned; }
bool visited() const { return _visited; }
bool start_node() const { return _start_node; }

View file

@ -496,8 +496,8 @@ void LinearScan::number_instructions() {
}
// initialize with correct length
_lir_ops = LIR_OpArray(num_instructions);
_block_of_op = BlockBeginArray(num_instructions);
_lir_ops = LIR_OpArray(num_instructions, num_instructions, NULL);
_block_of_op = BlockBeginArray(num_instructions, num_instructions, NULL);
int op_id = 0;
int idx = 0;
@ -2507,7 +2507,8 @@ LocationValue* _illegal_value = new (ResourceObj::C_HEAP, mtCompiler) Lo
void LinearScan::init_compute_debug_info() {
// cache for frequently used scope values
// (cpu registers and stack slots)
_scope_value_cache = ScopeValueArray((LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2, NULL);
int cache_size = (LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2;
_scope_value_cache = ScopeValueArray(cache_size, cache_size, NULL);
}
MonitorValue* LinearScan::location_for_monitor_index(int monitor_index) {
@ -3042,7 +3043,7 @@ void LinearScan::assign_reg_num(LIR_OpList* instructions, IntervalWalker* iw) {
insert_point++;
}
}
instructions->truncate(insert_point);
instructions->trunc_to(insert_point);
}
}
@ -3446,7 +3447,7 @@ class RegisterVerifier: public StackObj {
RegisterVerifier(LinearScan* allocator)
: _allocator(allocator)
, _work_list(16)
, _saved_states(BlockBegin::number_of_blocks(), NULL)
, _saved_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
{ }
void verify(BlockBegin* start);
@ -4452,7 +4453,7 @@ Interval* Interval::split(int split_pos) {
new_use_pos_and_kinds.append(_use_pos_and_kinds.at(i));
}
_use_pos_and_kinds.truncate(start_idx + 2);
_use_pos_and_kinds.trunc_to(start_idx + 2);
result->_use_pos_and_kinds = _use_pos_and_kinds;
_use_pos_and_kinds = new_use_pos_and_kinds;
@ -5540,7 +5541,7 @@ void LinearScanWalker::split_and_spill_intersecting_intervals(int reg, int regHi
IntervalList* processed = _spill_intervals[reg];
for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
Interval* it = _spill_intervals[regHi]->at(i);
if (processed->find_from_end(it) == -1) {
if (processed->find(it) == -1) {
remove_from_list(it);
split_and_spill_interval(it);
}
@ -6211,7 +6212,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
_original_preds.clear();
for (j = block->number_of_preds() - 1; j >= 0; j--) {
BlockBegin* pred = block->pred_at(j);
if (_original_preds.index_of(pred) == -1) {
if (_original_preds.find(pred) == -1) {
_original_preds.append(pred);
}
}
@ -6231,7 +6232,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
}
old_pos++;
}
code->truncate(new_pos);
code->trunc_to(new_pos);
DEBUG_ONLY(verify(code));
}
@ -6256,7 +6257,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting unconditional branch at end of block B%d", block->block_id()));
// delete last branch instruction
instructions->truncate(instructions->length() - 1);
instructions->trunc_to(instructions->length() - 1);
} else {
LIR_Op* prev_op = instructions->at(instructions->length() - 2);
@ -6295,7 +6296,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
prev_branch->change_block(last_branch->block());
prev_branch->negate_cond();
prev_cmp->set_condition(prev_branch->cond());
instructions->truncate(instructions->length() - 1);
instructions->trunc_to(instructions->length() - 1);
// if we do change the condition, we have to change the cmove as well
if (prev_cmove != NULL) {
prev_cmove->set_condition(prev_branch->cond());
@ -6378,19 +6379,19 @@ void ControlFlowOptimizer::verify(BlockList* code) {
LIR_OpBranch* op_branch = instructions->at(j)->as_OpBranch();
if (op_branch != NULL) {
assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid");
assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid");
assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid");
assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid");
}
}
for (j = 0; j < block->number_of_sux() - 1; j++) {
BlockBegin* sux = block->sux_at(j);
assert(code->index_of(sux) != -1, "successor not valid");
assert(code->find(sux) != -1, "successor not valid");
}
for (j = 0; j < block->number_of_preds() - 1; j++) {
BlockBegin* pred = block->pred_at(j);
assert(code->index_of(pred) != -1, "successor not valid");
assert(code->find(pred) != -1, "successor not valid");
}
}
}

View file

@ -44,18 +44,9 @@ class Range;
typedef GrowableArray<Interval*> IntervalArray;
typedef GrowableArray<Interval*> IntervalList;
define_array(IntervalsArray, IntervalList*)
define_stack(IntervalsList, IntervalsArray)
define_array(OopMapArray, OopMap*)
define_stack(OopMapList, OopMapArray)
define_array(ScopeValueArray, ScopeValue*)
define_array(LIR_OpListArray, LIR_OpList*);
define_stack(LIR_OpListStack, LIR_OpListArray);
typedef GrowableArray<IntervalList*> IntervalsList;
typedef GrowableArray<ScopeValue*> ScopeValueArray;
typedef GrowableArray<LIR_OpList*> LIR_OpListStack;
enum IntervalUseKind {
// priority of use kinds must be ascending
@ -67,9 +58,6 @@ enum IntervalUseKind {
firstValidKind = 1,
lastValidKind = 3
};
define_array(UseKindArray, IntervalUseKind)
define_stack(UseKindStack, UseKindArray)
enum IntervalKind {
fixedKind = 0, // interval pre-colored by LIR_Generator
@ -619,7 +607,7 @@ class Interval : public CompilationResourceObj {
void add_range(int from, int to);
Interval* split(int split_pos);
Interval* split_from_start(int split_pos);
void remove_first_use_pos() { _use_pos_and_kinds.truncate(_use_pos_and_kinds.length() - 2); }
void remove_first_use_pos() { _use_pos_and_kinds.trunc_to(_use_pos_and_kinds.length() - 2); }
// test intersection
bool covers(int op_id, LIR_OpVisitState::OprMode mode) const;

View file

@ -32,9 +32,7 @@
#include "utilities/bitMap.inline.hpp"
#include "compiler/compileLog.hpp"
define_array(ValueSetArray, ValueSet*);
define_stack(ValueSetList, ValueSetArray);
typedef GrowableArray<ValueSet*> ValueSetList;
Optimizer::Optimizer(IR* ir) {
assert(ir->is_valid(), "IR must be valid");
@ -584,8 +582,8 @@ class NullCheckEliminator: public ValueVisitor {
ValueSet* state() { return _set; }
void set_state_from (ValueSet* state) { _set->set_from(state); }
ValueSet* state_for (BlockBegin* block) { return _block_states[block->block_id()]; }
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states[block->block_id()] = stack; }
ValueSet* state_for (BlockBegin* block) { return _block_states.at(block->block_id()); }
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states.at_put(block->block_id(), stack); }
// Returns true if caused a change in the block's state.
bool merge_state_for(BlockBegin* block,
ValueSet* incoming_state);
@ -596,7 +594,7 @@ class NullCheckEliminator: public ValueVisitor {
: _opt(opt)
, _set(new ValueSet())
, _last_explicit_null_check(NULL)
, _block_states(BlockBegin::number_of_blocks(), NULL)
, _block_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
, _work_list(new BlockList()) {
_visitable_instructions = new ValueSet();
_visitor.set_eliminator(this);
@ -1165,19 +1163,19 @@ void Optimizer::eliminate_null_checks() {
// handlers and iterate over them as well
int nblocks = BlockBegin::number_of_blocks();
BlockList blocks(nblocks);
boolArray visited_block(nblocks, false);
boolArray visited_block(nblocks, nblocks, false);
blocks.push(ir()->start());
visited_block[ir()->start()->block_id()] = true;
visited_block.at_put(ir()->start()->block_id(), true);
for (int i = 0; i < blocks.length(); i++) {
BlockBegin* b = blocks[i];
BlockBegin* b = blocks.at(i);
// exception handlers need to be treated as additional roots
for (int e = b->number_of_exception_handlers(); e-- > 0; ) {
BlockBegin* excp = b->exception_handler_at(e);
int id = excp->block_id();
if (!visited_block[id]) {
if (!visited_block.at(id)) {
blocks.push(excp);
visited_block[id] = true;
visited_block.at_put(id, true);
nce.iterate(excp);
}
}
@ -1186,9 +1184,9 @@ void Optimizer::eliminate_null_checks() {
for (int s = end->number_of_sux(); s-- > 0; ) {
BlockBegin* next = end->sux_at(s);
int id = next->block_id();
if (!visited_block[id]) {
if (!visited_block.at(id)) {
blocks.push(next);
visited_block[id] = true;
visited_block.at_put(id, true);
}
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,8 +53,8 @@ void RangeCheckElimination::eliminate(IR *ir) {
// Constructor
RangeCheckEliminator::RangeCheckEliminator(IR *ir) :
_bounds(Instruction::number_of_instructions(), NULL),
_access_indexed_info(Instruction::number_of_instructions(), NULL)
_bounds(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL),
_access_indexed_info(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL)
{
_visitor.set_range_check_eliminator(this);
_ir = ir;
@ -303,28 +303,28 @@ RangeCheckEliminator::Bound *RangeCheckEliminator::get_bound(Value v) {
// Wrong type or NULL -> No bound
if (!v || (!v->type()->as_IntType() && !v->type()->as_ObjectType())) return NULL;
if (!_bounds[v->id()]) {
if (!_bounds.at(v->id())) {
// First (default) bound is calculated
// Create BoundStack
_bounds[v->id()] = new BoundStack();
_bounds.at_put(v->id(), new BoundStack());
_visitor.clear_bound();
Value visit_value = v;
visit_value->visit(&_visitor);
Bound *bound = _visitor.bound();
if (bound) {
_bounds[v->id()]->push(bound);
_bounds.at(v->id())->push(bound);
}
if (_bounds[v->id()]->length() == 0) {
if (_bounds.at(v->id())->length() == 0) {
assert(!(v->as_Constant() && v->type()->as_IntConstant()), "constants not handled here");
_bounds[v->id()]->push(new Bound());
_bounds.at(v->id())->push(new Bound());
}
} else if (_bounds[v->id()]->length() == 0) {
} else if (_bounds.at(v->id())->length() == 0) {
// To avoid endless loops, bound is currently in calculation -> nothing known about it
return new Bound();
}
// Return bound
return _bounds[v->id()]->top();
return _bounds.at(v->id())->top();
}
// Update bound
@ -353,28 +353,28 @@ void RangeCheckEliminator::update_bound(IntegerStack &pushed, Value v, Bound *bo
// No bound update for constants
return;
}
if (!_bounds[v->id()]) {
if (!_bounds.at(v->id())) {
get_bound(v);
assert(_bounds[v->id()], "Now Stack must exist");
assert(_bounds.at(v->id()), "Now Stack must exist");
}
Bound *top = NULL;
if (_bounds[v->id()]->length() > 0) {
top = _bounds[v->id()]->top();
if (_bounds.at(v->id())->length() > 0) {
top = _bounds.at(v->id())->top();
}
if (top) {
bound->and_op(top);
}
_bounds[v->id()]->push(bound);
_bounds.at(v->id())->push(bound);
pushed.append(v->id());
}
// Add instruction + idx for in block motion
void RangeCheckEliminator::add_access_indexed_info(InstructionList &indices, int idx, Value instruction, AccessIndexed *ai) {
int id = instruction->id();
AccessIndexedInfo *aii = _access_indexed_info[id];
AccessIndexedInfo *aii = _access_indexed_info.at(id);
if (aii == NULL) {
aii = new AccessIndexedInfo();
_access_indexed_info[id] = aii;
_access_indexed_info.at_put(id, aii);
indices.append(instruction);
aii->_min = idx;
aii->_max = idx;
@ -461,7 +461,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
if (_optimistic) {
for (int i = 0; i < indices.length(); i++) {
Instruction *index_instruction = indices.at(i);
AccessIndexedInfo *info = _access_indexed_info[index_instruction->id()];
AccessIndexedInfo *info = _access_indexed_info.at(index_instruction->id());
assert(info != NULL, "Info must not be null");
// if idx < 0, max > 0, max + idx may fall between 0 and
@ -562,7 +562,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
// Clear data structures for next array
for (int i = 0; i < indices.length(); i++) {
Instruction *index_instruction = indices.at(i);
_access_indexed_info[index_instruction->id()] = NULL;
_access_indexed_info.at_put(index_instruction->id(), NULL);
}
indices.clear();
}
@ -1005,7 +1005,7 @@ void RangeCheckEliminator::calc_bounds(BlockBegin *block, BlockBegin *loop_heade
// Reset stack
for (int i=0; i<pushed.length(); i++) {
_bounds[pushed[i]]->pop();
_bounds.at(pushed.at(i))->pop();
}
}
@ -1051,7 +1051,7 @@ void RangeCheckEliminator::dump_condition_stack(BlockBegin *block) {
#endif
// Verification of the IR
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), false) {
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), false) {
this->_ir = ir;
ir->iterate_linear_scan_order(this);
}
@ -1146,14 +1146,14 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
if (start == end) return start != dont_use;
// Simple BFS from start to end
// BlockBeginList _current;
for (int i=0; i<_used.length(); i++) {
_used[i] = false;
for (int i=0; i < _used.length(); i++) {
_used.at_put(i, false);
}
_current.truncate(0);
_successors.truncate(0);
_current.trunc_to(0);
_successors.trunc_to(0);
if (start != dont_use) {
_current.push(start);
_used[start->block_id()] = true;
_used.at_put(start->block_id(), true);
}
// BlockBeginList _successors;
@ -1180,17 +1180,17 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
}
}
for (int i=0; i<_successors.length(); i++) {
BlockBegin *sux = _successors[i];
BlockBegin *sux = _successors.at(i);
assert(sux != NULL, "Successor must not be NULL!");
if (sux == end) {
return true;
}
if (sux != dont_use && !_used[sux->block_id()]) {
_used[sux->block_id()] = true;
if (sux != dont_use && !_used.at(sux->block_id())) {
_used.at_put(sux->block_id(), true);
_current.push(sux);
}
}
_successors.truncate(0);
_successors.trunc_to(0);
}
return false;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,10 +40,8 @@ private:
bool _optimistic; // Insert predicates and deoptimize when they fail
IR *_ir;
define_array(BlockBeginArray, BlockBegin*)
define_stack(BlockBeginList, BlockBeginArray)
define_stack(IntegerStack, intArray)
define_array(IntegerMap, IntegerStack*)
typedef GrowableArray<BlockBegin*> BlockBeginList;
typedef GrowableArray<int> IntegerStack;
class Verification : public BlockClosure {
// RangeCheckEliminator::Verification should never get instantiated on the heap.
@ -180,13 +178,10 @@ public:
void add_assertions(Bound *bound, Instruction *instruction, Instruction *position);
#endif
define_array(BoundArray, Bound *)
define_stack(BoundStack, BoundArray)
define_array(BoundMap, BoundStack *)
define_array(AccessIndexedArray, AccessIndexed *)
define_stack(AccessIndexedList, AccessIndexedArray)
define_array(InstructionArray, Instruction *)
define_stack(InstructionList, InstructionArray)
typedef GrowableArray<Bound*> BoundStack;
typedef GrowableArray<BoundStack*> BoundMap;
typedef GrowableArray<AccessIndexed*> AccessIndexedList;
typedef GrowableArray<Instruction*> InstructionList;
class AccessIndexedInfo : public CompilationResourceObj {
public:
@ -195,7 +190,7 @@ public:
int _max;
};
define_array(AccessIndexedInfoArray, AccessIndexedInfo *)
typedef GrowableArray<AccessIndexedInfo*> AccessIndexedInfoArray;
BoundMap _bounds; // Mapping from Instruction's id to current bound
AccessIndexedInfoArray _access_indexed_info; // Mapping from Instruction's id to AccessIndexedInfo for in block motion
Visitor _visitor;

View file

@ -320,9 +320,11 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::dexp());
FUNCTION_CASE(entry, StubRoutines::dlog());
FUNCTION_CASE(entry, StubRoutines::dlog10());
FUNCTION_CASE(entry, StubRoutines::dpow());
FUNCTION_CASE(entry, StubRoutines::dsin());
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
#undef FUNCTION_CASE

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,7 +46,7 @@
ValueMap::ValueMap()
: _nesting(0)
, _entries(ValueMapInitialSize, NULL)
, _entries(ValueMapInitialSize, ValueMapInitialSize, NULL)
, _killed_values()
, _entry_count(0)
{
@ -56,7 +56,7 @@ ValueMap::ValueMap()
ValueMap::ValueMap(ValueMap* old)
: _nesting(old->_nesting + 1)
, _entries(old->_entries.length())
, _entries(old->_entries.length(), old->_entries.length(), NULL)
, _killed_values()
, _entry_count(old->_entry_count)
{
@@ -72,7 +72,7 @@ void ValueMap::increase_table_size() {
int new_size = old_size * 2 + 1;
ValueMapEntryList worklist(8);
ValueMapEntryArray new_entries(new_size, NULL);
ValueMapEntryArray new_entries(new_size, new_size, NULL);
int new_entry_count = 0;
TRACE_VALUE_NUMBERING(tty->print_cr("increasing table size from %d to %d", old_size, new_size));
@@ -486,7 +486,7 @@ bool ShortLoopOptimizer::process(BlockBegin* loop_header) {
GlobalValueNumbering::GlobalValueNumbering(IR* ir)
: _current_map(NULL)
, _value_maps(ir->linear_scan_order()->length(), NULL)
, _value_maps(ir->linear_scan_order()->length(), ir->linear_scan_order()->length(), NULL)
, _compilation(ir->compilation())
{
TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering"));
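
Each constructor change above switches from the capacity-only GrowableArray form to the (capacity, length, filler) form. The distinction matters because these tables are indexed up to a fixed length with at/at_put: the length has to be established and the slots NULL-filled up front, not merely reserved. A small stand-alone illustration of the difference, using a toy array type rather than HotSpot's GrowableArray:

#include <cassert>
#include <vector>

template <typename E>
class ToyArray {
  std::vector<E> _data;
  int _length;
public:
  // Capacity-only form: room for 'cap' elements, but length() starts at 0.
  explicit ToyArray(int cap) : _length(0) { _data.reserve(cap); }
  // Pre-filled form: length() == len and every slot already holds 'filler'.
  ToyArray(int cap, int len, const E& filler) : _data(len, filler), _length(len) { (void)cap; }

  int  length() const            { return _length; }
  E    at(int i) const           { assert(i >= 0 && i < _length); return _data[i]; }
  void at_put(int i, const E& e) { assert(i >= 0 && i < _length); _data[i] = e; }
};

int main() {
  ToyArray<int*> capacity_only(8);          // at_put(3, ...) would assert: length() is still 0
  (void)capacity_only;                      // deliberately not indexed here
  ToyArray<int*> prefilled(8, 8, nullptr);  // like _entries(size, size, NULL)
  prefilled.at_put(3, nullptr);             // fine: slots 0..7 exist and are NULL-filled
  return 0;
}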

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -53,8 +53,8 @@ class ValueMapEntry: public CompilationResourceObj {
void set_next(ValueMapEntry* next) { _next = next; }
};
define_array(ValueMapEntryArray, ValueMapEntry*)
define_stack(ValueMapEntryList, ValueMapEntryArray)
typedef GrowableArray<ValueMapEntry*> ValueMapEntryArray;
typedef GrowableArray<ValueMapEntry*> ValueMapEntryList;
// ValueMap implements nested hash tables for value numbering. It
// maintains a set _killed_values which represents the instructions
@@ -129,8 +129,7 @@ class ValueMap: public CompilationResourceObj {
#endif
};
define_array(ValueMapArray, ValueMap*)
typedef GrowableArray<ValueMap*> ValueMapArray;
class ValueNumberingVisitor: public InstructionVisitor {
protected:

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@ ValueStack::ValueStack(IRScope* scope, ValueStack* caller_state)
, _caller_state(caller_state)
, _bci(-99)
, _kind(Parsing)
, _locals(scope->method()->max_locals(), NULL)
, _locals(scope->method()->max_locals(), scope->method()->max_locals(), NULL)
, _stack(scope->method()->max_stack())
, _locks()
{
@@ -178,7 +178,7 @@ void ValueStack::setup_phi_for_stack(BlockBegin* b, int index) {
ValueType* t = stack_at(index)->type();
Value phi = new Phi(t, b, -index - 1);
_stack[index] = phi;
_stack.at_put(index, phi);
assert(!t->is_double_word() || _stack.at(index + 1) == NULL, "hi-word of doubleword value must be NULL");
}
@@ -225,7 +225,7 @@ void ValueStack::print() {
if (locals_size() > 0) {
InstructionPrinter ip;
for (int i = 0; i < locals_size();) {
Value l = _locals[i];
Value l = _locals.at(i);
tty->print("local %d ", i);
if (l == NULL) {
tty->print("null");

View file

@@ -124,6 +124,9 @@ public:
}
}
bool is_valid() const {
return basic_type() != T_ILLEGAL;
}
// Debugging output
void print();
};

View file

@@ -235,96 +235,77 @@ void ciField::initialize_from(fieldDescriptor* fd) {
_holder = CURRENT_ENV->get_instance_klass(fd->field_holder());
// Check to see if the field is constant.
bool is_final = this->is_final();
bool is_stable = FoldStableValues && this->is_stable();
if (_holder->is_initialized() && (is_final || is_stable)) {
if (!this->is_static()) {
// A field can be constant if it's a final static field or if
// it's a final non-static field of a trusted class (classes in
// java.lang.invoke and sun.invoke packages and subpackages).
if (is_stable || trust_final_non_static_fields(_holder)) {
_is_constant = true;
return;
}
_is_constant = false;
return;
}
// This field just may be constant. The only case where it will
// not be constant is when the field is a *special* static&final field
// whose value may change. The three examples are java.lang.System.in,
// java.lang.System.out, and java.lang.System.err.
KlassHandle k = _holder->get_Klass();
assert( SystemDictionary::System_klass() != NULL, "Check once per vm");
if( k() == SystemDictionary::System_klass() ) {
// Check offsets for case 2: System.in, System.out, or System.err
if( _offset == java_lang_System::in_offset_in_bytes() ||
_offset == java_lang_System::out_offset_in_bytes() ||
_offset == java_lang_System::err_offset_in_bytes() ) {
_is_constant = false;
return;
}
}
Handle mirror = k->java_mirror();
switch(type()->basic_type()) {
case T_BYTE:
_constant_value = ciConstant(type()->basic_type(), mirror->byte_field(_offset));
break;
case T_CHAR:
_constant_value = ciConstant(type()->basic_type(), mirror->char_field(_offset));
break;
case T_SHORT:
_constant_value = ciConstant(type()->basic_type(), mirror->short_field(_offset));
break;
case T_BOOLEAN:
_constant_value = ciConstant(type()->basic_type(), mirror->bool_field(_offset));
break;
case T_INT:
_constant_value = ciConstant(type()->basic_type(), mirror->int_field(_offset));
break;
case T_FLOAT:
_constant_value = ciConstant(mirror->float_field(_offset));
break;
case T_DOUBLE:
_constant_value = ciConstant(mirror->double_field(_offset));
break;
case T_LONG:
_constant_value = ciConstant(mirror->long_field(_offset));
break;
case T_OBJECT:
case T_ARRAY:
{
oop o = mirror->obj_field(_offset);
// A field will be "constant" if it is known always to be
// a non-null reference to an instance of a particular class,
// or to a particular array. This can happen even if the instance
// or array is not perm. In such a case, an "unloaded" ciArray
// or ciInstance is created. The compiler may be able to use
// information about the object's class (which is exact) or length.
if (o == NULL) {
_constant_value = ciConstant(type()->basic_type(), ciNullObject::make());
} else {
_constant_value = ciConstant(type()->basic_type(), CURRENT_ENV->get_object(o));
assert(_constant_value.as_object() == CURRENT_ENV->get_object(o), "check interning");
Klass* k = _holder->get_Klass();
bool is_stable_field = FoldStableValues && is_stable();
if (is_final() || is_stable_field) {
if (is_static()) {
// This field just may be constant. The only case where it will
// not be constant is when the field is a *special* static & final field
// whose value may change. The three examples are java.lang.System.in,
// java.lang.System.out, and java.lang.System.err.
assert(SystemDictionary::System_klass() != NULL, "Check once per vm");
if (k == SystemDictionary::System_klass()) {
// Check offsets for case 2: System.in, System.out, or System.err
if( _offset == java_lang_System::in_offset_in_bytes() ||
_offset == java_lang_System::out_offset_in_bytes() ||
_offset == java_lang_System::err_offset_in_bytes() ) {
_is_constant = false;
return;
}
}
}
if (is_stable && _constant_value.is_null_or_zero()) {
// It is not a constant after all; treat it as uninitialized.
_is_constant = false;
} else {
_is_constant = true;
} else {
// An instance field can be constant if it's a final static field or if
// it's a final non-static field of a trusted class (classes in
// java.lang.invoke and sun.invoke packages and subpackages).
_is_constant = is_stable_field || trust_final_non_static_fields(_holder);
}
} else {
_is_constant = false;
// For CallSite objects treat the target field as a compile time constant.
assert(SystemDictionary::CallSite_klass() != NULL, "should be already initialized");
if (k == SystemDictionary::CallSite_klass() &&
_offset == java_lang_invoke_CallSite::target_offset_in_bytes()) {
_is_constant = true;
} else {
// Non-final & non-stable fields are not constants.
_is_constant = false;
}
}
}
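
Restated compactly, the reworked logic above reduces to a small decision table: static finals (and @Stable statics) are constants unless they are the System.in/out/err special cases; instance finals are constants only when the field is @Stable, the holder is a trusted class, or the field is CallSite.target. Here is a hedged restatement of that decision as stand-alone C++, with plain booleans standing in for the real ciField/SystemDictionary queries.

// Sketch of the _is_constant decision in ciField::initialize_from (inputs are
// stand-in booleans; in the real code they come from the field descriptor,
// SystemDictionary and the FoldStableValues flag).
static bool field_is_constant(bool is_final, bool is_stable_field, bool is_static,
                              bool is_system_in_out_err, bool holder_is_trusted,
                              bool is_call_site_target) {
  if (is_final || is_stable_field) {
    if (is_static) {
      // Static finals fold, except the System.in/out/err trio, whose values
      // can still change via setIn/setOut/setErr.
      return !is_system_in_out_err;
    }
    // Final (or @Stable) instance fields fold only for @Stable fields or
    // trusted holders such as the java.lang.invoke classes.
    return is_stable_field || holder_is_trusted;
  }
  // Otherwise only the CallSite.target field is treated as a constant.
  return is_call_site_target;
}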
// ------------------------------------------------------------------
// ciField::constant_value
// Get the constant value of this static field.
ciConstant ciField::constant_value() {
assert(is_static() && is_constant(), "illegal call to constant_value()");
if (!_holder->is_initialized()) {
return ciConstant(); // Not initialized yet
}
if (_constant_value.basic_type() == T_ILLEGAL) {
// Static fields are placed in mirror objects.
VM_ENTRY_MARK;
ciInstance* mirror = CURRENT_ENV->get_instance(_holder->get_Klass()->java_mirror());
_constant_value = mirror->field_value_impl(type()->basic_type(), offset());
}
if (FoldStableValues && is_stable() && _constant_value.is_null_or_zero()) {
return ciConstant();
}
return _constant_value;
}
// ------------------------------------------------------------------
// ciField::constant_value_of
// Get the constant value of non-static final field in the given object.
ciConstant ciField::constant_value_of(ciObject* object) {
assert(!is_static() && is_constant(), "only if field is non-static constant");
assert(object->is_instance(), "must be instance");
ciConstant field_value = object->as_instance()->field_value(this);
if (FoldStableValues && is_stable() && field_value.is_null_or_zero()) {
return ciConstant();
}
return field_value;
}
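
constant_value() above now reads the static field's value lazily: a T_ILLEGAL basic type in _constant_value doubles as the "not computed yet" sentinel, the actual read goes through the holder's java mirror under a VM transition, and @Stable fields that are still null/zero come back as an invalid (empty) constant. The same caching shape, reduced to a self-contained sketch with stubbed-out types; BasicType, the mirror read and the field flags are all stand-ins here.

// Stand-alone restatement of the lazy caching in ciField::constant_value().
enum BasicType { T_ILLEGAL = 0, T_INT = 1 };

struct Constant {
  BasicType type;
  long value;
  Constant() : type(T_ILLEGAL), value(0) {}
  Constant(BasicType t, long v) : type(t), value(v) {}
  bool is_valid() const        { return type != T_ILLEGAL; }
  bool is_null_or_zero() const { return value == 0; }
};

class LazyStaticField {
  Constant _constant_value;             // starts out T_ILLEGAL, i.e. "not read yet"
  bool _holder_initialized;
  bool _is_stable;
  Constant read_from_mirror() const { return Constant(T_INT, 42); }  // stand-in for the mirror read
public:
  LazyStaticField(bool holder_initialized, bool is_stable)
    : _holder_initialized(holder_initialized), _is_stable(is_stable) {}

  Constant constant_value() {
    if (!_holder_initialized) return Constant();        // class not initialized: no value yet
    if (!_constant_value.is_valid()) {
      _constant_value = read_from_mirror();             // computed once, then cached
    }
    if (_is_stable && _constant_value.is_null_or_zero()) {
      return Constant();                                // @Stable still at its default: not constant yet
    }
    return _constant_value;
  }
};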
// ------------------------------------------------------------------
// ciField::compute_type
//

View file

@@ -62,7 +62,7 @@ private:
void initialize_from(fieldDescriptor* fd);
public:
ciFlags flags() { return _flags; }
ciFlags flags() const { return _flags; }
// Of which klass is this field a member?
//
@@ -89,13 +89,13 @@ public:
//
// In that case the declared holder of f would be B and
// the canonical holder of f would be A.
ciInstanceKlass* holder() { return _holder; }
ciInstanceKlass* holder() const { return _holder; }
// Name of this field?
ciSymbol* name() { return _name; }
ciSymbol* name() const { return _name; }
// Signature of this field?
ciSymbol* signature() { return _signature; }
ciSymbol* signature() const { return _signature; }
// Of what type is this field?
ciType* type() { return (_type == NULL) ? compute_type() : _type; }
@@ -107,13 +107,13 @@ public:
int size_in_bytes() { return type2aelembytes(layout_type()); }
// What is the offset of this field?
int offset() {
int offset() const {
assert(_offset >= 1, "illegal call to offset()");
return _offset;
}
// Same question, explicit units. (Fields are aligned to the byte level.)
int offset_in_bytes() {
int offset_in_bytes() const {
return offset();
}
@@ -127,31 +127,27 @@ public:
//
// Clarification: A field is considered constant if:
// 1. The field is both static and final
// 2. The canonical holder of the field has undergone
// static initialization.
// 3. The field is not one of the special static/final
// 2. The field is not one of the special static/final
// non-constant fields. These are java.lang.System.in
// and java.lang.System.out. Abomination.
//
// A field is also considered constant if it is marked @Stable
// and is non-null (or non-zero, if a primitive).
// For non-static fields, the null/zero check must be
// arranged by the user, as constant_value().is_null_or_zero().
bool is_constant() { return _is_constant; }
//
// A user should also check the field value (constant_value().is_valid()), since
// constant fields of non-initialized classes don't have values yet.
bool is_constant() const { return _is_constant; }
// Get the constant value of this field.
ciConstant constant_value() {
assert(is_static() && is_constant(), "illegal call to constant_value()");
return _constant_value;
// Get the constant value of the static field.
ciConstant constant_value();
bool is_static_constant() {
return is_static() && is_constant() && constant_value().is_valid();
}
// Get the constant value of non-static final field in the given
// object.
ciConstant constant_value_of(ciObject* object) {
assert(!is_static() && is_constant(), "only if field is non-static constant");
assert(object->is_instance(), "must be instance");
return object->as_instance()->field_value(this);
}
ciConstant constant_value_of(ciObject* object);
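
Taken together, the comments above describe a two-step contract for callers: is_constant() only says the field may fold, and the value itself must be fetched (constant_value() for statics, constant_value_of() for instance fields) and checked with is_valid(), since constant fields of not-yet-initialized classes carry no value. Below is a hedged sketch of that calling convention; the tiny stub types merely stand in for the real ciField/ciObject/ciConstant, and try_fold is a hypothetical caller.

// Minimal stubs standing in for the ci API documented above.
struct ciConstant {
  bool _valid;
  explicit ciConstant(bool valid = false) : _valid(valid) {}
  bool is_valid() const { return _valid; }
};
struct ciObject {};
struct ciField {
  bool is_static()   const { return true; }
  bool is_constant() const { return true; }
  ciConstant constant_value()             { return ciConstant(true); }
  ciConstant constant_value_of(ciObject*) { return ciConstant(true); }
};

// Fold only when the field is marked constant *and* a usable value exists.
static bool try_fold(ciField* field, ciObject* receiver) {
  if (!field->is_constant()) return false;
  ciConstant c = field->is_static() ? field->constant_value()
                                    : field->constant_value_of(receiver);
  return c.is_valid();   // invalid when the holder class is not initialized yet
}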
// Check for link time errors. Accessing a field from a
// certain class via a certain bytecode may or may not be legal.
@@ -165,14 +161,14 @@ public:
Bytecodes::Code bc);
// Java access flags
bool is_public () { return flags().is_public(); }
bool is_private () { return flags().is_private(); }
bool is_protected () { return flags().is_protected(); }
bool is_static () { return flags().is_static(); }
bool is_final () { return flags().is_final(); }
bool is_stable () { return flags().is_stable(); }
bool is_volatile () { return flags().is_volatile(); }
bool is_transient () { return flags().is_transient(); }
bool is_public () const { return flags().is_public(); }
bool is_private () const { return flags().is_private(); }
bool is_protected () const { return flags().is_protected(); }
bool is_static () const { return flags().is_static(); }
bool is_final () const { return flags().is_final(); }
bool is_stable () const { return flags().is_stable(); }
bool is_volatile () const { return flags().is_volatile(); }
bool is_transient () const { return flags().is_transient(); }
bool is_call_site_target() {
ciInstanceKlass* callsite_klass = CURRENT_ENV->CallSite_klass();

View file

@@ -56,49 +56,21 @@ ciType* ciInstance::java_mirror_type() {
}
// ------------------------------------------------------------------
// ciInstance::field_value
//
// Constant value of a field.
ciConstant ciInstance::field_value(ciField* field) {
assert(is_loaded(), "invalid access - must be loaded");
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
assert(klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
VM_ENTRY_MARK;
ciConstant result;
// ciInstance::field_value_impl
ciConstant ciInstance::field_value_impl(BasicType field_btype, int offset) {
Handle obj = get_oop();
assert(!obj.is_null(), "bad oop");
BasicType field_btype = field->type()->basic_type();
int offset = field->offset();
switch(field_btype) {
case T_BYTE:
return ciConstant(field_btype, obj->byte_field(offset));
break;
case T_CHAR:
return ciConstant(field_btype, obj->char_field(offset));
break;
case T_SHORT:
return ciConstant(field_btype, obj->short_field(offset));
break;
case T_BOOLEAN:
return ciConstant(field_btype, obj->bool_field(offset));
break;
case T_INT:
return ciConstant(field_btype, obj->int_field(offset));
break;
case T_FLOAT:
return ciConstant(obj->float_field(offset));
break;
case T_DOUBLE:
return ciConstant(obj->double_field(offset));
break;
case T_LONG:
return ciConstant(obj->long_field(offset));
break;
case T_OBJECT:
case T_ARRAY:
{
case T_BYTE: return ciConstant(field_btype, obj->byte_field(offset));
case T_CHAR: return ciConstant(field_btype, obj->char_field(offset));
case T_SHORT: return ciConstant(field_btype, obj->short_field(offset));
case T_BOOLEAN: return ciConstant(field_btype, obj->bool_field(offset));
case T_INT: return ciConstant(field_btype, obj->int_field(offset));
case T_FLOAT: return ciConstant(obj->float_field(offset));
case T_DOUBLE: return ciConstant(obj->double_field(offset));
case T_LONG: return ciConstant(obj->long_field(offset));
case T_OBJECT: // fall through
case T_ARRAY: {
oop o = obj->obj_field(offset);
// A field will be "constant" if it is known always to be
@@ -115,11 +87,22 @@ ciConstant ciInstance::field_value(ciField* field) {
}
}
}
ShouldNotReachHere();
// to shut up the compiler
fatal("no field value: %s", type2name(field_btype));
return ciConstant();
}
// ------------------------------------------------------------------
// ciInstance::field_value
//
// Constant value of a field.
ciConstant ciInstance::field_value(ciField* field) {
assert(is_loaded(), "invalid access - must be loaded");
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
assert(field->is_static() || klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
GUARDED_VM_ENTRY(return field_value_impl(field->type()->basic_type(), field->offset());)
}
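
field_value() above shrinks to a thin wrapper: the asserts stay outside, and the body runs under GUARDED_VM_ENTRY, which (roughly) performs the native-to-VM thread-state transition before evaluating the enclosed statement. The sketch below is an assumed reconstruction of that wrapper shape using a RAII guard; the real macro lives in the ci utilities and does considerably more bookkeeping than this.

#include <cstdio>

// Stand-in for the thread-state transition that VM_ENTRY_MARK performs.
struct VMTransitionGuard {
  VMTransitionGuard()  { std::puts("-> in VM");  }   // e.g. native -> in_vm
  ~VMTransitionGuard() { std::puts("<- native"); }   // restored when the scope exits
};

// Assumed shape of the guarded-entry macro: open a guard scope, run the code.
#define GUARDED_VM_ENTRY(code) { VMTransitionGuard vm_guard; code }

static int field_value_impl(int offset) { return offset * 2; }   // pretend mirror read

static int field_value(int offset) {
  GUARDED_VM_ENTRY(return field_value_impl(offset);)
  // not reached: the guarded statement above always returns
}

int main() {
  return field_value(21) == 42 ? 0 : 1;
}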
// ------------------------------------------------------------------
// ciInstance::field_value_by_offset
//

View file

@@ -36,6 +36,7 @@
// instance of java.lang.Object.
class ciInstance : public ciObject {
CI_PACKAGE_ACCESS
friend class ciField;
protected:
ciInstance(instanceHandle h_i) : ciObject(h_i) {
@@ -50,6 +51,8 @@ protected:
void print_impl(outputStream* st);
ciConstant field_value_impl(BasicType field_btype, int offset);
public:
// If this object is a java mirror, return the corresponding type.
// Otherwise, return NULL.

View file

@@ -88,12 +88,7 @@ bool ciKlass::is_subclass_of(ciKlass* that) {
assert(this->is_loaded(), "must be loaded: %s", this->name()->as_quoted_ascii());
assert(that->is_loaded(), "must be loaded: %s", that->name()->as_quoted_ascii());
VM_ENTRY_MARK;
Klass* this_klass = get_Klass();
Klass* that_klass = that->get_Klass();
bool result = this_klass->is_subclass_of(that_klass);
return result;
GUARDED_VM_ENTRY(return get_Klass()->is_subclass_of(that->get_Klass());)
}
// ------------------------------------------------------------------

View file

@@ -58,9 +58,7 @@ ciSymbol::ciSymbol(Symbol* s)
//
// The text of the symbol as a null-terminated C string.
const char* ciSymbol::as_utf8() {
VM_QUICK_ENTRY_MARK;
Symbol* s = get_symbol();
return s->as_utf8();
GUARDED_VM_QUICK_ENTRY(return get_symbol()->as_utf8();)
}
// The text of the symbol as a null-terminated C string.

View file

@@ -2927,7 +2927,7 @@ static const intArray* sort_methods(Array<Method*>* methods) {
// If JVMTI original method ordering or sharing is enabled construct int
// array remembering the original ordering
if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
method_ordering = new intArray(length);
method_ordering = new intArray(length, length, -1);
for (int index = 0; index < length; index++) {
Method* const m = methods->at(index);
const int old_index = m->vtable_index();

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,6 @@ class FieldInfo;
template <typename T>
class GrowableArray;
class InstanceKlass;
class intArray;
class Symbol;
class TempNewSymbol;

View file

@@ -882,6 +882,10 @@
do_name( newArray_name, "newArray") \
do_signature(newArray_signature, "(Ljava/lang/Class;I)Ljava/lang/Object;") \
\
do_intrinsic(_onSpinWait, java_lang_Thread, onSpinWait_name, onSpinWait_signature, F_S) \
do_name( onSpinWait_name, "onSpinWait") \
do_alias( onSpinWait_signature, void_method_signature) \
\
do_intrinsic(_copyOf, java_util_Arrays, copyOf_name, copyOf_signature, F_S) \
do_name( copyOf_name, "copyOf") \
do_signature(copyOf_signature, "([Ljava/lang/Object;ILjava/lang/Class;)[Ljava/lang/Object;") \

View file

@@ -533,6 +533,22 @@ bool CompiledIC::is_icholder_entry(address entry) {
return (cb != NULL && cb->is_adapter_blob());
}
// Release the CompiledICHolder* associated with this call site if there is one.
void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
if (is_icholder_entry(call->destination())) {
NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
}
}
bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// This call site might have become stale so inspect it carefully.
NativeCall* call = nativeCall_at(call_site->addr());
return is_icholder_entry(call->destination());
}
// ----------------------------------------------------------------------------
void CompiledStaticCall::set_to_clean() {

View file

@@ -313,7 +313,8 @@ bool ExceptionCache::match_exception_with_space(Handle exception) {
address ExceptionCache::test_address(address addr) {
for (int i=0; i<count(); i++) {
int limit = count();
for (int i = 0; i < limit; i++) {
if (pc_at(i) == addr) {
return handler_at(i);
}
@@ -329,7 +330,6 @@ bool ExceptionCache::add_address_and_handler(address addr, address handler) {
if (index < cache_size) {
set_pc_at(index, addr);
set_handler_at(index, handler);
OrderAccess::storestore();
increment_count();
return true;
}
@@ -442,10 +442,11 @@ void nmethod::add_exception_cache_entry(ExceptionCache* new_entry) {
assert(new_entry != NULL,"Must be non null");
assert(new_entry->next() == NULL, "Must be null");
if (exception_cache() != NULL) {
new_entry->set_next(exception_cache());
ExceptionCache *ec = exception_cache();
if (ec != NULL) {
new_entry->set_next(ec);
}
set_exception_cache(new_entry);
release_set_exception_cache(new_entry);
}
void nmethod::clean_exception_cache(BoolObjectClosure* is_alive) {

View file

@@ -41,15 +41,16 @@ class ExceptionCache : public CHeapObj<mtCode> {
Klass* _exception_type;
address _pc[cache_size];
address _handler[cache_size];
int _count;
volatile int _count;
ExceptionCache* _next;
address pc_at(int index) { assert(index >= 0 && index < count(),""); return _pc[index]; }
void set_pc_at(int index, address a) { assert(index >= 0 && index < cache_size,""); _pc[index] = a; }
address handler_at(int index) { assert(index >= 0 && index < count(),""); return _handler[index]; }
void set_handler_at(int index, address a) { assert(index >= 0 && index < cache_size,""); _handler[index] = a; }
int count() { return _count; }
void increment_count() { _count++; }
int count() { return OrderAccess::load_acquire(&_count); }
// increment_count is only called under lock, but there may be concurrent readers.
void increment_count() { OrderAccess::release_store(&_count, _count + 1); }
public:
@@ -241,7 +242,7 @@ class nmethod : public CodeBlob {
// counter is decreased (by 1) while sweeping.
int _hotness_counter;
ExceptionCache *_exception_cache;
ExceptionCache * volatile _exception_cache;
PcDescCache _pc_desc_cache;
// These are used for compiled synchronized native methods to
@@ -433,7 +434,7 @@ class nmethod : public CodeBlob {
// flag accessing and manipulation
bool is_in_use() const { return _state == in_use; }
bool is_alive() const { return _state == in_use || _state == not_entrant; }
bool is_alive() const { unsigned char s = _state; return s == in_use || s == not_entrant; }
bool is_not_entrant() const { return _state == not_entrant; }
bool is_zombie() const { return _state == zombie; }
bool is_unloaded() const { return _state == unloaded; }
@@ -576,8 +577,10 @@ public:
void set_stack_traversal_mark(long l) { _stack_traversal_mark = l; }
// Exception cache support
// Note: _exception_cache may be read concurrently. We rely on memory_order_consume here.
ExceptionCache* exception_cache() const { return _exception_cache; }
void set_exception_cache(ExceptionCache *ec) { _exception_cache = ec; }
void release_set_exception_cache(ExceptionCache *ec) { OrderAccess::release_store_ptr(&_exception_cache, ec); }
address handler_for_exception_and_pc(Handle exception, address pc);
void add_handler_for_exception_and_pc(Handle exception, address pc, address handler);
void clean_exception_cache(BoolObjectClosure* is_alive);
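
The concurrency changes above all follow one publication pattern: the writer fully initializes an entry (or a new ExceptionCache node) and only then makes it visible with a release store (release_store on _count, release_store_ptr on _exception_cache), while lock-free readers first load the count/head with acquire semantics, snapshot it once per scan, and only then read the published slots. Below is a compact stand-alone sketch of that protocol using std::atomic in place of HotSpot's OrderAccess.

#include <atomic>

// Fixed-size cache written under an external lock, read lock-free,
// in the spirit of ExceptionCache::add_address_and_handler / test_address.
class PublishedCache {
  static const int cache_size = 16;
  void* _pc[cache_size];
  void* _handler[cache_size];
  std::atomic<int> _count;
public:
  PublishedCache() : _count(0) {}

  // Writer (caller holds a lock): fill the slot first, then publish it by
  // bumping _count with release semantics so readers never see a half-written slot.
  bool add(void* pc, void* handler) {
    int index = _count.load(std::memory_order_relaxed);
    if (index >= cache_size) return false;
    _pc[index] = pc;
    _handler[index] = handler;
    _count.store(index + 1, std::memory_order_release);   // ~ OrderAccess::release_store
    return true;
  }

  // Reader (no lock): acquire the count once, then scan only the published slots.
  void* test_address(void* pc) const {
    int limit = _count.load(std::memory_order_acquire);    // ~ OrderAccess::load_acquire
    for (int i = 0; i < limit; i++) {
      if (_pc[i] == pc) return _handler[i];
    }
    return nullptr;
  }
};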

Some files were not shown because too many files have changed in this diff