Mirror of https://github.com/openjdk/jdk.git (synced 2025-08-26 14:24:46 +02:00)

7181494: cleanup avx and vectors code

Renamed mach nodes which use scalar AVX instructions; added integer vector shuffling instructions.
Reviewed-by: twisti

parent 19ea8f720f
commit 15f4203b0f

5 changed files with 386 additions and 313 deletions
@@ -2573,6 +2573,13 @@ void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x6C);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
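The punpcklqdq emitter added above produces the legacy SSE2 encoding 66 0F 6C /r: simd_prefix_and_encode takes care of the prefix and opcode-escape bytes (and switches to a VEX prefix when AVX is in use), 0x6C is the opcode, and 0xC0 | encode forms the ModRM byte with mod = 11 for the register-register form. A minimal standalone sketch of that byte arithmetic, assuming plain xmm0-xmm7 registers and ignoring REX/VEX entirely (illustrative only, not HotSpot code):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Illustrative only: byte layout of "punpcklqdq xmmD, xmmS" (66 0F 6C /r)
    // for xmm0..xmm7, mirroring what the emitter above writes.
    static std::vector<uint8_t> encode_punpcklqdq(int dst, int src) {
      std::vector<uint8_t> bytes;
      bytes.push_back(0x66);                               // operand-size prefix (the legacy form of VEX_SIMD_66)
      bytes.push_back(0x0F);                               // two-byte opcode escape
      bytes.push_back(0x6C);                               // PUNPCKLQDQ opcode
      bytes.push_back(0xC0 | (uint8_t)((dst << 3) | src)); // ModRM: mod=11, reg=dst, rm=src
      return bytes;
    }

    int main() {
      for (uint8_t b : encode_punpcklqdq(1, 2))  // punpcklqdq xmm1, xmm2 -> 66 0F 6C CA
        std::printf("%02X ", b);
      std::printf("\n");
      return 0;
    }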
@@ -3178,6 +3185,13 @@ void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
+  emit_byte(0xEF);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
@@ -3189,6 +3203,17 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
   emit_byte(0x01);
 }
 
+void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_byte(0x38);
+  emit_byte(0xC0 | encode);
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
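The trailing imm8 in vinsertf128h/vinserti128h selects the destination 128-bit lane: 0x00 for the lower half, 0x01 for the upper half, exactly as the comments in the hunk above state. The same semantics are visible from C++ through the AVX2 intrinsic _mm256_inserti128_si256; a small sketch, assuming an AVX2-capable CPU and compilation with -mavx2:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m256i dst = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);  // eight 32-bit lanes
      __m128i src = _mm_setr_epi32(100, 101, 102, 103);          // four 32-bit lanes

      // imm = 1: insert src into the upper 128 bits, matching the 0x01 byte above.
      __m256i hi = _mm256_inserti128_si256(dst, src, 1);

      int out[8];
      _mm256_storeu_si256((__m256i*)out, hi);
      for (int i = 0; i < 8; i++) std::printf("%d ", out[i]);    // 0 1 2 3 100 101 102 103
      std::printf("\n");
      return 0;
    }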
@@ -7480,6 +7505,24 @@ void MacroAssembler::movbyte(ArrayAddress dst, int src) {
   movb(as_Address(dst), src);
 }
 
+void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movdl(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movdl(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movq(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movq(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     if (UseXmmLoadAndClearUpper) {
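The movdl/movq overloads added above follow the usual MacroAssembler pattern for AddressLiteral operands: use the literal directly when it is reachable (on x86-64, essentially when a 32-bit RIP-relative displacement can reach it), otherwise materialize the address into rscratch1 with lea and load through that register. A rough sketch of the reachability idea only, under simplified assumptions about what reachable(src) checks (this is not HotSpot's implementation):

    #include <cstdint>
    #include <cstdio>

    // Simplified idea: a target is "reachable" from a code position when the
    // displacement fits a signed 32-bit field (RIP-relative addressing).
    static bool fits_rip_relative(uint64_t target, uint64_t next_instruction_pc) {
      int64_t disp = (int64_t)(target - next_instruction_pc);
      return disp == (int64_t)(int32_t)disp;
    }

    int main() {
      uint64_t pc = 0x00007f0000001000ULL;
      std::printf("%d\n", fits_rip_relative(pc + 0x1000, pc));        // 1: load directly
      std::printf("%d\n", fits_rip_relative(pc + (1ULL << 40), pc));  // 0: lea into a scratch register first
      return 0;
    }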
@@ -1466,6 +1466,9 @@ private:
   void punpckldq(XMMRegister dst, XMMRegister src);
   void punpckldq(XMMRegister dst, Address src);
 
+  // Interleave Low Quadwords
+  void punpcklqdq(XMMRegister dst, XMMRegister src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
 #endif
@@ -1606,13 +1609,11 @@ private:
 
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
-  // AVX 3-operands instructions (encoded with VEX prefix)
+  // AVX 3-operands scalar instructions (encoded with VEX prefix)
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vdivss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1625,13 +1626,17 @@ private:
   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
 
   // AVX Vector instrucitons.
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
   // to avoid transaction penalty between AVX and SSE states. There is no
@@ -2563,6 +2568,20 @@ public:
   void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
+
+  // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    if (UseAVX > 1) // vinserti128h is available only in AVX2
+      Assembler::vinserti128h(dst, nds, src);
+    else
+      Assembler::vinsertf128h(dst, nds, src);
+  }
 
   // Data
 
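The MacroAssembler::vpxor wrapper added above falls back to Assembler::vxorpd when only AVX1 is available. That is sound because a bitwise XOR produces the same bits whether the operands are treated as packed integers or packed doubles; only the execution domain (and possibly a bypass delay) differs. A small SSE2-based check of that bit-for-bit equivalence (illustrative; the generated code of course emits the instructions rather than calling intrinsics):

    #include <emmintrin.h>   // SSE2
    #include <cstdio>
    #include <cstring>

    int main() {
      __m128i a = _mm_set_epi32(0x12345678, (int)0x9ABCDEF0, 0x0F0F0F0F, (int)0xFFFF0000);
      __m128i b = _mm_set_epi32((int)0x80000000, 0x7FFFFFFF, (int)0xAAAAAAAA, 0x55555555);

      // Integer-domain XOR (pxor) vs. floating-point-domain XOR (xorpd) on the same bits.
      __m128i via_pxor  = _mm_xor_si128(a, b);
      __m128d via_xorpd = _mm_xor_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b));

      unsigned char x[16];
      double y[2];
      _mm_storeu_si128((__m128i*)x, via_pxor);
      _mm_storeu_pd(y, via_xorpd);
      std::printf("identical bits: %s\n", std::memcmp(x, y, 16) == 0 ? "yes" : "no");  // yes
      return 0;
    }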
@@ -2615,6 +2634,13 @@ public:
   // to avoid hiding movb
   void movbyte(ArrayAddress dst, int src);
 
+  // Import other mov() methods from the parent class or else
+  // they will be hidden by the following overriding declaration.
+  using Assembler::movdl;
+  using Assembler::movq;
+  void movdl(XMMRegister dst, AddressLiteral src);
+  void movq(XMMRegister dst, AddressLiteral src);
+
   // Can push value or effective address
   void pushptr(AddressLiteral src);
 
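The using Assembler::movdl and using Assembler::movq declarations added above deal with C++ name hiding: declaring a movdl/movq overload in the derived MacroAssembler class would otherwise hide every base-class overload of the same name. A minimal standalone illustration with hypothetical Base/Derived types (not the HotSpot classes):

    #include <cstdio>

    struct Base {
      void mov(int x)    { std::printf("Base::mov(int %d)\n", x); }
      void mov(double x) { std::printf("Base::mov(double %f)\n", x); }
    };

    struct Derived : Base {
      using Base::mov;        // without this, the overload below hides both Base::mov overloads
      void mov(const char* s) { std::printf("Derived::mov(%s)\n", s); }
    };

    int main() {
      Derived d;
      d.mov(42);       // Base::mov(int), visible only because of the using-declaration
      d.mov(3.14);     // Base::mov(double)
      d.mov("label");  // Derived::mov(const char*)
      return 0;
    }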
@@ -562,7 +562,7 @@ void VM_Version::get_processor_features() {
       AllocatePrefetchInstr = 3;
     }
     // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
-    if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+    if( supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
       UseXMMForArrayCopy = true;
     }
     if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
File diff suppressed because it is too large
@@ -131,6 +131,10 @@ public:
     assert((is_reg() && value() < stack0->value() - 1) || is_stack(), "must be");
     return (VMReg)(intptr_t)(value() + 1);
   }
+  VMReg next(int i) {
+    assert((is_reg() && value() < stack0->value() - i) || is_stack(), "must be");
+    return (VMReg)(intptr_t)(value() + i);
+  }
   VMReg prev() {
     assert((is_stack() && value() > stack0->value()) || (is_reg() && value() != 0), "must be");
     return (VMReg)(intptr_t)(value() - 1);
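The VMReg::next(int i) overload added above lets a caller step i slots at once instead of chaining next() calls, which becomes useful when a single machine register (for example a 256-bit YMM register) spans several consecutive VMReg slots. A tiny mock of the slot arithmetic with a hypothetical SlotIndex type (not the HotSpot VMReg class):

    #include <cassert>
    #include <cstdio>

    // Mock of the idea only: a register value is an index into a flat slot space
    // and next(i) advances by i slots, analogous to VMReg::next(int i) above.
    struct SlotIndex {
      int value;
      SlotIndex next(int i = 1) const { return SlotIndex{value + i}; }
    };

    int main() {
      SlotIndex first_slot{32};                  // assume some register starts at slot 32
      SlotIndex fifth_slot = first_slot.next(4); // four slots further along
      assert(fifth_slot.value == 36);
      std::printf("slot %d\n", fifth_slot.value);
      return 0;
    }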