8149421: Vectorized Post Loops

Add vectorised post loop for counted loops with vectors.

Reviewed-by: kvn
Michael Berg 2016-02-12 16:12:15 -08:00
parent 9d5f03332e
commit 3ade110066
10 changed files with 213 additions and 50 deletions
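
In loop-shape terms: SuperWord now marks the loops it vectorizes, and before such a main loop is unrolled any further, a copy of its single-vector-width ("atomic") form is inserted as an additional post loop. Leftover iterations then drain through vector code one vector at a time, and only the last few run the scalar post loop. A minimal sketch of the resulting shape (illustrative C++ only; the function, vector width 4, and unroll factor 4 are assumptions — HotSpot performs this on its ideal graph, not on source code):

void saxpy(float *a, const float *b, float s, int n) {
  int i = 0;
  // pre-loop: scalar iterations until alignment is reached (elided), then
  // the main loop: vectorized and further unrolled, e.g. 4 vectors x 4 lanes
  for (; i + 16 <= n; i += 16) {
    for (int j = 0; j < 16; j++) a[i + j] += s * b[i + j];  // emitted as vector ops
  }
  // vectorized post ("drain") loop: one vector width per trip, entered only
  // when a zero-trip guard proves at least one full vector remains
  for (; i + 4 <= n; i += 4) {
    for (int j = 0; j < 4; j++) a[i + j] += s * b[i + j];   // one vector op
  }
  // scalar post loop: fewer than one vector width of iterations left
  for (; i < n; i++) a[i] += s * b[i];
}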

View file

@@ -54,6 +54,7 @@ define_pd_global(intx, INTPRESSURE, 25);
 define_pd_global(intx, InteriorEntryAlignment, 16);
 define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
 define_pd_global(intx, LoopUnrollLimit, 60);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
 // InitialCodeCacheSize derived from specjbb2000 run.
 define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
 define_pd_global(intx, CodeCacheExpansionSize, 64*K);

View file

@@ -54,6 +54,7 @@ define_pd_global(intx, RegisterCostAreaRatio, 16000);
 define_pd_global(bool, UseTLAB, true);
 define_pd_global(bool, ResizeTLAB, true);
 define_pd_global(intx, LoopUnrollLimit, 60);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
 // Peephole and CISC spilling both break the graph, and so make the
 // scheduler sick.

View file

@@ -52,6 +52,7 @@ define_pd_global(intx, RegisterCostAreaRatio, 12000);
 define_pd_global(bool, UseTLAB, true);
 define_pd_global(bool, ResizeTLAB, true);
 define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
+define_pd_global(intx, LoopPercentProfileLimit, 10);
 define_pd_global(intx, MinJumpTableSize, 5);
 // Peephole and CISC spilling both break the graph, and so makes the

View file

@@ -2361,7 +2361,7 @@ void Assembler::movdqa(XMMRegister dst, Address src) {
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
@@ -2398,7 +2398,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
   assert(UseAVX > 0, "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
@@ -2486,7 +2486,7 @@ void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
@@ -2515,7 +2515,7 @@ void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
@@ -2640,7 +2640,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x10);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2649,7 +2649,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) {
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x10);
@@ -2668,7 +2668,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x10);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2677,7 +2677,7 @@ void Assembler::movss(XMMRegister dst, XMMRegister src) {
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x10);
@@ -2782,7 +2782,7 @@ void Assembler::mull(Register src) {
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
@@ -2791,7 +2791,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2800,7 +2800,7 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
@@ -2809,7 +2809,7 @@ void Assembler::mulss(XMMRegister dst, Address src) {
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3993,7 +3993,7 @@ void Assembler::smovl() {
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x51);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4002,7 +4002,7 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x51);
@@ -4011,7 +4011,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x51);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4024,7 +4024,7 @@ void Assembler::std() {
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x51);
@@ -4078,7 +4078,7 @@ void Assembler::subl(Register dst, Register src) {
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4087,7 +4087,7 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) {
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
@@ -4096,7 +4096,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ false);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4105,7 +4105,7 @@ void Assembler::subss(XMMRegister dst, XMMRegister src) {
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
@@ -4293,7 +4293,7 @@ void Assembler::xorb(Register dst, Address src) {
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@@ -4303,7 +4303,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x58);
@@ -4313,7 +4313,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@@ -4323,7 +4323,7 @@ void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x58);
@@ -4333,7 +4333,7 @@ void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@@ -4343,7 +4343,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5E);
@@ -4353,7 +4353,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@@ -4363,7 +4363,7 @@ void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5E);
@@ -4373,7 +4373,7 @@ void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@@ -4383,7 +4383,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
@@ -4393,7 +4393,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@@ -4403,7 +4403,7 @@ void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x59);
@@ -4413,7 +4413,7 @@ void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@@ -4423,7 +4423,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
@@ -4433,7 +4433,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@@ -4443,7 +4443,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x5C);
@@ -5901,7 +5901,7 @@ void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5911,7 +5911,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   // swap src<->dst for encoding
   vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -5922,7 +5922,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5932,7 +5932,7 @@ void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   // swap src<->dst for encoding
   vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6027,7 +6027,7 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7A);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6036,7 +6036,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7B);
   emit_int8((unsigned char)(0xC0 | encode));
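
Taken together, the assembler changes above flip no_mask_reg from true to false for the scalar SSE/AVX move and arithmetic forms — per the parameter comments added to InstructionAttr below, false selects mask register k1 rather than k0 when an EVEX encoding is chosen — while the evpbroadcast forms switch legacy_mode to _legacy_mode_bw, presumably so they fall back to a pre-EVEX encoding when AVX512BW is unavailable (their own comments note they require AVX512BW and AVX512VL).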

View file

@@ -2044,11 +2044,11 @@ private:
 class InstructionAttr {
 public:
   InstructionAttr(
-    int vector_len,
-    bool rex_vex_w,
-    bool legacy_mode,
-    bool no_reg_mask,
-    bool uses_vl)
+    int vector_len,   // The length of vector to be applied in encoding - for both AVX and EVEX
+    bool rex_vex_w,   // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
+    bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
+    bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else k1 is used under the same condition
+    bool uses_vl)     // This instruction may have legacy constraints based on vector length for EVEX
   :
     _avx_vector_len(vector_len),
     _rex_vex_w(rex_vex_w),

View file

@@ -46,6 +46,7 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
 define_pd_global(intx, ConditionalMoveLimit, 3);
 define_pd_global(intx, FreqInlineSize, 325);
 define_pd_global(intx, MinJumpTableSize, 10);
+define_pd_global(intx, LoopPercentProfileLimit, 30);
 #ifdef AMD64
 define_pd_global(intx, INTPRESSURE, 13);
 define_pd_global(intx, FLOATPRESSURE, 14);

View file

@@ -182,6 +182,10 @@
           "Unroll loop bodies with node count less than this")             \
           range(0, max_jint / 4)                                           \
                                                                            \
+  product_pd(intx, LoopPercentProfileLimit,                                \
+          "Unroll loop bodies with % node count of profile limit")         \
+          range(10, 100)                                                   \
+                                                                           \
   product(intx, LoopMaxUnroll, 16,                                         \
           "Maximum number of unrolls for main loop")                       \
           range(0, max_jint)                                               \

View file

@@ -666,7 +666,8 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
     if (future_unroll_ct > LoopMaxUnroll) return false;
   } else {
     // obey user constraints on vector mapped loops with additional unrolling applied
-    if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
+    int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : 1;
+    if ((future_unroll_ct / unroll_constraint) > LoopMaxUnroll) return false;
   }

   // Check for initial stride being a small enough constant
@@ -689,7 +690,7 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
   // Progress defined as current size less than 20% larger than previous size.
   if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
       future_unroll_ct > LoopUnrollMin &&
-      (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
+      (future_unroll_ct - 1) * (100 / LoopPercentProfileLimit) > cl->profile_trip_cnt() &&
       1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
     return false;
   }
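
The hard-coded factor of 10.0 above becomes 100 / LoopPercentProfileLimit, so each platform tunes how far unrolling may run ahead of the profiled trip count: the platform files at the top of this commit set 10 (reproducing the old behavior), while the x86 file (the one with #ifdef AMD64) sets 30. A standalone sketch of the arithmetic — the trip count and doubling schedule are assumptions, and note that 100 / limit is integer division, so 30 yields a factor of 3:

// Not HotSpot code: shows when the profile check first vetoes more unrolling.
#include <cstdio>
#include <initializer_list>

int main() {
  const double profile_trip_cnt = 50.0;   // assumed profiled trip count
  for (int limit : {10, 30}) {            // defaults added by this commit
    int factor = 100 / limit;             // integer division: 10 -> 10, 30 -> 3
    int future_unroll_ct = 2;
    // unrolling is vetoed once (future_unroll_ct - 1) * factor exceeds
    // the profiled trip count; find where that first happens
    while ((future_unroll_ct - 1) * factor <= profile_trip_cnt)
      future_unroll_ct *= 2;              // unroll count doubles each round
    std::printf("limit=%d: first veto at unroll=%d\n", limit, future_unroll_ct);
  }
  return 0;
}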
@@ -1260,6 +1261,146 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
   loop->record_for_igvn();
 }

+//------------------------------insert_vector_post_loop------------------------
+// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
+// unroll_policy has already informed us that more unrolling is about to happen to
+// the main loop. The resultant post loop will serve as a vectorized drain loop.
+void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
+  if (!loop->_head->is_CountedLoop()) return;
+
+  CountedLoopNode *cl = loop->_head->as_CountedLoop();
+
+  // only process vectorized main loops
+  if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
+
+  int slp_max_unroll_factor = cl->slp_max_unroll();
+  int cur_unroll = cl->unrolled_count();
+
+  if (slp_max_unroll_factor == 0) return;
+
+  // only process atomic unroll vector loops (not super unrolled after vectorization)
+  if (cur_unroll != slp_max_unroll_factor) return;
+
+  // we only ever process this one time
+  if (cl->has_atomic_post_loop()) return;
+
+#ifndef PRODUCT
+  if (TraceLoopOpts) {
+    tty->print("PostVector ");
+    loop->dump_head();
+  }
+#endif
+  C->set_major_progress();
+
+  // Find common pieces of the loop being guarded with pre & post loops
+  CountedLoopNode *main_head = loop->_head->as_CountedLoop();
+  CountedLoopEndNode *main_end = main_head->loopexit();
+  guarantee(main_end != NULL, "no loop exit node");
+  // diagnostic to show loop end is not properly formed
+  assert(main_end->outcnt() == 2, "1 true, 1 false path only");
+  uint dd_main_head = dom_depth(main_head);
+  uint max = main_head->outcnt();
+
+  // mark this loop as processed
+  main_head->mark_has_atomic_post_loop();
+
+  Node *pre_header = main_head->in(LoopNode::EntryControl);
+  Node *init = main_head->init_trip();
+  Node *incr = main_end->incr();
+  Node *limit = main_end->limit();
+  Node *stride = main_end->stride();
+  Node *cmp = main_end->cmp_node();
+  BoolTest::mask b_test = main_end->test_trip();
+
+  //------------------------------
+  // Step A: Create a new post-Loop.
+  Node* main_exit = main_end->proj_out(false);
+  assert(main_exit->Opcode() == Op_IfFalse, "");
+  int dd_main_exit = dom_depth(main_exit);
+
+  // Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
+  // The main loop pre-header illegally has 2 control users (old & new loops).
+  clone_loop(loop, old_new, dd_main_exit);
+  assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
+  CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
+  post_head->set_normal_loop();
+  post_head->set_post_loop(main_head);
+
+  // Reduce the post-loop trip count.
+  CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
+  post_end->_prob = PROB_FAIR;
+
+  // Build the main-loop normal exit.
+  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
+  _igvn.register_new_node_with_optimizer(new_main_exit);
+  set_idom(new_main_exit, main_end, dd_main_exit);
+  set_loop(new_main_exit, loop->_parent);
+
+  // Step A2: Build a zero-trip guard for the vector post-loop. After leaving the
+  // main-loop, the vector post-loop may not execute at all. We 'opaque' the incr
+  // (the vectorized main-loop trip-counter exit value) because we will be changing
+  // the exit value (via additional unrolling) so we cannot constant-fold away the zero
+  // trip guard until all unrolling is done.
+  Node *zer_opaq = new Opaque1Node(C, incr);
+  Node *zer_cmp = new CmpINode(zer_opaq, limit);
+  Node *zer_bol = new BoolNode(zer_cmp, b_test);
+  register_new_node(zer_opaq, new_main_exit);
+  register_new_node(zer_cmp, new_main_exit);
+  register_new_node(zer_bol, new_main_exit);
+
+  // Build the IfNode
+  IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
+  _igvn.register_new_node_with_optimizer(zer_iff);
+  set_idom(zer_iff, new_main_exit, dd_main_exit);
+  set_loop(zer_iff, loop->_parent);
+
+  // Plug in the false-path, taken if we need to skip vector post-loop
+  _igvn.replace_input_of(main_exit, 0, zer_iff);
+  set_idom(main_exit, zer_iff, dd_main_exit);
+  set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
+
+  // Make the true-path, must enter the vector post loop
+  Node *zer_taken = new IfTrueNode(zer_iff);
+  _igvn.register_new_node_with_optimizer(zer_taken);
+  set_idom(zer_taken, zer_iff, dd_main_exit);
+  set_loop(zer_taken, loop->_parent);
+
+  // Plug in the true path
+  _igvn.hash_delete(post_head);
+  post_head->set_req(LoopNode::EntryControl, zer_taken);
+  set_idom(post_head, zer_taken, dd_main_exit);
+
+  Arena *a = Thread::current()->resource_area();
+  VectorSet visited(a);
+  Node_Stack clones(a, main_head->back_control()->outcnt());
+
+  // Step A3: Make the fall-in values to the vector post-loop come from the
+  // fall-out values of the main-loop.
+  for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
+    Node* main_phi = main_head->fast_out(i);
+    if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
+      Node *cur_phi = old_new[main_phi->_idx];
+      Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
+                                            post_head->init_control(),
+                                            main_phi->in(LoopNode::LoopBackControl),
+                                            visited, clones);
+      _igvn.hash_delete(cur_phi);
+      cur_phi->set_req(LoopNode::EntryControl, fallnew);
+    }
+  }
+
+  // CastII for the new post loop:
+  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
+  assert(inserted, "no castII inserted");
+
+  // It's difficult to be precise about the trip-counts
+  // for post loops. They are usually very short,
+  // so guess that unit vector trips is a reasonable value.
+  post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);
+
+  // Now force out all loop-invariant dominating tests. The optimizer
+  // finds some, but we _know_ they are all useless.
+  peeled_dom_test_elim(loop, old_new);
+  loop->record_for_igvn();
+}
+
 //------------------------------is_invariant-----------------------------
 // Return true if n is invariant
 bool IdealLoopTree::is_invariant(Node* n) const {
@@ -2608,6 +2749,9 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
   // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
   // peeling.
   if (should_unroll && !should_peel) {
+    if (SuperWordLoopUnrollAnalysis) {
+      phase->insert_vector_post_loop(this, old_new);
+    }
     phase->do_unroll(this, old_new, true);
   }
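
In control-flow terms, the guard wiring in insert_vector_post_loop above produces roughly the following shape (a sketch using the names from the code; the Opaque1 node hides incr from the optimizer so the guard cannot be constant-folded while later unrolling is still changing the main loop's exit value):

//  main loop (vectorized) --> new_main_exit (IfFalse of main_end)
//                                   |
//    zer_iff: If(Bool(CmpI(Opaque1(incr), limit), b_test))
//      true (zer_taken)                     false
//           |                                 |
//  post_head: single-width vector     main_exit: skip the drain loop,
//  "drain" loop                       on to the existing scalar post loop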

View file

@@ -67,7 +67,9 @@ protected:
          HasReductions=128,
          WasSlpAnalyzed=256,
          PassedSlpAnalysis=512,
-         DoUnrollOnly=1024 };
+         DoUnrollOnly=1024,
+         VectorizedLoop=2048,
+         HasAtomicPostLoop=4096 };
   char _unswitch_count;
   enum { _unswitch_max=3 };
@@ -86,6 +88,8 @@ public:
   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
+  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
+  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }

   int unswitch_max() { return _unswitch_max; }
   int unswitch_count() { return _unswitch_count; }
@@ -221,6 +225,8 @@ public:
   int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
   int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
   int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
+  int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
+  int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
   void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
   int main_idx() const { return _main_idx; }
@@ -893,6 +899,8 @@ public:
   // Add pre and post loops around the given loop. These loops are used
   // during RCE, unrolling and aligning loops.
   void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
+  // Add a vector post loop between a vector main loop and the current post loop
+  void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
   // If Node n lives in the back_ctrl block, we clone a private version of n
   // in preheader_ctrl block and return that, otherwise return n.
   Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );

View file

@@ -2253,6 +2253,9 @@ void SuperWord::output() {
         C->set_major_progress();
       }
       cl->mark_do_unroll_only();
+      if (do_reserve_copy()) {
+        cl->mark_loop_vectorized();
+      }
     }
   }
 }