8239008: C2: Simplify Replicate support for sub-word types on x86

Reviewed-by: kvn
This commit is contained in:
Vladimir Ivanov 2020-03-10 20:51:08 +03:00
parent 072cfd2e48
commit 1dcd3d2c50
2 changed files with 49 additions and 158 deletions

View file

@ -7160,7 +7160,7 @@ void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
// duplicate single precision data from src into programmed locations in dest : requires AVX512VL // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { void Assembler::vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x18); emit_int8(0x18);
@ -7181,7 +7181,8 @@ void Assembler::vbroadcastss(XMMRegister dst, Address src, int vector_len) {
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx2(), "");
assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted(); attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@ -7191,6 +7192,7 @@ void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) { void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), ""); assert(VM_Version::supports_avx(), "");
assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
assert(dst != xnoreg, "sanity"); assert(dst != xnoreg, "sanity");
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);

View file

@ -3124,14 +3124,12 @@ instruct storeV(memory mem, vec src) %{
// Replicate byte scalar to be vector // Replicate byte scalar to be vector
instruct ReplB_reg(vec dst, rRegI src) %{ instruct ReplB_reg(vec dst, rRegI src) %{
predicate((n->as_Vector()->length() <= 32) ||
(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit byte instructions
match(Set dst (ReplicateB src)); match(Set dst (ReplicateB src));
format %{ "replicateB $dst,$src" %} format %{ "replicateB $dst,$src" %}
ins_encode %{ ins_encode %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
assert(VM_Version::supports_avx512bw(), "required"); assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
} else { } else {
@ -3141,7 +3139,7 @@ instruct ReplB_reg(vec dst, rRegI src) %{
if (vlen >= 16) { if (vlen >= 16) {
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 32) { if (vlen >= 32) {
assert(vlen == 32, "sanity"); // vlen == 64 && !AVX512BW is covered by ReplB_reg_leg assert(vlen == 32, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
} }
@ -3150,29 +3148,11 @@ instruct ReplB_reg(vec dst, rRegI src) %{
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Replicate a byte scalar held in a general-purpose register into a vector.
// Selected only for 64-element vectors on CPUs without AVX512BW (see the
// predicate); the destination uses the legVec operand class instead of vec.
instruct ReplB_reg_leg(legVec dst, rRegI src) %{
predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512bw()); // AVX512BW for 512bit byte instructions
match(Set dst (ReplicateB src));
format %{ "replicateB $dst,$src" %}
ins_encode %{
// NOTE(review): presumably UseAVX > 2 means AVX-512 is enabled, which a
// 64-byte vector needs — confirm against the UseAVX flag definition.
assert(UseAVX > 2, "required");
// Move the scalar into dst, then widen the pattern in place: every
// following step uses dst as both its source and its destination.
__ movdl($dst$$XMMRegister, $src$$Register);
// Interleave dst with itself so the low byte is duplicated into a 16-bit word.
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
// Selector 0x00 copies word 0 into all four low words (low 64 bits).
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
// Duplicate the low quadword into the high quadword (full 128 bits).
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
// NOTE(review): presumably copies the low 128 bits into bits 255:128 —
// confirm against the MacroAssembler helper.
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
// Immediate 0x1 selects the upper 256-bit half as the insertion target.
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct ReplB_mem(vec dst, memory mem) %{ instruct ReplB_mem(vec dst, memory mem) %{
predicate((n->as_Vector()->length() <= 32 && VM_Version::supports_avx512vlbw()) || // AVX512VL for <512bit operands predicate(VM_Version::supports_avx2());
(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit byte instructions
match(Set dst (ReplicateB (LoadB mem))); match(Set dst (ReplicateB (LoadB mem)));
format %{ "replicateB $dst,$mem" %} format %{ "replicateB $dst,$mem" %}
ins_encode %{ ins_encode %{
assert(UseAVX > 2, "required");
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
%} %}
@ -3180,8 +3160,6 @@ instruct ReplB_mem(vec dst, memory mem) %{
%} %}
instruct ReplB_imm(vec dst, immI con) %{ instruct ReplB_imm(vec dst, immI con) %{
predicate((n->as_Vector()->length() <= 32) ||
(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit byte instructions
match(Set dst (ReplicateB con)); match(Set dst (ReplicateB con));
format %{ "replicateB $dst,$con" %} format %{ "replicateB $dst,$con" %}
ins_encode %{ ins_encode %{
@ -3192,15 +3170,12 @@ instruct ReplB_imm(vec dst, immI con) %{
} else { } else {
__ movq($dst$$XMMRegister, const_addr); __ movq($dst$$XMMRegister, const_addr);
if (vlen >= 16) { if (vlen >= 16) {
if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else { } else {
assert(vlen == 16, "sanity");
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 32) {
assert(vlen == 32, "sanity");// vlen == 64 && !AVX512BW is covered by ReplB_imm_leg
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
} }
} }
} }
@ -3208,19 +3183,6 @@ instruct ReplB_imm(vec dst, immI con) %{
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Replicate a byte immediate into a vector.
// Selected only for 64-element vectors on CPUs without AVX512BW (see the
// predicate); the destination uses the legVec operand class instead of vec.
instruct ReplB_imm_leg(legVec dst, immI con) %{
predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512bw());
match(Set dst (ReplicateB con));
format %{ "replicateB $dst,$con" %}
ins_encode %{
// Load 8 bytes from the constant table entry built by replicate8_imm.
// NOTE(review): presumably replicate8_imm(con, 1) repeats the immediate at
// 1-byte width across 8 bytes — confirm against its definition.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
// Duplicate the low quadword into the high quadword (full 128 bits).
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
// NOTE(review): presumably copies the low 128 bits into bits 255:128 —
// confirm against the MacroAssembler helper.
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
// Immediate 0x1 selects the upper 256-bit half as the insertion target.
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
// Replicate byte scalar zero to be vector // Replicate byte scalar zero to be vector
instruct ReplB_zero(vec dst, immI0 zero) %{ instruct ReplB_zero(vec dst, immI0 zero) %{
match(Set dst (ReplicateB zero)); match(Set dst (ReplicateB zero));
@ -3241,14 +3203,12 @@ instruct ReplB_zero(vec dst, immI0 zero) %{
// ====================ReplicateS======================================= // ====================ReplicateS=======================================
instruct ReplS_reg(vec dst, rRegI src) %{ instruct ReplS_reg(vec dst, rRegI src) %{
predicate((n->as_Vector()->length() <= 16) ||
(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit instructions on shorts
match(Set dst (ReplicateS src)); match(Set dst (ReplicateS src));
format %{ "replicateS $dst,$src" %} format %{ "replicateS $dst,$src" %}
ins_encode %{ ins_encode %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
assert(VM_Version::supports_avx512bw(), "required"); assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
} else { } else {
@ -3257,7 +3217,7 @@ instruct ReplS_reg(vec dst, rRegI src) %{
if (vlen >= 8) { if (vlen >= 8) {
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 16) { if (vlen >= 16) {
assert(vlen == 16, "sanity"); // vlen == 32 && !AVX512BW is covered by ReplS_reg_leg assert(vlen == 16, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
} }
@ -3266,81 +3226,43 @@ instruct ReplS_reg(vec dst, rRegI src) %{
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Replicate a short scalar held in a general-purpose register into a vector.
// Selected only for 32-element vectors on CPUs without AVX512BW (see the
// predicate); the destination uses the legVec operand class instead of vec.
instruct ReplS_reg_leg(legVec dst, rRegI src) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512bw());
match(Set dst (ReplicateS src));
format %{ "replicateS $dst,$src" %}
ins_encode %{
// Move the scalar into dst, then widen the pattern in place: every
// following step uses dst as both its source and its destination.
__ movdl($dst$$XMMRegister, $src$$Register);
// Selector 0x00 copies word 0 into all four low words (low 64 bits).
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
// Duplicate the low quadword into the high quadword (full 128 bits).
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
// NOTE(review): presumably copies the low 128 bits into bits 255:128 —
// confirm against the MacroAssembler helper.
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
// Immediate 0x1 selects the upper 256-bit half as the insertion target.
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct ReplS_mem(vec dst, memory mem) %{ instruct ReplS_mem(vec dst, memory mem) %{
predicate((n->as_Vector()->length() >= 4 && predicate(VM_Version::supports_avx()); // use VEX-encoded pshuflw to relax 16-byte alignment restriction on the source
n->as_Vector()->length() <= 16 && VM_Version::supports_avx()) ||
(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit instructions on shorts
match(Set dst (ReplicateS (LoadS mem))); match(Set dst (ReplicateS (LoadS mem)));
format %{ "replicateS $dst,$mem" %} format %{ "replicateS $dst,$mem" %}
ins_encode %{ ins_encode %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands if (VM_Version::supports_avx2()) {
assert(VM_Version::supports_avx512bw(), "required");
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else { } else {
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
if (vlen >= 8) { if (vlen >= 8) {
assert(vlen == 8, "sanity");
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 16) {
assert(vlen == 16, "sanity"); // vlen == 32 && !AVX512BW is covered by ReplS_mem_leg
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
}
} }
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Replicate a short loaded from memory (LoadS) into a vector.
// Selected only for 32-element vectors on CPUs without AVX512BW (see the
// predicate); the destination uses the legVec operand class instead of vec.
instruct ReplS_mem_leg(legVec dst, memory mem) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512bw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "replicateS $dst,$mem" %}
ins_encode %{
// Shuffle straight from the memory operand: selector 0x00 copies word 0
// into all four low words of dst, folding the load into the shuffle.
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
// Duplicate the low quadword into the high quadword (full 128 bits).
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
// NOTE(review): presumably copies the low 128 bits into bits 255:128 —
// confirm against the MacroAssembler helper.
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
// Immediate 0x1 selects the upper 256-bit half as the insertion target.
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct ReplS_imm(vec dst, immI con) %{ instruct ReplS_imm(vec dst, immI con) %{
predicate((n->as_Vector()->length() <= 16) ||
(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw())); // AVX512BW for 512bit instructions on shorts
match(Set dst (ReplicateS con)); match(Set dst (ReplicateS con));
format %{ "replicateS $dst,$con" %} format %{ "replicateS $dst,$con" %}
ins_encode %{ ins_encode %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
InternalAddress constaddr = $constantaddress(replicate8_imm($con$$constant, 2)); InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
if (vlen == 2) { if (vlen == 2) {
__ movdl($dst$$XMMRegister, constaddr); __ movdl($dst$$XMMRegister, const_addr);
} else { } else {
__ movq($dst$$XMMRegister, constaddr); __ movq($dst$$XMMRegister, const_addr);
if (vlen == 32 || VM_Version::supports_avx512vlbw() ) { // AVX512VL for <512bit operands if (vlen >= 8) {
assert(VM_Version::supports_avx512bw(), "required"); if (VM_Version::supports_avx2()) {
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else { } else {
__ movq($dst$$XMMRegister, constaddr); assert(vlen == 8, "sanity");
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 16) {
assert(vlen == 16, "sanity"); // vlen == 32 && !AVX512BW is covered by ReplS_imm_leg
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
} }
} }
@ -3348,19 +3270,6 @@ instruct ReplS_imm(vec dst, immI con) %{
ins_pipe( fpu_reg_reg ); ins_pipe( fpu_reg_reg );
%} %}
// Replicate a short immediate into a vector.
// Selected only for 32-element vectors on CPUs without AVX512BW (see the
// predicate); the destination uses the legVec operand class instead of vec.
instruct ReplS_imm_leg(legVec dst, immI con) %{
predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512bw());
match(Set dst (ReplicateS con));
format %{ "replicateS $dst,$con" %}
ins_encode %{
// Load 8 bytes from the constant table entry built by replicate8_imm.
// NOTE(review): presumably replicate8_imm(con, 2) repeats the immediate at
// 2-byte width across 8 bytes — confirm against its definition.
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
// Duplicate the low quadword into the high quadword (full 128 bits).
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
// NOTE(review): presumably copies the low 128 bits into bits 255:128 —
// confirm against the MacroAssembler helper.
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
// Immediate 0x1 selects the upper 256-bit half as the insertion target.
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
%}
ins_pipe( pipe_slow );
%}
instruct ReplS_zero(vec dst, immI0 zero) %{ instruct ReplS_zero(vec dst, immI0 zero) %{
match(Set dst (ReplicateS zero)); match(Set dst (ReplicateS zero));
format %{ "replicateS $dst,$zero" %} format %{ "replicateS $dst,$zero" %}
@ -3406,13 +3315,10 @@ instruct ReplI_mem(vec dst, memory mem) %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen <= 4) { if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else {
assert(VM_Version::supports_avx2(), "sanity");
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
@ -3423,20 +3329,17 @@ instruct ReplI_imm(vec dst, immI con) %{
format %{ "replicateI $dst,$con" %} format %{ "replicateI $dst,$con" %}
ins_encode %{ ins_encode %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
InternalAddress constaddr = $constantaddress(replicate8_imm($con$$constant, 4)); InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
if (vlen == 2) { if (vlen <= 4) {
__ movq($dst$$XMMRegister, constaddr); __ movq($dst$$XMMRegister, const_addr);
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands if (vlen == 4) {
int vector_len = vector_length_encoding(this); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ movq($dst$$XMMRegister, constaddr);
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
} else {
__ movq($dst$$XMMRegister, constaddr);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
if (vlen >= 8) {
assert(vlen == 8, "sanity");
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
} else {
assert(VM_Version::supports_avx2(), "sanity");
int vector_len = vector_length_encoding(this);
__ movq($dst$$XMMRegister, const_addr);
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
@ -3546,14 +3449,10 @@ instruct ReplL_mem(vec dst, memory mem) %{
if (vlen == 2) { if (vlen == 2) {
__ movq($dst$$XMMRegister, $mem$$Address); __ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else {
assert(VM_Version::supports_avx2(), "sanity");
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
} else {
assert(vlen == 4, "sanity");
__ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
@ -3569,15 +3468,11 @@ instruct ReplL_imm(vec dst, immL con) %{
if (vlen == 2) { if (vlen == 2) {
__ movq($dst$$XMMRegister, const_addr); __ movq($dst$$XMMRegister, const_addr);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else {
assert(VM_Version::supports_avx2(), "sanity");
int vlen_enc = vector_length_encoding(this); int vlen_enc = vector_length_encoding(this);
__ movq($dst$$XMMRegister, const_addr); __ movq($dst$$XMMRegister, const_addr);
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
assert(vlen == 4, "sanity");
__ movq($dst$$XMMRegister, const_addr);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
@ -3607,9 +3502,9 @@ instruct ReplF_reg(vec dst, vlRegF src) %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen <= 4) { if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else if (VM_Version::supports_avx2()) {
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
} else { } else {
assert(vlen == 8, "sanity"); assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
@ -3627,13 +3522,10 @@ instruct ReplF_mem(vec dst, memory mem) %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen <= 4) { if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
} else if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else {
assert(VM_Version::supports_avx(), "sanity");
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
@ -3664,9 +3556,9 @@ instruct ReplD_reg(vec dst, vlRegD src) %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen == 2) { if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else if (VM_Version::supports_avx2()) {
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
} else { } else {
assert(vlen == 4, "sanity"); assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
@ -3684,13 +3576,10 @@ instruct ReplD_mem(vec dst, memory mem) %{
uint vlen = vector_length(this); uint vlen = vector_length(this);
if (vlen == 2) { if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands } else {
assert(VM_Version::supports_avx(), "sanity");
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
} else {
assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
} }
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );