8292289: [vectorapi] Improve the implementation of VectorTestNode

Reviewed-by: xgong, kvn
This commit is contained in:
Quan Anh Mai 2022-12-08 20:25:20 +00:00
parent d35e840024
commit 3dfadeebd0
23 changed files with 335 additions and 290 deletions

View file

@ -5869,67 +5869,62 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
// anytrue
instruct vtest_anytrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
instruct vtest_anytrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
predicate(UseSVE == 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(TEMP tmp, KILL cr);
format %{ "vtest_anytrue_neon $dst, $src1\t# KILL $tmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "vtest_anytrue_neon $src1\t# KILL $tmp" %}
ins_encode %{
// No need to use src2.
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
__ cmpw($dst$$Register, zr);
__ csetw($dst$$Register, Assembler::NE);
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
__ cmpw(rscratch1, zr);
%}
ins_pipe(pipe_slow);
%}
instruct vtest_anytrue_sve(iRegINoSp dst, pReg src1, pReg src2, rFlagsReg cr) %{
instruct vtest_anytrue_sve(rFlagsReg cr, pReg src1, pReg src2) %{
predicate(UseSVE > 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2));
effect(KILL cr);
format %{ "vtest_anytrue_sve $dst, $src1\t# KILL cr" %}
match(Set cr (VectorTest src1 src2));
format %{ "vtest_anytrue_sve $src1" %}
ins_encode %{
// "src2" is not used for sve.
__ sve_ptest(ptrue, $src1$$PRegister);
__ csetw($dst$$Register, Assembler::NE);
%}
ins_pipe(pipe_slow);
%}
// alltrue
instruct vtest_alltrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
instruct vtest_alltrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
predicate(UseSVE == 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2));
effect(TEMP tmp, KILL cr);
format %{ "vtest_alltrue_neon $dst, $src1\t# KILL $tmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "vtest_alltrue_neon $src1\t# KILL $tmp" %}
ins_encode %{
// No need to use src2.
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
__ cmpw(rscratch1, 0xff);
%}
ins_pipe(pipe_slow);
%}
instruct vtest_alltrue_sve(iRegINoSp dst, pReg src1, pReg src2, pReg ptmp, rFlagsReg cr) %{
instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
predicate(UseSVE > 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2));
effect(TEMP ptmp, KILL cr);
format %{ "vtest_alltrue_sve $dst, $src1, $src2\t# KILL $ptmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP ptmp);
format %{ "vtest_alltrue_sve $src1, $src2\t# KILL $ptmp" %}
ins_encode %{
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}

View file

@ -4241,67 +4241,62 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
// anytrue
instruct vtest_anytrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
instruct vtest_anytrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
predicate(UseSVE == 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(TEMP tmp, KILL cr);
format %{ "vtest_anytrue_neon $dst, $src1\t# KILL $tmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "vtest_anytrue_neon $src1\t# KILL $tmp" %}
ins_encode %{
// No need to use src2.
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
__ cmpw($dst$$Register, zr);
__ csetw($dst$$Register, Assembler::NE);
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
__ cmpw(rscratch1, zr);
%}
ins_pipe(pipe_slow);
%}
instruct vtest_anytrue_sve(iRegINoSp dst, pReg src1, pReg src2, rFlagsReg cr) %{
instruct vtest_anytrue_sve(rFlagsReg cr, pReg src1, pReg src2) %{
predicate(UseSVE > 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2));
effect(KILL cr);
format %{ "vtest_anytrue_sve $dst, $src1\t# KILL cr" %}
match(Set cr (VectorTest src1 src2));
format %{ "vtest_anytrue_sve $src1" %}
ins_encode %{
// "src2" is not used for sve.
__ sve_ptest(ptrue, $src1$$PRegister);
__ csetw($dst$$Register, Assembler::NE);
%}
ins_pipe(pipe_slow);
%}
// alltrue
instruct vtest_alltrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
instruct vtest_alltrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
predicate(UseSVE == 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2));
effect(TEMP tmp, KILL cr);
format %{ "vtest_alltrue_neon $dst, $src1\t# KILL $tmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "vtest_alltrue_neon $src1\t# KILL $tmp" %}
ins_encode %{
// No need to use src2.
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
__ cmpw($dst$$Register, 0xff);
__ csetw($dst$$Register, Assembler::EQ);
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
__ cmpw(rscratch1, 0xff);
%}
ins_pipe(pipe_slow);
%}
instruct vtest_alltrue_sve(iRegINoSp dst, pReg src1, pReg src2, pReg ptmp, rFlagsReg cr) %{
instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
predicate(UseSVE > 0 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2));
effect(TEMP ptmp, KILL cr);
format %{ "vtest_alltrue_sve $dst, $src1, $src2\t# KILL $ptmp, cr" %}
match(Set cr (VectorTest src1 src2));
effect(TEMP ptmp);
format %{ "vtest_alltrue_sve $src1, $src2\t# KILL $ptmp" %}
ins_encode %{
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
__ csetw($dst$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}

View file

@ -163,6 +163,16 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
// Tells whether VectorTest has to be given a distinct second operand.
// That is only the case for the alltrue test on a predicate input:
// an all-set mask is used for the alltrue vector test with SVE.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
  return is_alltrue ? is_predicate : false;
}
// BoolTest mask for vector test intrinsics.
// The alltrue test is checked with eq, every other test with ne;
// is_predicate and vlen do not influence the result.
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
  if (is_alltrue) {
    return BoolTest::eq;
  }
  return BoolTest::ne;
}
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {

View file

@ -155,6 +155,16 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = false;
// Reports whether the alltrue vector test needs a separate second operand.
// Some architectures need such a helper operand; this implementation ignores
// both arguments and always returns false.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
return false;
}
// BoolTest mask for vector test intrinsics.
// This implementation ignores all of its arguments and always returns
// BoolTest::illegal.
// NOTE(review): presumably the vector test intrinsic is never expanded on
// this platform, so the value is never consumed -- confirm against callers.
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
return BoolTest::illegal;
}
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {

View file

@ -164,6 +164,16 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
// Reports whether the alltrue vector test needs a separate second operand.
// Some architectures need such a helper operand; this implementation ignores
// both arguments and always returns false.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
return false;
}
// BoolTest mask for vector test intrinsics.
// This implementation ignores all of its arguments and always returns
// BoolTest::illegal.
// NOTE(review): presumably the vector test intrinsic is never expanded on
// this platform, so the value is never consumed -- confirm against callers.
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
return BoolTest::illegal;
}
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {

View file

@ -161,6 +161,16 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = false;
// Reports whether the alltrue vector test needs a separate second operand.
// Some architectures need such a helper operand; this implementation ignores
// both arguments and always returns false.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
return false;
}
// BoolTest mask for vector test intrinsics.
// This implementation ignores all of its arguments and always returns
// BoolTest::illegal.
// NOTE(review): presumably the vector test intrinsic is never expanded on
// this platform, so the value is never consumed -- confirm against callers.
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
return BoolTest::illegal;
}
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {

View file

@ -153,6 +153,16 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
// Reports whether the alltrue vector test needs a separate second operand.
// Some architectures need such a helper operand; this implementation ignores
// both arguments and always returns false.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
return false;
}
// BoolTest mask for vector test intrinsics.
// This implementation ignores all of its arguments and always returns
// BoolTest::illegal.
// NOTE(review): presumably the vector test intrinsic is never expanded on
// this platform, so the value is never consumed -- confirm against callers.
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
return BoolTest::illegal;
}
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {

View file

@ -5447,13 +5447,38 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x17, (0xC0 | encode));
}
// Emits VTESTPS with `dst` and `src` as its two register operands, for the
// vector width selected by `vector_len`. Requires AVX support (asserted).
// Encoding: VEX.66.0F38.W0 0E /r
void Assembler::vtestps(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
// No second source register is encoded in the prefix (nds encoding is 0).
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
// Opcode byte followed by the register-to-register ModRM byte.
emit_int16(0x0E, (0xC0 | encode));
}
void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
// Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
assert(vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : VM_Version::supports_avx512vlbw(), "");
// Encoding: EVEX.NDS.XXX.66.0F38.W0 26 /r
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x26, (0xC0 | encode));
emit_int16(0x26, (0xC0 | encode));
}
// Emits VPTESTMD: opmask register `dst` is the destination, `nds` and `src`
// are the two vector sources. 512-bit vectors require EVEX support; shorter
// vectors require AVX512VL (asserted).
void Assembler::evptestmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx512vl(), "");
// Encoding: EVEX.NDS.XXX.66.0F38.W0 27 /r
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
// Opcode byte followed by the register-to-register ModRM byte.
emit_int16(0x27, (0xC0 | encode));
}
// Emits VPTESTNMD: opmask register `dst` is the destination, `nds` and `src`
// are the two vector sources. It shares opcode 0x27 with evptestmd and differs
// only in the F3 SIMD prefix. 512-bit vectors require EVEX support; shorter
// vectors require AVX512VL (asserted).
void Assembler::evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx512vl(), "");
// Encoding: EVEX.NDS.XXX.F3.0F38.W0 27 /r
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
// Opcode byte followed by the register-to-register ModRM byte.
emit_int16(0x27, (0xC0 | encode));
}
void Assembler::punpcklbw(XMMRegister dst, Address src) {

View file

@ -1973,9 +1973,12 @@ private:
void vptest(XMMRegister dst, Address src);
void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evptestmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Vector compare
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
void vtestps(XMMRegister dst, XMMRegister src, int vector_len);
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);

View file

@ -2415,58 +2415,32 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
// Broadcast lower 32 bits to 128 bits before ptest
pshufd(vtmp1, src1, 0x0);
if (bt == BoolTest::overflow) {
assert(vtmp2 != xnoreg, "required.");
pshufd(vtmp2, src2, 0x0);
} else {
assert(vtmp2 == xnoreg, "required.");
vtmp2 = src2;
}
ptest(vtmp1, vtmp2);
break;
case 8:
assert(vtmp1 != xnoreg, "required.");
// Broadcast lower 64 bits to 128 bits before ptest
pshufd(vtmp1, src1, 0x4);
if (bt == BoolTest::overflow) {
assert(vtmp2 != xnoreg, "required.");
pshufd(vtmp2, src2, 0x4);
} else {
assert(vtmp2 == xnoreg, "required.");
vtmp2 = src2;
}
ptest(vtmp1, vtmp2);
break;
case 16:
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
ptest(src1, src2);
break;
case 32:
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
vptest(src1, src2, Assembler::AVX_256bit);
break;
case 64:
{
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
kortestql(mask, mask);
}
}
break;
default:
assert(false,"Should not reach here.");
break;
// Emits the flag-setting test instruction for a vector test of src1 against
// src2. Nothing is written to a general register; the caller consumes the
// condition flags left by the emitted (v)ptest / vtestps.
//
//   bt            - element type of the vectors; only its byte size is used
//   src1, src2    - the two vector operands
//   vtmp          - scratch register; must be a real register when
//                   vlen_in_bytes < 16 and xnoreg otherwise (asserted)
//   vlen_in_bytes - vector size in bytes, at most 32 (asserted)
void C2_MacroAssembler::vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes) {
assert(vlen_in_bytes <= 32, "");
int esize = type2aelembytes(bt);
// 256-bit vectors: test directly, vtestps for elements of 4 bytes or more.
if (vlen_in_bytes == 32) {
assert(vtmp == xnoreg, "required.");
if (esize >= 4) {
vtestps(src1, src2, AVX_256bit);
} else {
vptest(src1, src2, AVX_256bit);
}
return;
}
if (vlen_in_bytes < 16) {
// Duplicate the lower part to fill the whole register,
// Don't need to do so for src2
assert(vtmp != xnoreg, "required");
// Shuffle control: 0x00 for a 4-byte vector, 0x04 otherwise.
int shuffle_imm = (vlen_in_bytes == 4) ? 0x00 : 0x04;
pshufd(vtmp, src1, shuffle_imm);
} else {
// Full 128-bit vector: no copy needed, test src1 in place.
assert(vtmp == xnoreg, "required");
vtmp = src1;
}
// 128-bit test: vtestps needs AVX, otherwise fall back to ptest.
if (esize >= 4 && VM_Version::supports_avx()) {
vtestps(vtmp, src2, AVX_128bit);
} else {
ptest(vtmp, src2);
}
}

View file

@ -138,8 +138,7 @@ public:
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);
// Convert B2X
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);

View file

@ -9013,26 +9013,6 @@ void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMR
}
}
void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) {
masklen = masklen < 8 ? 8 : masklen;
ktest(masklen, src1, src2);
setb(Assembler::notZero, dst);
movzbl(dst, dst);
}
void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) {
if (masklen < 8) {
knotbl(kscratch, src2);
kortestbl(src1, kscratch);
setb(Assembler::carrySet, dst);
movzbl(dst, dst);
} else {
ktest(masklen, src1, src2);
setb(Assembler::carrySet, dst);
movzbl(dst, dst);
}
}
void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
switch(masklen) {
case 8:

View file

@ -1768,9 +1768,6 @@ public:
using Assembler::vpternlogq;
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);
void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
void cmov32( Condition cc, Register dst, Address src);
void cmov32( Condition cc, Register dst, Register src);

View file

@ -183,6 +183,25 @@
// Implements a variant of EncodeISOArrayNode that encode ASCII only
static const bool supports_encode_ascii_array = true;
// The alltrue vector test needs an all-one vector as its second operand
// whenever the input is not predicated; no other test needs a second operand.
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
  return is_predicate ? false : is_alltrue;
}
// BoolTest mask for vector test intrinsics.
//   - any test other than alltrue:                      ne
//   - alltrue on a predicate with fewer than 8 lanes,
//     or with exactly 8 lanes and no AVX512DQ:          eq
//   - every other alltrue case:                         lt
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
  if (!is_alltrue) {
    return BoolTest::ne;
  }
  return (is_predicate && (vlen < 8 || (vlen == 8 && !VM_Version::supports_avx512dq())))
         ? BoolTest::eq
         : BoolTest::lt;
}
// Returns pre-selection estimated size of a vector operation.
// Currently, it's a rudimentary heuristic based on emitted code size for complex
// IR nodes used by unroll policy. Idea is to constrain unrolling factor and prevent

View file

@ -1850,8 +1850,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false; // Implementation limitation
} else if (size_in_bits < 32) {
return false; // Implementation limitation
} else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) {
return false; // Implementation limitation
}
break;
case Op_VectorLoadShuffle:
@ -8048,169 +8046,70 @@ instruct vabsnegD(vec dst, vec src) %{
//------------------------------------- VectorTest --------------------------------------------
#ifdef _LP64
instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2 ));
effect(TEMP vtmp1, TEMP vtmp2, KILL cr);
format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %}
ins_encode %{
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
__ setb(Assembler::carrySet, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr);
format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
__ setb(Assembler::carrySet, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{
predicate(VM_Version::supports_avx512bwdq() &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
n->in(1)->bottom_type()->isa_vectmask() &&
Matcher::vector_length(n->in(1)) < 8);
match(Set dst (VectorTest src1 src2));
effect(KILL cr, TEMP kscratch);
format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
uint masklen = Matcher::vector_length(this, $src1);
__ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
predicate(VM_Version::supports_avx512bwdq() &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
n->in(1)->bottom_type()->isa_vectmask() &&
Matcher::vector_length(n->in(1)) >= 8);
match(Set dst (VectorTest src1 src2));
effect(KILL cr);
format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
uint masklen = Matcher::vector_length(this, $src1);
__ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(TEMP vtmp, KILL cr);
format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %}
ins_encode %{
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr);
format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
predicate(VM_Version::supports_avx512bwdq() &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2));
effect(KILL cr);
format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
uint masklen = Matcher::vector_length(this, $src1);
__ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 &&
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
match(Set cr (CmpI (VectorTest src1 src2) zero));
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
match(Set cr (VectorTest src1 src2));
effect(TEMP vtmp);
format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %}
format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
%}
ins_pipe( pipe_slow );
%}
instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
predicate(!VM_Version::supports_avx512bwdq() &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
match(Set cr (CmpI (VectorTest src1 src2) zero));
format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %}
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
match(Set cr (VectorTest src1 src2));
format %{ "vptest_ge16 $src1, $src2\n\t" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
int vlen = Matcher::vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
%}
ins_pipe( pipe_slow );
%}
instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{
predicate(VM_Version::supports_avx512bwdq() &&
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
match(Set cr (CmpI (VectorTest src1 src2) zero));
format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %}
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
predicate((Matcher::vector_length(n->in(1)) < 8 ||
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
ins_encode %{
uint masklen = Matcher::vector_length(this, $src1);
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
masklen = masklen < 8 ? 8 : masklen;
__ ktest(masklen, $src1$$KRegister, $src2$$KRegister);
__ kmovwl($tmp$$Register, $src1$$KRegister);
__ andl($tmp$$Register, (1 << masklen) - 1);
__ cmpl($tmp$$Register, (1 << masklen) - 1);
%}
ins_pipe( pipe_slow );
%}
// anytrue (BoolTest::ne) vector test for opmask inputs with fewer than 8
// lanes, or with exactly 8 lanes when AVX512DQ is not available.
// The mask is copied into the general register $tmp and ANDed with the low
// masklen bits; the flags left by that AND are the rule's cr result.
// $src2 is not referenced by the encoding.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
predicate((Matcher::vector_length(n->in(1)) < 8 ||
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set cr (VectorTest src1 src2));
effect(TEMP tmp);
format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
ins_encode %{
uint masklen = Matcher::vector_length(this, $src1);
// Move the mask to $tmp, then keep only the bits that belong to the vector.
__ kmovwl($tmp$$Register, $src1$$KRegister);
__ andl($tmp$$Register, (1 << masklen) - 1);
%}
ins_pipe( pipe_slow );
%}
// Vector test for opmask inputs with at least 16 lanes, or with exactly 8
// lanes when AVX512DQ is available. The predicate does not look at the
// BoolTest predicate of the node, so this rule is not restricted to one
// flavour of the test.
// The encoding runs kortest on $src1 with itself; $src2 is not referenced.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
predicate(Matcher::vector_length(n->in(1)) >= 16 ||
(Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
match(Set cr (VectorTest src1 src2));
format %{ "ktest_ge8 $src1, $src2\n\t" %}
ins_encode %{
uint masklen = Matcher::vector_length(this, $src1);
__ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
%}
ins_pipe( pipe_slow );
%}

View file

@ -31,6 +31,7 @@
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"
#include "opto/subnode.hpp"
#include "runtime/vm_version.hpp"
class Compile;

View file

@ -1248,6 +1248,11 @@ Node *PhaseIterGVN::transform_old(Node* n) {
assert(!_table.find_index(n->_idx), "found duplicate entry in table");
}
// Allow Bool -> Cmp idealisation in late inlining intrinsics that return a bool
if (n->is_Cmp()) {
add_users_to_worklist(n);
}
// Apply the Ideal call in a loop until it no longer applies
Node* k = n;
DEBUG_ONLY(dead_loop_check(k);)

View file

@ -1427,7 +1427,10 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
Node *cmp = in(1);
if( !cmp->is_Sub() ) return NULL;
int cop = cmp->Opcode();
if( cop == Op_FastLock || cop == Op_FastUnlock || cmp->is_SubTypeCheck()) return NULL;
if( cop == Op_FastLock || cop == Op_FastUnlock ||
cmp->is_SubTypeCheck() || cop == Op_VectorTest ) {
return NULL;
}
Node *cmp1 = cmp->in(1);
Node *cmp2 = cmp->in(2);
if( !cmp1 ) return NULL;
@ -1459,6 +1462,20 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return new BoolNode( cmp, _test.commute() );
}
// Change "bool eq/ne (cmp (cmove (bool tst (cmp2)) 1 0) 0)" into "bool tst/~tst (cmp2)"
if (cop == Op_CmpI &&
    (_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
    cmp1_op == Op_CMoveI && cmp2->find_int_con(1) == 0) {
  Node* cmove_cond = cmp1->in(CMoveNode::Condition);
  // The fold is only valid when the CMove is driven by a BoolNode. Check that
  // explicitly instead of calling as_Bool() unconditionally, which asserts on
  // any other kind of condition input.
  // 0 should be on the true branch
  if (cmove_cond->is_Bool() &&
      cmp1->in(CMoveNode::IfTrue)->find_int_con(1) == 0 &&
      cmp1->in(CMoveNode::IfFalse)->find_int_con(0) != 0) {
    BoolNode* target = cmove_cond->as_Bool();
    // "eq 0" selects the true branch, so it keeps the inner test;
    // "ne 0" selects the false branch, so the inner test is negated.
    return new BoolNode(target->in(1),
                        (_test._test == BoolTest::eq) ? target->_test._test :
                                                        target->_test.negate());
  }
}
// Change "bool eq/ne (cmp (and X 16) 16)" into "bool ne/eq (cmp (and X 16) 0)".
if (cop == Op_CmpI &&
(_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&

View file

@ -1799,14 +1799,24 @@ bool LibraryCallKit::inline_vector_test() {
}
Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
Node* opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
// opd1 is dereferenced below to decide whether a second operand is needed,
// so a failed unboxing has to be rejected before that point.
if (opd1 == NULL) {
  return false; // operand unboxing failed
}
Node* opd2;
if (Matcher::vectortest_needs_second_argument(booltest == BoolTest::overflow,
                                              opd1->bottom_type()->isa_vectmask())) {
  // The platform needs the real second argument for this test.
  opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
} else {
  // The second operand is not needed; reuse the first one.
  opd2 = opd1;
}
if (opd2 == NULL) {
  return false; // operand unboxing failed
}
Node* test = new VectorTestNode(opd1, opd2, booltest);
test = gvn().transform(test);
set_result(test);
Node* cmp = gvn().transform(new VectorTestNode(opd1, opd2, booltest));
BoolTest::mask test = Matcher::vectortest_mask(booltest == BoolTest::overflow,
opd1->bottom_type()->isa_vectmask(), num_elem);
Node* bol = gvn().transform(new BoolNode(cmp, test));
Node* res = gvn().transform(new CMoveINode(bol, gvn().intcon(0), gvn().intcon(1), TypeInt::BOOL));
set_result(res);
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
return true;
}

View file

@ -1422,7 +1422,7 @@ class VectorMaskWrapperNode : public VectorNode {
Node* vector_mask() const { return in(2); }
};
// Vector test node: takes two inputs (in1, in2) and carries a BoolTest::mask
// predicate that is part of the node's identity (see hash() and cmp() below).
class VectorTestNode : public Node {
class VectorTestNode : public CmpNode {
private:
BoolTest::mask _predicate;
@ -1430,18 +1430,18 @@ class VectorTestNode : public Node {
uint size_of() const { return sizeof(*this); }
public:
VectorTestNode(Node* in1, Node* in2, BoolTest::mask predicate) : Node(NULL, in1, in2), _predicate(predicate) {
VectorTestNode(Node* in1, Node* in2, BoolTest::mask predicate) : CmpNode(in1, in2), _predicate(predicate) {
// Both operands are expected to have the same vector type. The check must
// compare in1 against in2; comparing in2 with itself is always true and
// would never catch a mismatch.
assert(in1->bottom_type()->is_vect() == in2->bottom_type()->is_vect(), "same vector type");
}
virtual int Opcode() const;
// Mix the predicate into the hash so nodes that differ only by predicate
// do not hash identically.
virtual uint hash() const { return Node::hash() + _predicate; }
virtual const Type* Value(PhaseGVN* phase) const { return TypeInt::CC; }
virtual const Type* sub(const Type*, const Type*) const { return TypeInt::CC; }
BoolTest::mask get_predicate() const { return _predicate; }
// Two nodes are equal only if the base comparison succeeds and their
// predicates match.
virtual bool cmp( const Node &n ) const {
return Node::cmp(n) && _predicate == ((VectorTestNode&)n)._predicate;
}
virtual const Type *bottom_type() const { return TypeInt::BOOL; }
virtual uint ideal_reg() const { return Op_RegI; } // TODO Should be RegFlags but due to missing comparison flags for BoolTest
// in middle-end, we make it boolean result directly.
BoolTest::mask get_predicate() const { return _predicate; }
};
class VectorBlendNode : public VectorNode {

View file

@ -1875,7 +1875,7 @@
declare_c2_type(XorVMaskNode, VectorNode) \
declare_c2_type(VectorBoxNode, Node) \
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
declare_c2_type(VectorTestNode, Node) \
declare_c2_type(VectorTestNode, CmpNode) \
\
/*********************/ \
/* Adapter Blob Entries */ \

View file

@ -1114,6 +1114,11 @@ public class IRNode {
beforeMatchingNameRegex(VECTOR_UCAST_S2X, "VectorUCastS2X");
}
// Placeholder string for the "VectorTest" node, built from PREFIX/POSTFIX.
// The static block registers it under the node name "VectorTest" through
// beforeMatchingNameRegex.
public static final String VECTOR_TEST = PREFIX + "VECTOR_TEST" + POSTFIX;
static {
beforeMatchingNameRegex(VECTOR_TEST, "VectorTest");
}
public static final String VFABD = PREFIX + "VFABD" + POSTFIX;
static {
machOnlyNameRegex(VFABD, "vfabd");

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
/*
* @test
* @bug 8292289
* @summary Test idealization of VectorTest intrinsics to eliminate
* the materialization of the result as an int
* @modules jdk.incubator.vector
* @library /test/lib /
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*sse4.*" & (vm.opt.UseSSE == "null" | vm.opt.UseSSE > 3))
* | os.arch == "aarch64"
* @run driver compiler.vectorapi.TestVectorTest
*/
/**
 * IR-framework test for VectorMask.allTrue(): checks, via the @IR rules on each
 * test method, which nodes remain in the compiled graph when the result is
 * consumed by a branch versus materialized as an int.
 */
public class TestVectorTest {
// Entry point: runs the IR test framework with the incubator vector module added.
public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
}
// Not inlined (@DontInline), so the taken path in branch() stays a real call.
@DontInline
public int call() { return 1; }
// allTrue() result feeds a branch: the IR rules require no CMP_I and no
// CMOVE_I matches, and exactly one VECTOR_TEST match.
@Test
@IR(failOn = {IRNode.CMP_I, IRNode.CMOVE_I})
@IR(counts = {IRNode.VECTOR_TEST, "1"})
public int branch(long maskLong) {
var mask = VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong);
return mask.allTrue() ? call() : 0;
}
// allTrue() result is returned as 1/0: the IR rules require no CMP_I matches,
// and exactly one VECTOR_TEST and one CMOVE_I match.
@Test
@IR(failOn = {IRNode.CMP_I})
@IR(counts = {IRNode.VECTOR_TEST, "1", IRNode.CMOVE_I, "1"})
public int cmove(long maskLong) {
var mask = VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong);
return mask.allTrue() ? 1 : 0;
}
// Drives both tests with two inputs: -1 has every bit set, while 100 has bit 0
// clear. Presumably this exercises both the all-true and not-all-true outcomes
// of allTrue() -- confirm against VectorMask.fromLong semantics.
@Run(test = {"branch", "cmove"})
public void run() {
branch(-1);
branch(100);
cmove(-1);
cmove(100);
}
}