mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-17 09:34:38 +02:00
8292289: [vectorapi] Improve the implementation of VectorTestNode
Reviewed-by: xgong, kvn
This commit is contained in:
parent
d35e840024
commit
3dfadeebd0
23 changed files with 335 additions and 290 deletions
|
@ -5869,67 +5869,62 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
|
||||||
|
|
||||||
// anytrue
|
// anytrue
|
||||||
|
|
||||||
instruct vtest_anytrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
|
instruct vtest_anytrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
|
||||||
predicate(UseSVE == 0 &&
|
predicate(UseSVE == 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP tmp, KILL cr);
|
effect(TEMP tmp);
|
||||||
format %{ "vtest_anytrue_neon $dst, $src1\t# KILL $tmp, cr" %}
|
format %{ "vtest_anytrue_neon $src1\t# KILL $tmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// No need to use src2.
|
// No need to use src2.
|
||||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||||
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
||||||
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
|
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
|
||||||
__ cmpw($dst$$Register, zr);
|
__ cmpw(rscratch1, zr);
|
||||||
__ csetw($dst$$Register, Assembler::NE);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vtest_anytrue_sve(iRegINoSp dst, pReg src1, pReg src2, rFlagsReg cr) %{
|
instruct vtest_anytrue_sve(rFlagsReg cr, pReg src1, pReg src2) %{
|
||||||
predicate(UseSVE > 0 &&
|
predicate(UseSVE > 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(KILL cr);
|
format %{ "vtest_anytrue_sve $src1" %}
|
||||||
format %{ "vtest_anytrue_sve $dst, $src1\t# KILL cr" %}
|
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// "src2" is not used for sve.
|
// "src2" is not used for sve.
|
||||||
__ sve_ptest(ptrue, $src1$$PRegister);
|
__ sve_ptest(ptrue, $src1$$PRegister);
|
||||||
__ csetw($dst$$Register, Assembler::NE);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// alltrue
|
// alltrue
|
||||||
|
|
||||||
instruct vtest_alltrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
|
instruct vtest_alltrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
|
||||||
predicate(UseSVE == 0 &&
|
predicate(UseSVE == 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP tmp, KILL cr);
|
effect(TEMP tmp);
|
||||||
format %{ "vtest_alltrue_neon $dst, $src1\t# KILL $tmp, cr" %}
|
format %{ "vtest_alltrue_neon $src1\t# KILL $tmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// No need to use src2.
|
// No need to use src2.
|
||||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||||
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
||||||
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
|
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
|
||||||
__ cmpw($dst$$Register, 0xff);
|
__ cmpw(rscratch1, 0xff);
|
||||||
__ csetw($dst$$Register, Assembler::EQ);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vtest_alltrue_sve(iRegINoSp dst, pReg src1, pReg src2, pReg ptmp, rFlagsReg cr) %{
|
instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
|
||||||
predicate(UseSVE > 0 &&
|
predicate(UseSVE > 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP ptmp, KILL cr);
|
effect(TEMP ptmp);
|
||||||
format %{ "vtest_alltrue_sve $dst, $src1, $src2\t# KILL $ptmp, cr" %}
|
format %{ "vtest_alltrue_sve $src1, $src2\t# KILL $ptmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
|
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
|
||||||
__ csetw($dst$$Register, Assembler::EQ);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
|
@ -4241,67 +4241,62 @@ instruct vroundD(vReg dst, vReg src, immI rmode) %{
|
||||||
|
|
||||||
// anytrue
|
// anytrue
|
||||||
|
|
||||||
instruct vtest_anytrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
|
instruct vtest_anytrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
|
||||||
predicate(UseSVE == 0 &&
|
predicate(UseSVE == 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP tmp, KILL cr);
|
effect(TEMP tmp);
|
||||||
format %{ "vtest_anytrue_neon $dst, $src1\t# KILL $tmp, cr" %}
|
format %{ "vtest_anytrue_neon $src1\t# KILL $tmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// No need to use src2.
|
// No need to use src2.
|
||||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||||
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
__ addv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
||||||
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
|
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
|
||||||
__ cmpw($dst$$Register, zr);
|
__ cmpw(rscratch1, zr);
|
||||||
__ csetw($dst$$Register, Assembler::NE);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vtest_anytrue_sve(iRegINoSp dst, pReg src1, pReg src2, rFlagsReg cr) %{
|
instruct vtest_anytrue_sve(rFlagsReg cr, pReg src1, pReg src2) %{
|
||||||
predicate(UseSVE > 0 &&
|
predicate(UseSVE > 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(KILL cr);
|
format %{ "vtest_anytrue_sve $src1" %}
|
||||||
format %{ "vtest_anytrue_sve $dst, $src1\t# KILL cr" %}
|
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// "src2" is not used for sve.
|
// "src2" is not used for sve.
|
||||||
__ sve_ptest(ptrue, $src1$$PRegister);
|
__ sve_ptest(ptrue, $src1$$PRegister);
|
||||||
__ csetw($dst$$Register, Assembler::NE);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// alltrue
|
// alltrue
|
||||||
|
|
||||||
instruct vtest_alltrue_neon(iRegINoSp dst, vReg src1, vReg src2, vReg tmp, rFlagsReg cr) %{
|
instruct vtest_alltrue_neon(rFlagsReg cr, vReg src1, vReg src2, vReg tmp) %{
|
||||||
predicate(UseSVE == 0 &&
|
predicate(UseSVE == 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP tmp, KILL cr);
|
effect(TEMP tmp);
|
||||||
format %{ "vtest_alltrue_neon $dst, $src1\t# KILL $tmp, cr" %}
|
format %{ "vtest_alltrue_neon $src1\t# KILL $tmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
// No need to use src2.
|
// No need to use src2.
|
||||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||||
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
__ uminv($tmp$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src1$$FloatRegister);
|
||||||
__ umov($dst$$Register, $tmp$$FloatRegister, __ B, 0);
|
__ umov(rscratch1, $tmp$$FloatRegister, __ B, 0);
|
||||||
__ cmpw($dst$$Register, 0xff);
|
__ cmpw(rscratch1, 0xff);
|
||||||
__ csetw($dst$$Register, Assembler::EQ);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vtest_alltrue_sve(iRegINoSp dst, pReg src1, pReg src2, pReg ptmp, rFlagsReg cr) %{
|
instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
|
||||||
predicate(UseSVE > 0 &&
|
predicate(UseSVE > 0 &&
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||||
match(Set dst (VectorTest src1 src2));
|
match(Set cr (VectorTest src1 src2));
|
||||||
effect(TEMP ptmp, KILL cr);
|
effect(TEMP ptmp);
|
||||||
format %{ "vtest_alltrue_sve $dst, $src1, $src2\t# KILL $ptmp, cr" %}
|
format %{ "vtest_alltrue_sve $src1, $src2\t# KILL $ptmp" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
|
__ sve_eors($ptmp$$PRegister, ptrue, $src1$$PRegister, $src2$$PRegister);
|
||||||
__ csetw($dst$$Register, Assembler::EQ);
|
|
||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
|
@ -163,6 +163,16 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = true;
|
static const bool supports_encode_ascii_array = true;
|
||||||
|
|
||||||
|
// An all-set mask is used for the alltrue vector test with SVE
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return is_predicate && is_alltrue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
return is_alltrue ? BoolTest::eq : BoolTest::ne;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
|
|
|
@ -155,6 +155,16 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = false;
|
static const bool supports_encode_ascii_array = false;
|
||||||
|
|
||||||
|
// Some architectures need a helper to check for an alltrue vector
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
return BoolTest::illegal;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
|
|
|
@ -164,6 +164,16 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = true;
|
static const bool supports_encode_ascii_array = true;
|
||||||
|
|
||||||
|
// Some architectures need a helper to check for an alltrue vector
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
return BoolTest::illegal;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
|
|
|
@ -161,6 +161,16 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = false;
|
static const bool supports_encode_ascii_array = false;
|
||||||
|
|
||||||
|
// Some architectures need a helper to check for an alltrue vector
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
return BoolTest::illegal;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
|
|
|
@ -153,6 +153,16 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = true;
|
static const bool supports_encode_ascii_array = true;
|
||||||
|
|
||||||
|
// Some architectures need a helper to check for an alltrue vector
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
return BoolTest::illegal;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||||
switch(vopc) {
|
switch(vopc) {
|
||||||
|
|
|
@ -5447,13 +5447,38 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
emit_int16(0x17, (0xC0 | encode));
|
emit_int16(0x17, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Assembler::vtestps(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||||
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||||
|
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int16(0x0E, (0xC0 | encode));
|
||||||
|
}
|
||||||
|
|
||||||
void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
void Assembler::evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||||
assert(VM_Version::supports_avx512vlbw(), "");
|
assert(vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : VM_Version::supports_avx512vlbw(), "");
|
||||||
// Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
|
// Encoding: EVEX.NDS.XXX.66.0F38.W0 26 /r
|
||||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
attributes.set_is_evex_instruction();
|
attributes.set_is_evex_instruction();
|
||||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||||
emit_int16((unsigned char)0x26, (0xC0 | encode));
|
emit_int16(0x26, (0xC0 | encode));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::evptestmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||||
|
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx512vl(), "");
|
||||||
|
// Encoding: EVEX.NDS.XXX.66.0F38.W0 27 /r
|
||||||
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
|
attributes.set_is_evex_instruction();
|
||||||
|
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int16(0x27, (0xC0 | encode));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||||
|
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx512vl(), "");
|
||||||
|
// Encoding: EVEX.NDS.XXX.F3.0F38.W0 27 /r
|
||||||
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
|
attributes.set_is_evex_instruction();
|
||||||
|
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
|
||||||
|
emit_int16(0x27, (0xC0 | encode));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::punpcklbw(XMMRegister dst, Address src) {
|
void Assembler::punpcklbw(XMMRegister dst, Address src) {
|
||||||
|
|
|
@ -1973,9 +1973,12 @@ private:
|
||||||
void vptest(XMMRegister dst, Address src);
|
void vptest(XMMRegister dst, Address src);
|
||||||
|
|
||||||
void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
void evptestmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
void evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Vector compare
|
// Vector compare
|
||||||
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
|
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
void vtestps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
// Interleave Low Bytes
|
// Interleave Low Bytes
|
||||||
void punpcklbw(XMMRegister dst, XMMRegister src);
|
void punpcklbw(XMMRegister dst, XMMRegister src);
|
||||||
|
|
|
@ -2415,58 +2415,32 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
|
void C2_MacroAssembler::vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes) {
|
||||||
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
|
assert(vlen_in_bytes <= 32, "");
|
||||||
switch(vlen) {
|
int esize = type2aelembytes(bt);
|
||||||
case 4:
|
if (vlen_in_bytes == 32) {
|
||||||
assert(vtmp1 != xnoreg, "required.");
|
assert(vtmp == xnoreg, "required.");
|
||||||
// Broadcast lower 32 bits to 128 bits before ptest
|
if (esize >= 4) {
|
||||||
pshufd(vtmp1, src1, 0x0);
|
vtestps(src1, src2, AVX_256bit);
|
||||||
if (bt == BoolTest::overflow) {
|
} else {
|
||||||
assert(vtmp2 != xnoreg, "required.");
|
vptest(src1, src2, AVX_256bit);
|
||||||
pshufd(vtmp2, src2, 0x0);
|
}
|
||||||
} else {
|
return;
|
||||||
assert(vtmp2 == xnoreg, "required.");
|
}
|
||||||
vtmp2 = src2;
|
if (vlen_in_bytes < 16) {
|
||||||
}
|
// Duplicate the lower part to fill the whole register,
|
||||||
ptest(vtmp1, vtmp2);
|
// Don't need to do so for src2
|
||||||
break;
|
assert(vtmp != xnoreg, "required");
|
||||||
case 8:
|
int shuffle_imm = (vlen_in_bytes == 4) ? 0x00 : 0x04;
|
||||||
assert(vtmp1 != xnoreg, "required.");
|
pshufd(vtmp, src1, shuffle_imm);
|
||||||
// Broadcast lower 64 bits to 128 bits before ptest
|
} else {
|
||||||
pshufd(vtmp1, src1, 0x4);
|
assert(vtmp == xnoreg, "required");
|
||||||
if (bt == BoolTest::overflow) {
|
vtmp = src1;
|
||||||
assert(vtmp2 != xnoreg, "required.");
|
}
|
||||||
pshufd(vtmp2, src2, 0x4);
|
if (esize >= 4 && VM_Version::supports_avx()) {
|
||||||
} else {
|
vtestps(vtmp, src2, AVX_128bit);
|
||||||
assert(vtmp2 == xnoreg, "required.");
|
} else {
|
||||||
vtmp2 = src2;
|
ptest(vtmp, src2);
|
||||||
}
|
|
||||||
ptest(vtmp1, vtmp2);
|
|
||||||
break;
|
|
||||||
case 16:
|
|
||||||
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
|
|
||||||
ptest(src1, src2);
|
|
||||||
break;
|
|
||||||
case 32:
|
|
||||||
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
|
|
||||||
vptest(src1, src2, Assembler::AVX_256bit);
|
|
||||||
break;
|
|
||||||
case 64:
|
|
||||||
{
|
|
||||||
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
|
|
||||||
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
|
|
||||||
if (bt == BoolTest::ne) {
|
|
||||||
ktestql(mask, mask);
|
|
||||||
} else {
|
|
||||||
assert(bt == BoolTest::overflow, "required");
|
|
||||||
kortestql(mask, mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(false,"Should not reach here.");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -138,8 +138,7 @@ public:
|
||||||
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
|
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
|
||||||
|
|
||||||
// vector test
|
// vector test
|
||||||
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
|
void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);
|
||||||
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
|
|
||||||
|
|
||||||
// Convert B2X
|
// Convert B2X
|
||||||
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
|
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
|
||||||
|
|
|
@ -9013,26 +9013,6 @@ void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) {
|
|
||||||
masklen = masklen < 8 ? 8 : masklen;
|
|
||||||
ktest(masklen, src1, src2);
|
|
||||||
setb(Assembler::notZero, dst);
|
|
||||||
movzbl(dst, dst);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) {
|
|
||||||
if (masklen < 8) {
|
|
||||||
knotbl(kscratch, src2);
|
|
||||||
kortestbl(src1, kscratch);
|
|
||||||
setb(Assembler::carrySet, dst);
|
|
||||||
movzbl(dst, dst);
|
|
||||||
} else {
|
|
||||||
ktest(masklen, src1, src2);
|
|
||||||
setb(Assembler::carrySet, dst);
|
|
||||||
movzbl(dst, dst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
|
void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
|
||||||
switch(masklen) {
|
switch(masklen) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -1768,9 +1768,6 @@ public:
|
||||||
using Assembler::vpternlogq;
|
using Assembler::vpternlogq;
|
||||||
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);
|
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);
|
||||||
|
|
||||||
void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
|
|
||||||
void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
|
|
||||||
|
|
||||||
void cmov32( Condition cc, Register dst, Address src);
|
void cmov32( Condition cc, Register dst, Address src);
|
||||||
void cmov32( Condition cc, Register dst, Register src);
|
void cmov32( Condition cc, Register dst, Register src);
|
||||||
|
|
||||||
|
|
|
@ -183,6 +183,25 @@
|
||||||
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
|
||||||
static const bool supports_encode_ascii_array = true;
|
static const bool supports_encode_ascii_array = true;
|
||||||
|
|
||||||
|
// Without predicated input, an all-one vector is needed for the alltrue vector test
|
||||||
|
static constexpr bool vectortest_needs_second_argument(bool is_alltrue, bool is_predicate) {
|
||||||
|
return is_alltrue && !is_predicate;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BoolTest mask for vector test intrinsics
|
||||||
|
static constexpr BoolTest::mask vectortest_mask(bool is_alltrue, bool is_predicate, int vlen) {
|
||||||
|
if (!is_alltrue) {
|
||||||
|
return BoolTest::ne;
|
||||||
|
}
|
||||||
|
if (!is_predicate) {
|
||||||
|
return BoolTest::lt;
|
||||||
|
}
|
||||||
|
if ((vlen == 8 && !VM_Version::supports_avx512dq()) || vlen < 8) {
|
||||||
|
return BoolTest::eq;
|
||||||
|
}
|
||||||
|
return BoolTest::lt;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns pre-selection estimated size of a vector operation.
|
// Returns pre-selection estimated size of a vector operation.
|
||||||
// Currently, it's a rudimentary heuristic based on emitted code size for complex
|
// Currently, it's a rudimentary heuristic based on emitted code size for complex
|
||||||
// IR nodes used by unroll policy. Idea is to constrain unrolling factor and prevent
|
// IR nodes used by unroll policy. Idea is to constrain unrolling factor and prevent
|
||||||
|
|
|
@ -1850,8 +1850,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||||
return false; // Implementation limitation
|
return false; // Implementation limitation
|
||||||
} else if (size_in_bits < 32) {
|
} else if (size_in_bits < 32) {
|
||||||
return false; // Implementation limitation
|
return false; // Implementation limitation
|
||||||
} else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) {
|
|
||||||
return false; // Implementation limitation
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Op_VectorLoadShuffle:
|
case Op_VectorLoadShuffle:
|
||||||
|
@ -8048,169 +8046,70 @@ instruct vabsnegD(vec dst, vec src) %{
|
||||||
//------------------------------------- VectorTest --------------------------------------------
|
//------------------------------------- VectorTest --------------------------------------------
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{
|
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
|
match(Set cr (VectorTest src1 src2));
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
|
||||||
effect(TEMP vtmp1, TEMP vtmp2, KILL cr);
|
|
||||||
format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
|
||||||
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
||||||
__ setb(Assembler::carrySet, $dst$$Register);
|
|
||||||
__ movzbl($dst$$Register, $dst$$Register);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
|
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
|
||||||
effect(KILL cr);
|
|
||||||
format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
|
||||||
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
|
||||||
__ setb(Assembler::carrySet, $dst$$Register);
|
|
||||||
__ movzbl($dst$$Register, $dst$$Register);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{
|
|
||||||
predicate(VM_Version::supports_avx512bwdq() &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
|
|
||||||
n->in(1)->bottom_type()->isa_vectmask() &&
|
|
||||||
Matcher::vector_length(n->in(1)) < 8);
|
|
||||||
match(Set dst (VectorTest src1 src2));
|
|
||||||
effect(KILL cr, TEMP kscratch);
|
|
||||||
format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
|
|
||||||
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
|
|
||||||
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
|
|
||||||
uint masklen = Matcher::vector_length(this, $src1);
|
|
||||||
__ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
|
|
||||||
instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
|
|
||||||
predicate(VM_Version::supports_avx512bwdq() &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
|
|
||||||
n->in(1)->bottom_type()->isa_vectmask() &&
|
|
||||||
Matcher::vector_length(n->in(1)) >= 8);
|
|
||||||
match(Set dst (VectorTest src1 src2));
|
|
||||||
effect(KILL cr);
|
|
||||||
format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
|
|
||||||
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
|
|
||||||
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
|
|
||||||
uint masklen = Matcher::vector_length(this, $src1);
|
|
||||||
__ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
|
|
||||||
instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{
|
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
|
||||||
effect(TEMP vtmp, KILL cr);
|
|
||||||
format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
|
||||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
|
|
||||||
__ setb(Assembler::notZero, $dst$$Register);
|
|
||||||
__ movzbl($dst$$Register, $dst$$Register);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
|
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
|
||||||
match(Set dst (VectorTest src1 src2 ));
|
|
||||||
effect(KILL cr);
|
|
||||||
format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
|
||||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
|
||||||
__ setb(Assembler::notZero, $dst$$Register);
|
|
||||||
__ movzbl($dst$$Register, $dst$$Register);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
|
|
||||||
predicate(VM_Version::supports_avx512bwdq() &&
|
|
||||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
|
||||||
match(Set dst (VectorTest src1 src2));
|
|
||||||
effect(KILL cr);
|
|
||||||
format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
|
|
||||||
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
|
|
||||||
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
|
|
||||||
uint masklen = Matcher::vector_length(this, $src1);
|
|
||||||
__ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{
|
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 &&
|
|
||||||
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
|
|
||||||
match(Set cr (CmpI (VectorTest src1 src2) zero));
|
|
||||||
effect(TEMP vtmp);
|
effect(TEMP vtmp);
|
||||||
format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %}
|
format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
|
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
|
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
|
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
|
||||||
predicate(!VM_Version::supports_avx512bwdq() &&
|
predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
|
match(Set cr (VectorTest src1 src2));
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 &&
|
format %{ "vptest_ge16 $src1, $src2\n\t" %}
|
||||||
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
|
|
||||||
match(Set cr (CmpI (VectorTest src1 src2) zero));
|
|
||||||
format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %}
|
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
|
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
||||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
__ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{
|
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
|
||||||
predicate(VM_Version::supports_avx512bwdq() &&
|
predicate((Matcher::vector_length(n->in(1)) < 8 ||
|
||||||
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
|
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
|
||||||
match(Set cr (CmpI (VectorTest src1 src2) zero));
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||||
format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %}
|
match(Set cr (VectorTest src1 src2));
|
||||||
|
effect(TEMP tmp);
|
||||||
|
format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
uint masklen = Matcher::vector_length(this, $src1);
|
uint masklen = Matcher::vector_length(this, $src1);
|
||||||
const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
|
__ kmovwl($tmp$$Register, $src1$$KRegister);
|
||||||
const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
|
__ andl($tmp$$Register, (1 << masklen) - 1);
|
||||||
assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
|
__ cmpl($tmp$$Register, (1 << masklen) - 1);
|
||||||
masklen = masklen < 8 ? 8 : masklen;
|
%}
|
||||||
__ ktest(masklen, $src1$$KRegister, $src2$$KRegister);
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
|
||||||
|
predicate((Matcher::vector_length(n->in(1)) < 8 ||
|
||||||
|
(Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
|
||||||
|
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||||
|
match(Set cr (VectorTest src1 src2));
|
||||||
|
effect(TEMP tmp);
|
||||||
|
format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
|
||||||
|
ins_encode %{
|
||||||
|
uint masklen = Matcher::vector_length(this, $src1);
|
||||||
|
__ kmovwl($tmp$$Register, $src1$$KRegister);
|
||||||
|
__ andl($tmp$$Register, (1 << masklen) - 1);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
|
||||||
|
predicate(Matcher::vector_length(n->in(1)) >= 16 ||
|
||||||
|
(Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
|
||||||
|
match(Set cr (VectorTest src1 src2));
|
||||||
|
format %{ "ktest_ge8 $src1, $src2\n\t" %}
|
||||||
|
ins_encode %{
|
||||||
|
uint masklen = Matcher::vector_length(this, $src1);
|
||||||
|
__ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "opto/node.hpp"
|
#include "opto/node.hpp"
|
||||||
#include "opto/phaseX.hpp"
|
#include "opto/phaseX.hpp"
|
||||||
#include "opto/regmask.hpp"
|
#include "opto/regmask.hpp"
|
||||||
|
#include "opto/subnode.hpp"
|
||||||
#include "runtime/vm_version.hpp"
|
#include "runtime/vm_version.hpp"
|
||||||
|
|
||||||
class Compile;
|
class Compile;
|
||||||
|
|
|
@ -1248,6 +1248,11 @@ Node *PhaseIterGVN::transform_old(Node* n) {
|
||||||
assert(!_table.find_index(n->_idx), "found duplicate entry in table");
|
assert(!_table.find_index(n->_idx), "found duplicate entry in table");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allow Bool -> Cmp idealisation in late inlining intrinsics that return a bool
|
||||||
|
if (n->is_Cmp()) {
|
||||||
|
add_users_to_worklist(n);
|
||||||
|
}
|
||||||
|
|
||||||
// Apply the Ideal call in a loop until it no longer applies
|
// Apply the Ideal call in a loop until it no longer applies
|
||||||
Node* k = n;
|
Node* k = n;
|
||||||
DEBUG_ONLY(dead_loop_check(k);)
|
DEBUG_ONLY(dead_loop_check(k);)
|
||||||
|
|
|
@ -1427,7 +1427,10 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||||
Node *cmp = in(1);
|
Node *cmp = in(1);
|
||||||
if( !cmp->is_Sub() ) return NULL;
|
if( !cmp->is_Sub() ) return NULL;
|
||||||
int cop = cmp->Opcode();
|
int cop = cmp->Opcode();
|
||||||
if( cop == Op_FastLock || cop == Op_FastUnlock || cmp->is_SubTypeCheck()) return NULL;
|
if( cop == Op_FastLock || cop == Op_FastUnlock ||
|
||||||
|
cmp->is_SubTypeCheck() || cop == Op_VectorTest ) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
Node *cmp1 = cmp->in(1);
|
Node *cmp1 = cmp->in(1);
|
||||||
Node *cmp2 = cmp->in(2);
|
Node *cmp2 = cmp->in(2);
|
||||||
if( !cmp1 ) return NULL;
|
if( !cmp1 ) return NULL;
|
||||||
|
@ -1459,6 +1462,20 @@ Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||||
return new BoolNode( cmp, _test.commute() );
|
return new BoolNode( cmp, _test.commute() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Change "bool eq/ne (cmp (cmove (bool tst (cmp2)) 1 0) 0)" into "bool tst/~tst (cmp2)"
|
||||||
|
if (cop == Op_CmpI &&
|
||||||
|
(_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
|
||||||
|
cmp1_op == Op_CMoveI && cmp2->find_int_con(1) == 0) {
|
||||||
|
// 0 should be on the true branch
|
||||||
|
if (cmp1->in(CMoveNode::IfTrue)->find_int_con(1) == 0 &&
|
||||||
|
cmp1->in(CMoveNode::IfFalse)->find_int_con(0) != 0) {
|
||||||
|
BoolNode* target = cmp1->in(CMoveNode::Condition)->as_Bool();
|
||||||
|
return new BoolNode(target->in(1),
|
||||||
|
(_test._test == BoolTest::eq) ? target->_test._test :
|
||||||
|
target->_test.negate());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Change "bool eq/ne (cmp (and X 16) 16)" into "bool ne/eq (cmp (and X 16) 0)".
|
// Change "bool eq/ne (cmp (and X 16) 16)" into "bool ne/eq (cmp (and X 16) 0)".
|
||||||
if (cop == Op_CmpI &&
|
if (cop == Op_CmpI &&
|
||||||
(_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
|
(_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
|
||||||
|
|
|
@ -1799,14 +1799,24 @@ bool LibraryCallKit::inline_vector_test() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
|
Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
|
||||||
Node* opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
|
Node* opd2;
|
||||||
|
if (Matcher::vectortest_needs_second_argument(booltest == BoolTest::overflow,
|
||||||
|
opd1->bottom_type()->isa_vectmask())) {
|
||||||
|
opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
|
||||||
|
} else {
|
||||||
|
opd2 = opd1;
|
||||||
|
}
|
||||||
if (opd1 == NULL || opd2 == NULL) {
|
if (opd1 == NULL || opd2 == NULL) {
|
||||||
return false; // operand unboxing failed
|
return false; // operand unboxing failed
|
||||||
}
|
}
|
||||||
Node* test = new VectorTestNode(opd1, opd2, booltest);
|
|
||||||
test = gvn().transform(test);
|
|
||||||
|
|
||||||
set_result(test);
|
Node* cmp = gvn().transform(new VectorTestNode(opd1, opd2, booltest));
|
||||||
|
BoolTest::mask test = Matcher::vectortest_mask(booltest == BoolTest::overflow,
|
||||||
|
opd1->bottom_type()->isa_vectmask(), num_elem);
|
||||||
|
Node* bol = gvn().transform(new BoolNode(cmp, test));
|
||||||
|
Node* res = gvn().transform(new CMoveINode(bol, gvn().intcon(0), gvn().intcon(1), TypeInt::BOOL));
|
||||||
|
|
||||||
|
set_result(res);
|
||||||
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
|
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1422,7 +1422,7 @@ class VectorMaskWrapperNode : public VectorNode {
|
||||||
Node* vector_mask() const { return in(2); }
|
Node* vector_mask() const { return in(2); }
|
||||||
};
|
};
|
||||||
|
|
||||||
class VectorTestNode : public Node {
|
class VectorTestNode : public CmpNode {
|
||||||
private:
|
private:
|
||||||
BoolTest::mask _predicate;
|
BoolTest::mask _predicate;
|
||||||
|
|
||||||
|
@ -1430,18 +1430,18 @@ class VectorTestNode : public Node {
|
||||||
uint size_of() const { return sizeof(*this); }
|
uint size_of() const { return sizeof(*this); }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VectorTestNode(Node* in1, Node* in2, BoolTest::mask predicate) : Node(NULL, in1, in2), _predicate(predicate) {
|
VectorTestNode(Node* in1, Node* in2, BoolTest::mask predicate) : CmpNode(in1, in2), _predicate(predicate) {
|
||||||
assert(in2->bottom_type()->is_vect() == in2->bottom_type()->is_vect(), "same vector type");
|
assert(in2->bottom_type()->is_vect() == in2->bottom_type()->is_vect(), "same vector type");
|
||||||
}
|
}
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
virtual uint hash() const { return Node::hash() + _predicate; }
|
virtual uint hash() const { return Node::hash() + _predicate; }
|
||||||
|
virtual const Type* Value(PhaseGVN* phase) const { return TypeInt::CC; }
|
||||||
|
virtual const Type* sub(const Type*, const Type*) const { return TypeInt::CC; }
|
||||||
|
BoolTest::mask get_predicate() const { return _predicate; }
|
||||||
|
|
||||||
virtual bool cmp( const Node &n ) const {
|
virtual bool cmp( const Node &n ) const {
|
||||||
return Node::cmp(n) && _predicate == ((VectorTestNode&)n)._predicate;
|
return Node::cmp(n) && _predicate == ((VectorTestNode&)n)._predicate;
|
||||||
}
|
}
|
||||||
virtual const Type *bottom_type() const { return TypeInt::BOOL; }
|
|
||||||
virtual uint ideal_reg() const { return Op_RegI; } // TODO Should be RegFlags but due to missing comparison flags for BoolTest
|
|
||||||
// in middle-end, we make it boolean result directly.
|
|
||||||
BoolTest::mask get_predicate() const { return _predicate; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class VectorBlendNode : public VectorNode {
|
class VectorBlendNode : public VectorNode {
|
||||||
|
|
|
@ -1875,7 +1875,7 @@
|
||||||
declare_c2_type(XorVMaskNode, VectorNode) \
|
declare_c2_type(XorVMaskNode, VectorNode) \
|
||||||
declare_c2_type(VectorBoxNode, Node) \
|
declare_c2_type(VectorBoxNode, Node) \
|
||||||
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
|
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
|
||||||
declare_c2_type(VectorTestNode, Node) \
|
declare_c2_type(VectorTestNode, CmpNode) \
|
||||||
\
|
\
|
||||||
/*********************/ \
|
/*********************/ \
|
||||||
/* Adapter Blob Entries */ \
|
/* Adapter Blob Entries */ \
|
||||||
|
|
|
@ -1114,6 +1114,11 @@ public class IRNode {
|
||||||
beforeMatchingNameRegex(VECTOR_UCAST_S2X, "VectorUCastS2X");
|
beforeMatchingNameRegex(VECTOR_UCAST_S2X, "VectorUCastS2X");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String VECTOR_TEST = PREFIX + "VECTOR_TEST" + POSTFIX;
|
||||||
|
static {
|
||||||
|
beforeMatchingNameRegex(VECTOR_TEST, "VectorTest");
|
||||||
|
}
|
||||||
|
|
||||||
public static final String VFABD = PREFIX + "VFABD" + POSTFIX;
|
public static final String VFABD = PREFIX + "VFABD" + POSTFIX;
|
||||||
static {
|
static {
|
||||||
machOnlyNameRegex(VFABD, "vfabd");
|
machOnlyNameRegex(VFABD, "vfabd");
|
||||||
|
|
71
test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
Normal file
71
test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package compiler.vectorapi;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
import jdk.incubator.vector.ByteVector;
|
||||||
|
import jdk.incubator.vector.VectorMask;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8292289
|
||||||
|
* @summary Test idealization of VectorTest intrinsics to eliminate
|
||||||
|
* the materialization of the result as an int
|
||||||
|
* @modules jdk.incubator.vector
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*sse4.*" & (vm.opt.UseSSE == "null" | vm.opt.UseSSE > 3))
|
||||||
|
* | os.arch == "aarch64"
|
||||||
|
* @run driver compiler.vectorapi.TestVectorTest
|
||||||
|
*/
|
||||||
|
public class TestVectorTest {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||||
|
}
|
||||||
|
|
||||||
|
@DontInline
|
||||||
|
public int call() { return 1; }
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = {IRNode.CMP_I, IRNode.CMOVE_I})
|
||||||
|
@IR(counts = {IRNode.VECTOR_TEST, "1"})
|
||||||
|
public int branch(long maskLong) {
|
||||||
|
var mask = VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong);
|
||||||
|
return mask.allTrue() ? call() : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = {IRNode.CMP_I})
|
||||||
|
@IR(counts = {IRNode.VECTOR_TEST, "1", IRNode.CMOVE_I, "1"})
|
||||||
|
public int cmove(long maskLong) {
|
||||||
|
var mask = VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong);
|
||||||
|
return mask.allTrue() ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"branch", "cmove"})
|
||||||
|
public void run() {
|
||||||
|
branch(-1);
|
||||||
|
branch(100);
|
||||||
|
cmove(-1);
|
||||||
|
cmove(100);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue