8192846: Support cmov vectorization for float
Reviewed-by: kvn

parent c8bf23d93a
commit b96c85c640

12 changed files with 82 additions and 13 deletions
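The change extends C2's SuperWord conditional-move vectorization from double (CMoveVD) to float (CMoveVF): on x86 the new node is matched to an AVX cmpps + blendvps pair (8 float lanes in a 256-bit vecY register), and the transformation is gated by the new UseVectorCmov flag, which defaults to false. As a rough illustration only (this example is not part of the commit, and the class and method names are hypothetical), the kind of Java loop this targets is a float loop whose branch C2 can first flatten into scalar CMoveF nodes and, with the flag enabled, SuperWord can then pack into CMoveVF:

    // Illustrative sketch, not from the commit: a branchy float loop that C2's
    // conditional-move transformation can turn into scalar CMoveF nodes and that
    // SuperWord, with -XX:+UseVectorCmov, can then pack into CMoveVF vectors.
    public class FloatCmovExample {
        static float[] select(float[] a, float[] b, float[] t) {
            float[] r = new float[a.length];
            for (int i = 0; i < a.length; i++) {
                r[i] = (t[i] > 0.0f) ? a[i] : b[i];   // CmpF feeding a conditional move
            }
            return r;
        }

        public static void main(String[] args) {
            int n = 1024;
            float[] a = new float[n], b = new float[n], t = new float[n];
            for (int i = 0; i < n; i++) {
                a[i] = i; b[i] = -i; t[i] = (i % 2 == 0) ? 1.0f : -1.0f;
            }
            System.out.println(select(a, b, t)[1]);
        }
    }

Because UseVectorCmov defaults to false in this change, the vector cmov path has to be enabled explicitly (for example with -XX:+UseVectorCmov); whether a given loop actually vectorizes still depends on the usual SuperWord packing and cost-model checks seen in the hunks below.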
@@ -7449,6 +7449,27 @@ void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   emit_int8((unsigned char)(0xF0 & src2_enc<<4));
 }
 
+void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  assert(!VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xC2);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)(0xF & cop));
+}
+
+void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  assert(!VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0x4A);
+  emit_int8((unsigned char)(0xC0 | encode));
+  int src2_enc = src2->encoding();
+  emit_int8((unsigned char)(0xF0 & src2_enc<<4));
+}
+
 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
   assert(VM_Version::supports_avx2(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -2114,9 +2114,11 @@ private:
   // runtime code and native libraries.
   void vzeroupper();
 
-  // AVX support for vectorized conditional move (double). The following two instructions used only coupled.
+  // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
   void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+  void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
+  void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
 
 protected:
@@ -1263,6 +1263,7 @@ const bool Matcher::match_rule_supported(int opcode) {
       if (!VM_Version::supports_cx8())
         ret_value = false;
       break;
+    case Op_CMoveVF:
     case Op_CMoveVD:
       if (UseAVX < 1 || UseAVX > 2)
         ret_value = false;
@@ -1304,6 +1305,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
       if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
        ret_value = false;
       break;
+    case Op_CMoveVF:
+      if (vlen != 8)
+        ret_value = false;
     case Op_CMoveVD:
       if (vlen != 4)
         ret_value = false;
@@ -8170,6 +8174,22 @@ instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
   ins_pipe( pipe_slow );
 %}
 
+instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
+  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
+  effect(TEMP dst, USE src1, USE src2);
+  format %{ "cmpps.$copnd  $dst, $src1, $src2  ! vcmovevf, cond=$cop\n\t"
+            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
+         %}
+  ins_encode %{
+    int vector_len = 1;
+    int cond = (Assembler::Condition)($copnd$$cmpcode);
+    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
+    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
   predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
   match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
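In the vcmov8F_reg pattern above, the two new assembler routines are emitted as the coupled pair the header comment calls out: cmpps writes an all-ones or all-zeros 32-bit mask into $dst for each of the eight float lanes, and blendvps then selects the $src2 lane where the mask lane is set and the $src1 lane where it is clear. A minimal scalar model of that per-lane behaviour follows (an illustration under these assumptions, not code from the commit; the predicate argument stands in for the comparison condition encoded in the cmpps immediate):

    import java.util.Arrays;
    import java.util.function.BiPredicate;

    public class BlendModel {
        // Models cmpps (build a per-lane mask) followed by blendvps (select by mask)
        // over eight float lanes, i.e. one 256-bit AVX vector of floats.
        static float[] cmovLanes(float[] src1, float[] src2, BiPredicate<Float, Float> cond) {
            float[] dst = new float[8];
            for (int i = 0; i < 8; i++) {
                boolean mask = cond.test(src1[i], src2[i]); // cmpps: lane mask, all-ones or all-zeros
                dst[i] = mask ? src2[i] : src1[i];          // blendvps: pick src2 where the mask lane is set
            }
            return dst;
        }

        public static void main(String[] args) {
            float[] a = {1, 2, 3, 4, 5, 6, 7, 8};
            float[] b = {8, 7, 6, 5, 4, 3, 2, 1};
            System.out.println(Arrays.toString(cmovLanes(a, b, (x, y) -> x < y)));
        }
    }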
@@ -4164,7 +4164,7 @@ bool MatchRule::is_vector() const {
     "AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
     "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
     "MulVS","MulVI","MulVL","MulVF","MulVD",
-    "CMoveVD",
+    "CMoveVD", "CMoveVF",
     "DivVF","DivVD",
     "AbsVF","AbsVD",
     "NegVF","NegVD",
@@ -195,6 +195,9 @@
   product(bool, UseSubwordForMaxVector, true,                               \
           "Use Subword Analysis to set maximum vector size")                \
                                                                             \
+  product(bool, UseVectorCmov, false,                                       \
+          "Use Vectorized Cmov")                                            \
+                                                                            \
   develop(intx, UnrollLimitForProfileCheck, 1,                              \
           "Don't use profile_trip_cnt() to restrict unrolling until "       \
           "unrolling would push the number of unrolled iterations above "   \
@@ -66,6 +66,7 @@ macro(ConstraintCast)
 macro(CMoveD)
 macro(CMoveVD)
 macro(CMoveF)
+macro(CMoveVF)
 macro(CMoveI)
 macro(CMoveL)
 macro(CMoveP)
@@ -528,13 +528,12 @@ Node *PhaseIdealLoop::conditional_move( Node *region ) {
     BasicType bt = phi->type()->basic_type();
     switch (bt) {
     case T_DOUBLE:
+    case T_FLOAT:
       if (C->use_cmove()) {
         continue; //TODO: maybe we want to add some cost
       }
-    case T_FLOAT: {
       cost += Matcher::float_cmove_cost(); // Could be very expensive
       break;
-    }
     case T_LONG: {
       cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
     }
@@ -613,8 +612,9 @@ Node *PhaseIdealLoop::conditional_move( Node *region ) {
   }
   // Check for highly predictable branch. No point in CMOV'ing if
   // we are going to predict accurately all the time.
-  if (C->use_cmove() && cmp_op == Op_CmpD) ;//keep going
-  else if (iff->_prob < infrequent_prob ||
+  if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
+    //keep going
+  } else if (iff->_prob < infrequent_prob ||
       iff->_prob > (1.0f - infrequent_prob))
     return NULL;
 
@@ -2267,6 +2267,7 @@ void Matcher::find_shared( Node *n ) {
     case Op_CMoveL:
     case Op_CMoveN:
     case Op_CMoveP:
+    case Op_CMoveVF:
     case Op_CMoveVD: {
       // Restructure into a binary tree for Matching. It's possible that
       // we could move this code up next to the graph reshaping for IfNodes
@@ -58,7 +58,7 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
   _mem_slice_tail(arena(), 8, 0, NULL),           // memory slice tails
   _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
   _clone_map(phase->C->clone_map()),              // map of nodes created in cloning
-  _cmovev_kit(_arena, this),                      // map to facilitate CMoveVD creation
+  _cmovev_kit(_arena, this),                      // map to facilitate CMoveV creation
   _align_to_ref(NULL),                            // memory reference to align vectors to
   _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
   _dg(_arena),                                    // dependence graph
@@ -511,8 +511,7 @@ void SuperWord::SLP_extract() {
   combine_packs();
 
   construct_my_pack_map();
-
-  if (_do_vector_loop) {
+  if (UseVectorCmov) {
     merge_packs_to_cmovd();
   }
 
@@ -1249,8 +1248,8 @@ void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
 
 //------------------------------data_size---------------------------
 int SuperWord::data_size(Node* s) {
-  Node* use = NULL; //test if the node is a candidate for CMoveVD optimization, then return the size of CMov
-  if (_do_vector_loop) {
+  Node* use = NULL; //test if the node is a candidate for CMoveV optimization, then return the size of CMov
+  if (UseVectorCmov) {
     use = _cmovev_kit.is_Bool_candidate(s);
     if (use != NULL) {
       return data_size(use);
@@ -1260,6 +1259,7 @@ int SuperWord::data_size(Node* s) {
       return data_size(use);
     }
   }
+
   int bsize = type2aelembytes(velt_basic_type(s));
   assert(bsize != 0, "valid size");
   return bsize;
@@ -1718,6 +1718,9 @@ Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) {
   if (!cmovd->is_CMove()) {
     return NULL;
   }
+  if (cmovd->Opcode() != Op_CMoveF && cmovd->Opcode() != Op_CMoveD) {
+    return NULL;
+  }
   if (pack(cmovd) != NULL) { // already in the cmov pack
     return NULL;
   }
@@ -2377,7 +2380,13 @@ void SuperWord::output() {
         }
         BasicType bt = velt_basic_type(n);
         const TypeVect* vt = TypeVect::make(bt, vlen);
-        vn = new CMoveVDNode(cc, src1, src2, vt);
+        assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported");
+        if (bt == T_FLOAT) {
+          vn = new CMoveVFNode(cc, src1, src2, vt);
+        } else {
+          assert(bt == T_DOUBLE, "Expected double");
+          vn = new CMoveVDNode(cc, src1, src2, vt);
+        }
         NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
       } else if (opc == Op_FmaD || opc == Op_FmaF) {
         // Promote operands to vector
@@ -92,6 +92,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
   case Op_FmaF:
     assert(bt == T_FLOAT, "must be");
     return Op_FmaVF;
+  case Op_CMoveF:
+    assert(bt == T_FLOAT, "must be");
+    return Op_CMoveVF;
   case Op_CMoveD:
     assert(bt == T_DOUBLE, "must be");
     return Op_CMoveVD;
@@ -277,8 +277,16 @@
   virtual int Opcode() const;
 };
 
+//------------------------------CMoveVFNode--------------------------------------
+// Vector float conditional move
+class CMoveVFNode : public VectorNode {
+public:
+  CMoveVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {}
+  virtual int Opcode() const;
+};
+
 //------------------------------CMoveVDNode--------------------------------------
-// Vector multiply double
+// Vector double conditional move
 class CMoveVDNode : public VectorNode {
 public:
   CMoveVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {}
@@ -1991,6 +1991,7 @@ typedef PaddedEnd<ObjectMonitor> PaddedObjectMonitor;
   declare_c2_type(MulVDNode, VectorNode)                                  \
   declare_c2_type(FmaVDNode, VectorNode)                                  \
   declare_c2_type(FmaVFNode, VectorNode)                                  \
+  declare_c2_type(CMoveVFNode, VectorNode)                                \
   declare_c2_type(CMoveVDNode, VectorNode)                                \
   declare_c2_type(MulReductionVDNode, ReductionNode)                      \
   declare_c2_type(DivVFNode, VectorNode)                                  \