mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-17 17:44:40 +02:00
8282711: Accelerate Math.signum function for AVX and AVX512 target.
Reviewed-by: sviswanathan, thartmann
This commit is contained in:
parent
0a4a6403bb
commit
e4066628ad
13 changed files with 337 additions and 2 deletions
|
@ -4420,6 +4420,48 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||||
|
KRegister ktmp1, int vec_enc) {
|
||||||
|
if (opcode == Op_SignumVD) {
|
||||||
|
vsubpd(dst, zero, one, vec_enc);
|
||||||
|
// if src < 0 ? -1 : 1
|
||||||
|
evcmppd(ktmp1, k0, src, zero, Assembler::LT_OQ, vec_enc);
|
||||||
|
evblendmpd(dst, ktmp1, one, dst, true, vec_enc);
|
||||||
|
// if src == NaN, -0.0 or 0.0 return src.
|
||||||
|
evcmppd(ktmp1, k0, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||||
|
evblendmpd(dst, ktmp1, dst, src, true, vec_enc);
|
||||||
|
} else {
|
||||||
|
assert(opcode == Op_SignumVF, "");
|
||||||
|
vsubps(dst, zero, one, vec_enc);
|
||||||
|
// if src < 0 ? -1 : 1
|
||||||
|
evcmpps(ktmp1, k0, src, zero, Assembler::LT_OQ, vec_enc);
|
||||||
|
evblendmps(dst, ktmp1, one, dst, true, vec_enc);
|
||||||
|
// if src == NaN, -0.0 or 0.0 return src.
|
||||||
|
evcmpps(ktmp1, k0, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||||
|
evblendmps(dst, ktmp1, dst, src, true, vec_enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void C2_MacroAssembler::vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||||
|
XMMRegister xtmp1, int vec_enc) {
|
||||||
|
if (opcode == Op_SignumVD) {
|
||||||
|
vsubpd(dst, zero, one, vec_enc);
|
||||||
|
// if src < 0 ? -1 : 1
|
||||||
|
vblendvpd(dst, one, dst, src, vec_enc);
|
||||||
|
// if src == NaN, -0.0 or 0.0 return src.
|
||||||
|
vcmppd(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||||
|
vblendvpd(dst, dst, src, xtmp1, vec_enc);
|
||||||
|
} else {
|
||||||
|
assert(opcode == Op_SignumVF, "");
|
||||||
|
vsubps(dst, zero, one, vec_enc);
|
||||||
|
// if src < 0 ? -1 : 1
|
||||||
|
vblendvps(dst, one, dst, src, vec_enc);
|
||||||
|
// if src == NaN, -0.0 or 0.0 return src.
|
||||||
|
vcmpps(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||||
|
vblendvps(dst, dst, src, xtmp1, vec_enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, int mask_len) {
|
void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, int mask_len) {
|
||||||
if (VM_Version::supports_avx512bw()) {
|
if (VM_Version::supports_avx512bw()) {
|
||||||
if (mask_len > 32) {
|
if (mask_len > 32) {
|
||||||
|
|
|
@ -340,6 +340,12 @@ public:
|
||||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
|
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
|
||||||
bool merge, BasicType bt, int vlen_enc);
|
bool merge, BasicType bt, int vlen_enc);
|
||||||
|
|
||||||
|
void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||||
|
XMMRegister xtmp1, int vec_enc);
|
||||||
|
|
||||||
|
void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||||
|
KRegister ktmp1, int vec_enc);
|
||||||
|
|
||||||
void udivI(Register rax, Register divisor, Register rdx);
|
void udivI(Register rax, Register divisor, Register rdx);
|
||||||
void umodI(Register rax, Register divisor, Register rdx);
|
void umodI(Register rax, Register divisor, Register rdx);
|
||||||
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
||||||
|
@ -349,6 +355,7 @@ public:
|
||||||
void umodL(Register rax, Register divisor, Register rdx);
|
void umodL(Register rax, Register divisor, Register rdx);
|
||||||
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||||
int vec_enc);
|
int vec_enc);
|
||||||
|
|
|
@ -1886,6 +1886,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_SignumVD:
|
||||||
|
case Op_SignumVF:
|
||||||
|
if (UseAVX < 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Op_PopCountVI:
|
case Op_PopCountVI:
|
||||||
if (!VM_Version::supports_avx512_vpopcntdq() &&
|
if (!VM_Version::supports_avx512_vpopcntdq() &&
|
||||||
(vlen == 16) && !VM_Version::supports_avx512bw()) {
|
(vlen == 16) && !VM_Version::supports_avx512bw()) {
|
||||||
|
@ -6089,6 +6095,36 @@ instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr)
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// Vector signum (SignumVF / SignumVD) for targets without AVX512VL and
// vectors of at most 32 bytes. Uses a vector temporary for the compare result.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  // dst is declared TEMP as well as xtmp1.
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_signum_avx(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                         $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
|
// Vector signum (SignumVF / SignumVD) for targets with AVX512VL, or for
// 64-byte vectors. Uses an opmask register temporary for the compare result.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  // dst is declared TEMP as well as ktmp1.
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_signum_evex(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                          $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
// ---------------------------------------
|
// ---------------------------------------
|
||||||
// For copySign use 0xE4 as writemask for vpternlog
|
// For copySign use 0xE4 as writemask for vpternlog
|
||||||
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
|
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
|
||||||
|
|
|
@ -4237,7 +4237,7 @@ bool MatchRule::is_vector() const {
|
||||||
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
|
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
|
||||||
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
|
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
|
||||||
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
|
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
|
||||||
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
|
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "SignumVF", "SignumVD", "VectorLongToMask",
|
||||||
// Next are vector mask ops.
|
// Next are vector mask ops.
|
||||||
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
||||||
"RoundVF", "RoundVD",
|
"RoundVF", "RoundVD",
|
||||||
|
|
|
@ -321,6 +321,8 @@ macro(CopySignD)
|
||||||
macro(CopySignF)
|
macro(CopySignF)
|
||||||
macro(SignumD)
|
macro(SignumD)
|
||||||
macro(SignumF)
|
macro(SignumF)
|
||||||
|
macro(SignumVF)
|
||||||
|
macro(SignumVD)
|
||||||
macro(SqrtD)
|
macro(SqrtD)
|
||||||
macro(SqrtF)
|
macro(SqrtF)
|
||||||
macro(RoundF)
|
macro(RoundF)
|
||||||
|
|
|
@ -2456,6 +2456,8 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Op_CopySignD:
|
case Op_CopySignD:
|
||||||
|
case Op_SignumVF:
|
||||||
|
case Op_SignumVD:
|
||||||
case Op_SignumF:
|
case Op_SignumF:
|
||||||
case Op_SignumD: {
|
case Op_SignumD: {
|
||||||
Node* pair = new BinaryNode(n->in(2), n->in(3));
|
Node* pair = new BinaryNode(n->in(2), n->in(3));
|
||||||
|
|
|
@ -2525,6 +2525,13 @@ bool SuperWord::output() {
|
||||||
Node* in2 = vector_opd(p, 2);
|
Node* in2 = vector_opd(p, 2);
|
||||||
vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
|
vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
|
||||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||||
|
} else if (opc == Op_SignumF || opc == Op_SignumD) {
|
||||||
|
assert(n->req() == 4, "four inputs expected");
|
||||||
|
Node* in = vector_opd(p, 1);
|
||||||
|
Node* zero = vector_opd(p, 2);
|
||||||
|
Node* one = vector_opd(p, 3);
|
||||||
|
vn = VectorNode::make(opc, in, zero, one, vlen, velt_basic_type(n));
|
||||||
|
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||||
} else if (n->req() == 3 && !is_cmov_pack(p)) {
|
} else if (n->req() == 3 && !is_cmov_pack(p)) {
|
||||||
// Promote operands to vector
|
// Promote operands to vector
|
||||||
Node* in1 = NULL;
|
Node* in1 = NULL;
|
||||||
|
|
|
@ -245,6 +245,10 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||||
return Op_VectorCastF2X;
|
return Op_VectorCastF2X;
|
||||||
case Op_ConvD2L:
|
case Op_ConvD2L:
|
||||||
return Op_VectorCastD2X;
|
return Op_VectorCastD2X;
|
||||||
|
case Op_SignumF:
|
||||||
|
return Op_SignumVF;
|
||||||
|
case Op_SignumD:
|
||||||
|
return Op_SignumVD;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0; // Unimplemented
|
return 0; // Unimplemented
|
||||||
|
@ -646,6 +650,8 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, Node* n3, const TypeV
|
||||||
switch (vopc) {
|
switch (vopc) {
|
||||||
case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt);
|
case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt);
|
||||||
case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt);
|
case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt);
|
||||||
|
case Op_SignumVD: return new SignumVDNode(n1, n2, n3, vt);
|
||||||
|
case Op_SignumVF: return new SignumVFNode(n1, n2, n3, vt);
|
||||||
default:
|
default:
|
||||||
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
|
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -1674,4 +1674,20 @@ public:
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class SignumVFNode : public VectorNode {
|
||||||
|
public:
|
||||||
|
SignumVFNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
|
||||||
|
: VectorNode(in1, zero, one, vt) {}
|
||||||
|
|
||||||
|
virtual int Opcode() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SignumVDNode : public VectorNode {
|
||||||
|
public:
|
||||||
|
SignumVDNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
|
||||||
|
: VectorNode(in1, zero, one, vt) {}
|
||||||
|
|
||||||
|
virtual int Opcode() const;
|
||||||
|
};
|
||||||
#endif // SHARE_OPTO_VECTORNODE_HPP
|
#endif // SHARE_OPTO_VECTORNODE_HPP
|
||||||
|
|
|
@ -89,6 +89,7 @@ public class TestDoubleVect {
|
||||||
test_divv(a0, a1, -VALUE);
|
test_divv(a0, a1, -VALUE);
|
||||||
test_diva(a0, a1, a3);
|
test_diva(a0, a1, a3);
|
||||||
test_negc(a0, a1);
|
test_negc(a0, a1);
|
||||||
|
test_signum(a0, a1);
|
||||||
test_rint(a0, a1);
|
test_rint(a0, a1);
|
||||||
test_ceil(a0, a1);
|
test_ceil(a0, a1);
|
||||||
test_floor(a0, a1);
|
test_floor(a0, a1);
|
||||||
|
@ -426,6 +427,19 @@ public class TestDoubleVect {
|
||||||
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
|
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_signum(a0, a1);
|
||||||
|
errn += verify("test_signum: ", 0, a0[0], (Double.NaN));
|
||||||
|
errn += verify("test_signum: ", 1, a0[1], 1.0);
|
||||||
|
errn += verify("test_signum: ", 2, a0[2], -1.0);
|
||||||
|
errn += verify("test_signum: ", 3, a0[3], 1.0);
|
||||||
|
errn += verify("test_signum: ", 4, a0[4], 1.0);
|
||||||
|
errn += verify("test_signum: ", 5, a0[5], 1.0);
|
||||||
|
errn += verify("test_signum: ", 6, a0[6], 0.0);
|
||||||
|
errn += verify("test_signum: ", 7, a0[7], -0.0);
|
||||||
|
for (int i=8; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_signum: ", i, a0[i], (double)(((double)(ADD_INIT+i)) > 0.0 ? 1.0 : -1.0));
|
||||||
|
}
|
||||||
|
|
||||||
a1[6] = +0x1.fffffffffffffp-2;
|
a1[6] = +0x1.fffffffffffffp-2;
|
||||||
a1[7] = +0x1.0p-1;
|
a1[7] = +0x1.0p-1;
|
||||||
a1[8] = +0x1.0000000000001p-1;
|
a1[8] = +0x1.0000000000001p-1;
|
||||||
|
@ -590,6 +604,13 @@ public class TestDoubleVect {
|
||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_negc_n: " + (end - start));
|
System.out.println("test_negc_n: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_signum(a0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_signum_n: " + (end - start));
|
||||||
|
|
||||||
start = System.currentTimeMillis();
|
start = System.currentTimeMillis();
|
||||||
for (int i=0; i<ITERS; i++) {
|
for (int i=0; i<ITERS; i++) {
|
||||||
test_sqrt(a0, a1);
|
test_sqrt(a0, a1);
|
||||||
|
@ -693,6 +714,12 @@ public class TestDoubleVect {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_signum(double[] a0, double[] a1) {
|
||||||
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
|
a0[i] = Math.signum(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void test_rint(double[] a0, double[] a1) {
|
static void test_rint(double[] a0, double[] a1) {
|
||||||
for (int i = 0; i < a0.length; i+=1) {
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
a0[i] = Math.rint(a1[i] + ((double)(i))/1000);
|
a0[i] = Math.rint(a1[i] + ((double)(i))/1000);
|
||||||
|
|
|
@ -88,6 +88,7 @@ public class TestFloatVect {
|
||||||
test_divv(a0, a1, -VALUE);
|
test_divv(a0, a1, -VALUE);
|
||||||
test_diva(a0, a1, a3);
|
test_diva(a0, a1, a3);
|
||||||
test_negc(a0, a1);
|
test_negc(a0, a1);
|
||||||
|
test_signum(a0, a1);
|
||||||
test_sqrt(a0, a1);
|
test_sqrt(a0, a1);
|
||||||
test_round(i0, a1);
|
test_round(i0, a1);
|
||||||
}
|
}
|
||||||
|
@ -345,6 +346,7 @@ public class TestFloatVect {
|
||||||
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
|
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
test_negc(a0, a1);
|
test_negc(a0, a1);
|
||||||
errn += verify("test_negc: ", 0, a0[0], (Float.NaN));
|
errn += verify("test_negc: ", 0, a0[0], (Float.NaN));
|
||||||
errn += verify("test_negc: ", 1, a0[1], (Float.NEGATIVE_INFINITY));
|
errn += verify("test_negc: ", 1, a0[1], (Float.NEGATIVE_INFINITY));
|
||||||
|
@ -372,6 +374,19 @@ public class TestFloatVect {
|
||||||
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
|
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_signum(a0, a1);
|
||||||
|
errn += verify("test_signum: ", 0, a0[0], (Float.NaN));
|
||||||
|
errn += verify("test_signum: ", 1, a0[1], 1.0f);
|
||||||
|
errn += verify("test_signum: ", 2, a0[2], -1.0f);
|
||||||
|
errn += verify("test_signum: ", 3, a0[3], 1.0f);
|
||||||
|
errn += verify("test_signum: ", 4, a0[4], 1.0f);
|
||||||
|
errn += verify("test_signum: ", 5, a0[5], 1.0f);
|
||||||
|
errn += verify("test_signum: ", 6, a0[6], 0.0f);
|
||||||
|
errn += verify("test_signum: ", 7, a0[7], -0.0f);
|
||||||
|
for (int i=8; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_signum: ", i, a0[i], (((float)(ADD_INIT+i)) > 0.0f ? 1.0f : -1.0f));
|
||||||
|
}
|
||||||
|
|
||||||
a1[6] = +0x1.fffffep-2f;
|
a1[6] = +0x1.fffffep-2f;
|
||||||
a1[7] = +0x1.0p-1f;
|
a1[7] = +0x1.0p-1f;
|
||||||
a1[8] = +0x1.000002p-1f;
|
a1[8] = +0x1.000002p-1f;
|
||||||
|
@ -400,7 +415,6 @@ public class TestFloatVect {
|
||||||
for (int i=14; i<ARRLEN; i++) {
|
for (int i=14; i<ARRLEN; i++) {
|
||||||
errn += verify("test_round: ", i, i0[i], Math.round(((float)(ADD_INIT+i))));
|
errn += verify("test_round: ", i, i0[i], Math.round(((float)(ADD_INIT+i))));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errn > 0)
|
if (errn > 0)
|
||||||
|
@ -537,6 +551,13 @@ public class TestFloatVect {
|
||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_negc_n: " + (end - start));
|
System.out.println("test_negc_n: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_signum(a0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_signum_n: " + (end - start));
|
||||||
|
|
||||||
start = System.currentTimeMillis();
|
start = System.currentTimeMillis();
|
||||||
for (int i=0; i<ITERS; i++) {
|
for (int i=0; i<ITERS; i++) {
|
||||||
test_sqrt(a0, a1);
|
test_sqrt(a0, a1);
|
||||||
|
@ -635,6 +656,12 @@ public class TestFloatVect {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_signum(float[] a0, float[] a1) {
|
||||||
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
|
a0[i] = Math.signum(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void test_negc(float[] a0, float[] a1) {
|
static void test_negc(float[] a0, float[] a1) {
|
||||||
for (int i = 0; i < a0.length; i+=1) {
|
for (int i = 0; i < a0.length; i+=1) {
|
||||||
a0[i] = (float)(-((float)a1[i]));
|
a0[i] = (float)(-((float)a1[i]));
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8282711
|
||||||
|
* @summary Accelerate Math.signum function for AVX and AVX512.
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires vm.cpu.features ~= ".*avx.*"
|
||||||
|
* @requires os.simpleArch == "x64"
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorization.TestSignumVector
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
|
||||||
|
public class TestSignumVector {
|
||||||
|
private static final int ARRLEN = 1024;
|
||||||
|
private static final int ITERS = 11000;
|
||||||
|
|
||||||
|
private static double [] dinp;
|
||||||
|
private static double [] dout;
|
||||||
|
private static float [] finp;
|
||||||
|
private static float [] fout;
|
||||||
|
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||||
|
"-XX:CompileThresholdScaling=0.3");
|
||||||
|
System.out.println("PASSED");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"SignumVD" , " > 0 "})
|
||||||
|
public void test_signum_double(double[] dout, double[] dinp) {
|
||||||
|
for (int i = 0; i < dout.length; i+=1) {
|
||||||
|
dout[i] = Math.signum(dinp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_signum_double"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_signum_double() {
|
||||||
|
dinp = new double[ARRLEN];
|
||||||
|
dout = new double[ARRLEN];
|
||||||
|
for(int i = 0 ; i < ARRLEN; i++) {
|
||||||
|
dinp[i] = (double)i*1.4;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_signum_double(dout , dinp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = {"SignumVF" , " > 0 "})
|
||||||
|
public void test_signum_float(float[] fout, float[] finp) {
|
||||||
|
for (int i = 0; i < finp.length; i+=1) {
|
||||||
|
fout[i] = Math.signum(finp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_signum_float"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_round() {
|
||||||
|
finp = new float[ARRLEN];
|
||||||
|
fout = new float[ARRLEN];
|
||||||
|
for(int i = 0 ; i < ARRLEN; i++) {
|
||||||
|
finp[i] = (float)i*1.4f;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_signum_float(fout , finp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
70
test/micro/org/openjdk/bench/java/math/VectorSignum.java
Normal file
70
test/micro/org/openjdk/bench/java/math/VectorSignum.java
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.vm.compiler;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
import org.openjdk.jmh.infra.*;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public class VectorSignum {
|
||||||
|
@Param({"256", "512", "1024", "2048"})
|
||||||
|
private static int SIZE;
|
||||||
|
|
||||||
|
private double[] res_doubles = new double[SIZE];
|
||||||
|
private double[] doubles = new double[SIZE];
|
||||||
|
private float[] res_floats = new float[SIZE];
|
||||||
|
private float[] floats = new float[SIZE];
|
||||||
|
|
||||||
|
private Random r = new Random(1024);
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void init() {
|
||||||
|
doubles = new double[SIZE];
|
||||||
|
floats = new float[SIZE];
|
||||||
|
res_doubles = new double[SIZE];
|
||||||
|
res_floats = new float[SIZE];
|
||||||
|
for (int i=0; i<SIZE; i++) {
|
||||||
|
floats[i] = r.nextFloat();
|
||||||
|
doubles[i] = r.nextDouble();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void floatSignum() {
|
||||||
|
for(int i = 0; i < SIZE; i++) {
|
||||||
|
res_floats[i] = Math.signum(floats[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void doubleSignum() {
|
||||||
|
for(int i = 0; i < SIZE; i++) {
|
||||||
|
res_doubles[i] = Math.signum(doubles[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue