8223347: Integration of Vector API (Incubator)

Co-authored-by: Vivek Deshpande <vdeshpande@openjdk.org>
Co-authored-by: Qi Feng <qfeng@openjdk.org>
Co-authored-by: Ian Graves <igraves@openjdk.org>
Co-authored-by: Jean-Philippe Halimi <jphalimi@openjdk.org>
Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Co-authored-by: Razvan Lupusoru <rlupusoru@openjdk.org>
Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Rahul Kandu <rkandu@openjdk.org>
Co-authored-by: Kishor Kharbas <kkharbas@openjdk.org>
Co-authored-by: Eric Liu <Eric.Liu2@arm.com>
Co-authored-by: Aaloan Miftah <someusername3@gmail.com>
Co-authored-by: John R Rose <jrose@openjdk.org>
Co-authored-by: Shravya Rukmannagari <srukmannagar@openjdk.org>
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Co-authored-by: Lauren Walkowski <lauren.walkowski@arm.com>
Co-authored-by: Yang Zhang <Yang.Zhang@arm.com>
Co-authored-by: Joshua Zhu <jzhu@openjdk.org>
Co-authored-by: Wang Zhuo <wzhuo@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: erikj, chegar, kvn, darcy, forax, briangoetz, aph, epavlova, coleenp
This commit is contained in:
Paul Sandoz 2020-10-14 20:02:46 +00:00
parent 386e7e8b73
commit 0c99b19258
336 changed files with 293978 additions and 2083 deletions

View file

@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
// Compares register src1 against the metadata pointer obj, narrowed to a
// 32-bit immediate, and tags the immediate with a metadata relocation spec.
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  const int32_t metadata_imm = (int32_t)obj;
  cmp_literal32(src1, metadata_imm, metadata_Relocation::spec_for_immediate());
}
@ -2495,6 +2496,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {
// Register-to-register movdqu.
// Registers with encoding >= 16 are only accepted when AVX512VL is supported.
// Nothing is emitted when dst and src have the same encoding.
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  const bool is_self_move = (dst->encoding() == src->encoding());
  if (!is_self_move) {
    Assembler::movdqu(dst, src);
  }
}
@ -2519,6 +2521,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
// Register-to-register vmovdqu.
// Registers with encoding >= 16 are only accepted when AVX512VL is supported.
// Nothing is emitted when dst and src have the same encoding.
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  const bool is_self_move = (dst->encoding() == src->encoding());
  if (!is_self_move) {
    Assembler::vmovdqu(dst, src);
  }
}
@ -2532,6 +2535,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
}
// kmovwl with an AddressLiteral memory operand.
// If src is not reachable, its address is first placed in scratch_reg via lea
// and the operand becomes [scratch_reg + 0]; otherwise as_Address(src) is
// used directly and scratch_reg is left alone.
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    kmovwl(dst, Address(scratch_reg, 0));
    return;
  }
  kmovwl(dst, as_Address(src));
}
// evmovdqub from an AddressLiteral source.
//   mask        - when equal to k0 the overload without a mask operand is
//                 used, otherwise the masked overload
//   merge       - forwarded unchanged to the Assembler routine
//   scratch_reg - receives the address of src (via lea) only when src is not
//                 reachable; the operand is then [scratch_reg + 0]
void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  const bool no_mask = (mask == k0);

  if (!reachable(src)) {
    // Go through the scratch register when src cannot be addressed directly.
    lea(scratch_reg, src);
    if (no_mask) {
      Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
      return;
    }
    Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }

  if (no_mask) {
    Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
    return;
  }
  Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
}
// Masked evmovdquw from an AddressLiteral source.
// mask, merge and vector_len are forwarded unchanged. If src is not
// reachable, scratch_reg is loaded with its address via lea and the operand
// becomes [scratch_reg + 0].
void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
}
// Masked evmovdqul from an AddressLiteral source.
// mask, merge and vector_len are forwarded unchanged. If src is not
// reachable, scratch_reg is loaded with its address via lea and the operand
// becomes [scratch_reg + 0].
void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
}
// Masked evmovdquq from an AddressLiteral source.
// mask, merge and vector_len are forwarded unchanged. If src is not
// reachable, scratch_reg is loaded with its address via lea and the operand
// becomes [scratch_reg + 0].
void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
}
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
@ -3019,6 +3080,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
Assembler::vpcmpeqw(dst, nds, src, vector_len);
}
// evpcmpeqd of nds against an AddressLiteral memory operand, with the result
// directed to kdst under mask.
// If src is not reachable, scratch_reg is loaded with its address via lea and
// the operand becomes [scratch_reg + 0].
void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
                               AddressLiteral src, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
}
// evpcmpd of nds against an AddressLiteral memory operand.
// kdst, mask, comparison and vector_len are forwarded unchanged. If src is
// not reachable, scratch_reg is loaded with its address via lea and the
// operand becomes [scratch_reg + 0].
void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
// evpcmpq of nds against an AddressLiteral memory operand.
// kdst, mask, comparison and vector_len are forwarded unchanged. If src is
// not reachable, scratch_reg is loaded with its address via lea and the
// operand becomes [scratch_reg + 0].
void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
// evpcmpb of nds against an AddressLiteral memory operand.
// kdst, mask, comparison and vector_len are forwarded unchanged. If src is
// not reachable, scratch_reg is loaded with its address via lea and the
// operand becomes [scratch_reg + 0].
void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
// evpcmpw of nds against an AddressLiteral memory operand.
// kdst, mask, comparison and vector_len are forwarded unchanged. If src is
// not reachable, scratch_reg is loaded with its address via lea and the
// operand becomes [scratch_reg + 0].
void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
// Dispatches a vector compare on element width: Assembler::Q goes to
// vpcmpCCq, every other width goes to vpcmpCCbwd. cond_encoding and
// vector_len are forwarded unchanged.
void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
  if (width != Assembler::Q) {
    Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
    return;
  }
  Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
}
// Builds the six comparison predicates out of an "equal" and a
// "greater-than" compare (via vpcmpCC), optionally with swapped operands
// and/or an inverted result:
//
//   eq  : EQ(nds, src)
//   neq : EQ(nds, src), inverted
//   le  : GT(nds, src), inverted
//   nlt : GT(src, nds), inverted
//   lt  : GT(src, nds)
//   nle : GT(nds, src)
//
// Inversion is a vpxor of dst with the vector_all_bits_set stub constant;
// scratch_reg is handed to that vpxor and is not used otherwise.
// The encodings are 0x29 / 0x37 for width Q and 0x74 + width / 0x64 + width
// for the other widths.
// NOTE(review): these look like the PCMPEQ* / PCMPGT* opcode bytes -- confirm
// against the instruction set reference.
// An unknown predicate trips the assert; without asserts nothing is emitted.
void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
  const bool is_quad = (width == Assembler::Q);
  const int eq_cond_enc = is_quad ? 0x29 : 0x74 + width;
  const int gt_cond_enc = is_quad ? 0x37 : 0x64 + width;

  // Decide how the predicate maps onto the two available compares.
  int  cond_enc      = gt_cond_enc;
  bool swap_operands = false;
  bool invert_result = false;
  switch (cond) {
  case eq:
    cond_enc = eq_cond_enc;
    break;
  case neq:
    cond_enc = eq_cond_enc;
    invert_result = true;
    break;
  case le:
    invert_result = true;
    break;
  case nlt:
    swap_operands = true;
    invert_result = true;
    break;
  case lt:
    swap_operands = true;
    break;
  case nle:
    break;
  default:
    assert(false, "Should not reach here");
    return;
  }

  if (swap_operands) {
    vpcmpCC(dst, src, nds, cond_enc, width, vector_len);
  } else {
    vpcmpCC(dst, nds, src, cond_enc, width, vector_len);
  }

  if (invert_result) {
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
  }
}
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpmovzxbw(dst, src, vector_len);
@ -3143,6 +3296,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
// Masked evpord of nds with an AddressLiteral memory operand.
// mask, merge and vector_len are forwarded unchanged. If src is not
// reachable, scratch_reg is loaded with its address via lea and the operand
// becomes [scratch_reg + 0].
void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
                            bool merge, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
}
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
vdivsd(dst, nds, as_Address(src));
@ -3239,7 +3402,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}
//-------------------------------------------------------------------------------------------
// vpermd with an AddressLiteral memory operand.
// If src is not reachable, scratch_reg is loaded with its address via lea and
// the operand becomes [scratch_reg + 0]; otherwise as_Address(src) is used
// directly.
void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::vpermd(dst, nds, as_Address(src), vector_len);
}
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
@ -5765,7 +5935,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
@ -5784,7 +5954,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
notq(tmp2);
kmovql(k3, tmp2);
evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
@ -7579,7 +7749,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
notl(result);
kmovdl(k3, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@ -7604,7 +7774,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
negptr(len);
bind(copy_32_loop);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
@ -7629,7 +7799,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(k3, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@ -7774,7 +7944,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
@ -7789,7 +7959,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);