mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
8223347: Integration of Vector API (Incubator)
Co-authored-by: Vivek Deshpande <vdeshpande@openjdk.org> Co-authored-by: Qi Feng <qfeng@openjdk.org> Co-authored-by: Ian Graves <igraves@openjdk.org> Co-authored-by: Jean-Philippe Halimi <jphalimi@openjdk.org> Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org> Co-authored-by: Ningsheng Jian <njian@openjdk.org> Co-authored-by: Razvan Lupusoru <rlupusoru@openjdk.org> Co-authored-by: Smita Kamath <svkamath@openjdk.org> Co-authored-by: Rahul Kandu <rkandu@openjdk.org> Co-authored-by: Kishor Kharbas <kkharbas@openjdk.org> Co-authored-by: Eric Liu <Eric.Liu2@arm.com> Co-authored-by: Aaloan Miftah <someusername3@gmail.com> Co-authored-by: John R Rose <jrose@openjdk.org> Co-authored-by: Shravya Rukmannagari <srukmannagar@openjdk.org> Co-authored-by: Paul Sandoz <psandoz@openjdk.org> Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org> Co-authored-by: Lauren Walkowski <lauren.walkowski@arm.com> Co-authored-by: Yang Zhang <Yang.Zhang@arm.com> Co-authored-by: Joshua Zhu <jzhu@openjdk.org> Co-authored-by: Wang Zhuo <wzhuo@openjdk.org> Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org> Reviewed-by: erikj, chegar, kvn, darcy, forax, briangoetz, aph, epavlova, coleenp
This commit is contained in:
parent
386e7e8b73
commit
0c99b19258
336 changed files with 293978 additions and 2083 deletions
|
@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
|
|||
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
|
||||
}
|
||||
|
||||
|
||||
// Compares register src1 against the metadata pointer obj by forwarding to
// cmp_literal32 with an immediate metadata relocation.
// NOTE(review): the cast narrows the pointer to 32 bits, so this presumably
// lives in 32-bit-only code -- confirm against the enclosing #if.
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  const int32_t metadata_bits = (int32_t)obj;
  cmp_literal32(src1, metadata_bits, metadata_Relocation::spec_for_immediate());
}
|
||||
|
@ -2495,6 +2496,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {
|
|||
|
||||
// Register-to-register movdqu.
// Asserts that both registers have an encoding below 16 unless
// VM_Version::supports_avx512vl() holds, and skips the move entirely when
// dst and src are the same register.
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  if (dst->encoding() != src->encoding()) {
    // Only a move between distinct registers needs an instruction.
    Assembler::movdqu(dst, src);
  }
}
|
||||
|
||||
|
@ -2519,6 +2521,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
|
|||
|
||||
// Register-to-register vmovdqu.
// Asserts that both registers have an encoding below 16 unless
// VM_Version::supports_avx512vl() holds, and skips the move entirely when
// dst and src are the same register.
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  if (dst->encoding() != src->encoding()) {
    // Only a move between distinct registers needs an instruction.
    Assembler::vmovdqu(dst, src);
  }
}
|
||||
|
||||
|
@ -2532,6 +2535,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// kmovwl into mask register dst from the memory named by the literal src.
// When reachable(src) holds, the literal is used directly via as_Address().
// Otherwise its address is first loaded into scratch_reg (which is therefore
// overwritten) and the operand becomes [scratch_reg + 0].
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    kmovwl(dst, Address(scratch_reg, 0));
    return;
  }
  kmovwl(dst, as_Address(src));
}
|
||||
|
||||
// evmovdqub into dst from the memory named by the literal src.
//   mask        - when equal to k0, the Assembler overload without a mask
//                 argument is used; otherwise the mask is passed through.
//   merge       - forwarded unchanged to the Assembler.
//   vector_len  - forwarded unchanged to the Assembler.
//   scratch_reg - receives the literal's address (and is overwritten) only
//                 when reachable(src) is false.
void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  const bool unmasked = (mask == k0);

  if (!reachable(src)) {
    // Literal cannot be used directly: go through scratch_reg.
    lea(scratch_reg, src);
    if (unmasked) {
      Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
    } else {
      Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    }
    return;
  }

  if (unmasked) {
    Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
  } else {
    Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
  }
}
|
||||
|
||||
// Masked evmovdquw into dst from the memory named by the literal src.
// mask, merge and vector_len are forwarded unchanged to the Assembler.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
}
|
||||
|
||||
// Masked evmovdqul into dst from the memory named by the literal src.
// mask, merge and vector_len are forwarded unchanged to the Assembler.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
}
|
||||
|
||||
// Masked evmovdquq into dst from the memory named by the literal src.
// mask, merge and vector_len are forwarded unchanged to the Assembler.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
}
|
||||
|
||||
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
|
||||
if (reachable(src)) {
|
||||
Assembler::evmovdquq(dst, as_Address(src), vector_len);
|
||||
|
@ -3019,6 +3080,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
|||
Assembler::vpcmpeqw(dst, nds, src, vector_len);
|
||||
}
|
||||
|
||||
// evpcmpeqd of nds against the memory named by the literal src; kdst and
// mask are forwarded unchanged to the Assembler together with vector_len.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
                               AddressLiteral src, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
}
|
||||
|
||||
// evpcmpd of nds against the memory named by the literal src.
// kdst, mask, comparison and vector_len are forwarded unchanged to the
// Assembler. When reachable(src) is false, the literal's address is loaded
// into scratch_reg (overwriting it) and [scratch_reg + 0] is the operand.
void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
|
||||
|
||||
// evpcmpq of nds against the memory named by the literal src.
// kdst, mask, comparison and vector_len are forwarded unchanged to the
// Assembler. When reachable(src) is false, the literal's address is loaded
// into scratch_reg (overwriting it) and [scratch_reg + 0] is the operand.
void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
|
||||
|
||||
// evpcmpb of nds against the memory named by the literal src.
// kdst, mask, comparison and vector_len are forwarded unchanged to the
// Assembler. When reachable(src) is false, the literal's address is loaded
// into scratch_reg (overwriting it) and [scratch_reg + 0] is the operand.
void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
|
||||
|
||||
// evpcmpw of nds against the memory named by the literal src.
// kdst, mask, comparison and vector_len are forwarded unchanged to the
// Assembler. When reachable(src) is false, the literal's address is loaded
// into scratch_reg (overwriting it) and [scratch_reg + 0] is the operand.
void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
    return;
  }
  Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
}
|
||||
|
||||
// Dispatches a vector compare on element width: Assembler::Q goes to
// Assembler::vpcmpCCq, every other width goes to Assembler::vpcmpCCbwd.
// cond_encoding and vector_len are forwarded unchanged.
void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
  if (width != Assembler::Q) {
    Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
    return;
  }
  Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
}
|
||||
|
||||
// Compares nds with src under predicate cond and leaves the result in dst.
// Only an "equal" and a "greater-than" encoding are used; the remaining
// predicates are derived by swapping the two source operands and/or by
// XOR-ing the result with the constant at
// StubRoutines::x86::vector_all_bits_set().
//   width       - Assembler::Q selects the fixed encodings 0x29 / 0x37; any
//                 other width uses 0x74 + width / 0x64 + width.
//   scratch_reg - forwarded to vpxor for the predicates that need the XOR.
// NOTE(review): the encodings look like the PCMPEQ* / PCMPGT* opcode bytes,
// and the XOR constant is presumably all ones (i.e. a bitwise NOT) --
// confirm against the Intel SDM and the stub definition.
void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
  const bool is_quad = (width == Assembler::Q);
  const int eq_enc = is_quad ? 0x29 : 0x74 + width;
  const int gt_enc = is_quad ? 0x37 : 0x64 + width;

  // Set for predicates expressed as the complement of the emitted compare.
  bool complement = false;

  switch (cond) {
  case eq:
    vpcmpCC(dst, nds, src, eq_enc, width, vector_len);
    break;
  case neq:
    // !(nds == src)
    vpcmpCC(dst, nds, src, eq_enc, width, vector_len);
    complement = true;
    break;
  case nle:
    // nds > src
    vpcmpCC(dst, nds, src, gt_enc, width, vector_len);
    break;
  case le:
    // !(nds > src)
    vpcmpCC(dst, nds, src, gt_enc, width, vector_len);
    complement = true;
    break;
  case lt:
    // src > nds (operands swapped)
    vpcmpCC(dst, src, nds, gt_enc, width, vector_len);
    break;
  case nlt:
    // !(src > nds) (operands swapped)
    vpcmpCC(dst, src, nds, gt_enc, width, vector_len);
    complement = true;
    break;
  default:
    assert(false, "Should not reach here");
  }

  if (complement) {
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
  }
}
|
||||
|
||||
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||
Assembler::vpmovzxbw(dst, src, vector_len);
|
||||
|
@ -3143,6 +3296,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
|
|||
}
|
||||
}
|
||||
|
||||
// Masked evpord of nds with the memory named by the literal src, into dst.
// mask, merge and vector_len are forwarded unchanged to the Assembler.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
                            bool merge, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
    return;
  }
  Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
}
|
||||
|
||||
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
|
||||
if (reachable(src)) {
|
||||
vdivsd(dst, nds, as_Address(src));
|
||||
|
@ -3239,7 +3402,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
|
|||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------
|
||||
// vpermd with the memory named by the literal src as the last source operand.
// dst, nds and vector_len are forwarded unchanged to the Assembler.
// When reachable(src) is false, the literal's address is loaded into
// scratch_reg (overwriting it) and [scratch_reg + 0] is used as the operand.
void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::vpermd(dst, nds, as_Address(src), vector_len);
}
|
||||
|
||||
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
|
||||
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
|
||||
|
@ -5765,7 +5935,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
|||
|
||||
bind(VECTOR64_LOOP);
|
||||
// AVX512 code to compare 64 byte vectors.
|
||||
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
|
||||
evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
kortestql(k7, k7);
|
||||
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
|
||||
|
@ -5784,7 +5954,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
|||
notq(tmp2);
|
||||
kmovql(k3, tmp2);
|
||||
|
||||
evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
|
||||
evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
|
||||
ktestql(k7, k3);
|
||||
|
@ -7579,7 +7749,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
|||
notl(result);
|
||||
kmovdl(k3, result);
|
||||
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(k2, k3);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
@ -7604,7 +7774,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
|||
negptr(len);
|
||||
|
||||
bind(copy_32_loop);
|
||||
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
|
||||
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
kortestdl(k2, k2);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
@ -7629,7 +7799,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
|||
|
||||
kmovdl(k3, result);
|
||||
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(k2, k3);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
@ -7774,7 +7944,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
|
|||
// inflate 32 chars per iter
|
||||
bind(copy_32_loop);
|
||||
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
|
||||
addptr(len, 32);
|
||||
jcc(Assembler::notZero, copy_32_loop);
|
||||
|
||||
|
@ -7789,7 +7959,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
|
|||
notl(tmp3_aliased);
|
||||
kmovdl(k2, tmp3_aliased);
|
||||
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
|
||||
|
||||
jmp(done);
|
||||
bind(avx3_threshold);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue