8284813: x86 Code cleanup related to move instructions.

Reviewed-by: kvn, sviswanathan
This commit is contained in:
Jatin Bhateja 2022-05-05 03:20:01 +00:00
parent d43ae723b8
commit 3092b5615d
8 changed files with 123 additions and 152 deletions

View file

@ -3021,53 +3021,27 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
} }
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { void Assembler::evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), ""); assert(VM_Version::supports_avx512vlbw(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction(); attributes.set_is_evex_instruction();
if (merge) { if (merge) {
attributes.reset_is_clear_context(); attributes.reset_is_clear_context();
} }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3; int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode)); emit_int16(0x6F, (0xC0 | encode));
} }
void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), ""); // Unmasked instruction
InstructionMark im(this); evmovdqub(dst, k0, src, /*merge*/ false, vector_len);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
} }
void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), ""); assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask); attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction(); attributes.set_is_evex_instruction();
@ -3079,11 +3053,16 @@ void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool mer
emit_operand(dst, src); emit_operand(dst, src);
} }
void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
// Unmasked instruction
evmovdqub(dst, k0, src, /*merge*/ false, vector_len);
}
void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), ""); assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask); attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction(); attributes.set_is_evex_instruction();
@ -3095,25 +3074,15 @@ void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool mer
emit_operand(src, dst); emit_operand(src, dst);
} }
void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), ""); // Unmasked instruction
InstructionMark im(this); evmovdquw(dst, k0, src, /*merge*/ false, vector_len);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
} }
void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), ""); assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask); attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction(); attributes.set_is_evex_instruction();
@ -3125,27 +3094,16 @@ void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool mer
emit_operand(dst, src); emit_operand(dst, src);
} }
void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), ""); // Unmasked instruction
assert(src != xnoreg, "sanity"); evmovdquw(dst, k0, src, /*merge*/ false, vector_len);
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
} }
void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), ""); assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity"); assert(src != xnoreg, "sanity");
InstructionMark im(this); InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask); attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction(); attributes.set_is_evex_instruction();

View file

@ -1552,27 +1552,33 @@ private:
void vmovdqu(XMMRegister dst, XMMRegister src); void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector // Move Unaligned 512bit Vector
void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, Address src, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdquw(XMMRegister dst, Address src, int vector_len);
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len); void evmovdquw(Address dst, XMMRegister src, int vector_len);
void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len); void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len); void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len); void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len); void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len); void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Move lower 64bit to high 64bit in 128bit register // Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src); void movlhps(XMMRegister dst, XMMRegister src);

View file

@ -1983,12 +1983,12 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X
reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2); reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
} }
void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len) {
MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); MacroAssembler::evmovdqu(type, kmask, dst, src, merge, vector_len);
} }
void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len) {
MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); MacroAssembler::evmovdqu(type, kmask, dst, src, merge, vector_len);
} }

View file

@ -119,8 +119,8 @@ public:
void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
// extract // extract
void extract(BasicType typ, Register dst, XMMRegister src, int idx); void extract(BasicType typ, Register dst, XMMRegister src, int idx);

View file

@ -2606,19 +2606,11 @@ void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_
void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) { int vector_len, Register scratch_reg) {
if (reachable(src)) { if (reachable(src)) {
if (mask == k0) {
Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
} else {
Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len); Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
}
} else { } else {
lea(scratch_reg, src); lea(scratch_reg, src);
if (mask == k0) {
Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
} else {
Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len); Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
} }
}
} }
void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
@ -5310,23 +5302,23 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
movq(Address(base, disp), xtmp); movq(Address(base, disp), xtmp);
break; break;
case 2: case 2:
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_128bit); evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_128bit);
break; break;
case 3: case 3:
movl(rtmp, 0x7); movl(rtmp, 0x7);
kmovwl(mask, rtmp); kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_256bit);
break; break;
case 4: case 4:
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
break; break;
case 5: case 5:
if (use64byteVector) { if (use64byteVector) {
movl(rtmp, 0x1F); movl(rtmp, 0x1F);
kmovwl(mask, rtmp); kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
} else { } else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
movq(Address(base, disp + 32), xtmp); movq(Address(base, disp + 32), xtmp);
} }
break; break;
@ -5334,22 +5326,22 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
if (use64byteVector) { if (use64byteVector) {
movl(rtmp, 0x3F); movl(rtmp, 0x3F);
kmovwl(mask, rtmp); kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
} else { } else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit); evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, false, Assembler::AVX_128bit);
} }
break; break;
case 7: case 7:
if (use64byteVector) { if (use64byteVector) {
movl(rtmp, 0x7F); movl(rtmp, 0x7F);
kmovwl(mask, rtmp); kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); evmovdqu(T_LONG, mask, Address(base, disp), xtmp, true, Assembler::AVX_512bit);
} else { } else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, k0, Address(base, disp), xtmp, false, Assembler::AVX_256bit);
movl(rtmp, 0x7); movl(rtmp, 0x7);
kmovwl(mask, rtmp); kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit); evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, true, Assembler::AVX_256bit);
} }
break; break;
default: default:
@ -6321,7 +6313,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(VECTOR64_LOOP); bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors. // AVX512 code to compare 64 byte vectors.
evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit); evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7); kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
@ -8162,7 +8154,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
negptr(len); negptr(len);
bind(copy_32_loop); bind(copy_32_loop);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit); evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(mask1, mask1); kortestdl(mask1, mask1);
jcc(Assembler::carryClear, return_zero); jcc(Assembler::carryClear, return_zero);
@ -8332,7 +8324,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
// inflate 32 chars per iter // inflate 32 chars per iter
bind(copy_32_loop); bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit); vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit); evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
addptr(len, 32); addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop); jcc(Assembler::notZero, copy_32_loop);
@ -8434,23 +8426,23 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
} }
void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len) {
switch(type) { switch(type) {
case T_BYTE: case T_BYTE:
case T_BOOLEAN: case T_BOOLEAN:
evmovdqub(dst, kmask, src, false, vector_len); evmovdqub(dst, kmask, src, merge, vector_len);
break; break;
case T_CHAR: case T_CHAR:
case T_SHORT: case T_SHORT:
evmovdquw(dst, kmask, src, false, vector_len); evmovdquw(dst, kmask, src, merge, vector_len);
break; break;
case T_INT: case T_INT:
case T_FLOAT: case T_FLOAT:
evmovdqul(dst, kmask, src, false, vector_len); evmovdqul(dst, kmask, src, merge, vector_len);
break; break;
case T_LONG: case T_LONG:
case T_DOUBLE: case T_DOUBLE:
evmovdquq(dst, kmask, src, false, vector_len); evmovdquq(dst, kmask, src, merge, vector_len);
break; break;
default: default:
fatal("Unexpected type argument %s", type2name(type)); fatal("Unexpected type argument %s", type2name(type));
@ -8458,23 +8450,23 @@ void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst,
} }
} }
void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len) {
switch(type) { switch(type) {
case T_BYTE: case T_BYTE:
case T_BOOLEAN: case T_BOOLEAN:
evmovdqub(dst, kmask, src, true, vector_len); evmovdqub(dst, kmask, src, merge, vector_len);
break; break;
case T_CHAR: case T_CHAR:
case T_SHORT: case T_SHORT:
evmovdquw(dst, kmask, src, true, vector_len); evmovdquw(dst, kmask, src, merge, vector_len);
break; break;
case T_INT: case T_INT:
case T_FLOAT: case T_FLOAT:
evmovdqul(dst, kmask, src, true, vector_len); evmovdqul(dst, kmask, src, merge, vector_len);
break; break;
case T_LONG: case T_LONG:
case T_DOUBLE: case T_DOUBLE:
evmovdquq(dst, kmask, src, true, vector_len); evmovdquq(dst, kmask, src, merge, vector_len);
break; break;
default: default:
fatal("Unexpected type argument %s", type2name(type)); fatal("Unexpected type argument %s", type2name(type));
@ -8863,7 +8855,7 @@ void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRe
movptr(temp, -1); movptr(temp, -1);
bzhiq(temp, temp, length); bzhiq(temp, temp, length);
kmov(mask, temp); kmov(mask, temp);
evmovdqu(bt, mask, dst, xmm, vec_enc); evmovdqu(bt, mask, dst, xmm, true, vec_enc);
} }
// Set memory operation for length "less than" 64 bytes. // Set memory operation for length "less than" 64 bytes.
@ -8995,7 +8987,7 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
mov64(r8, -1L); mov64(r8, -1L);
bzhiq(r8, r8, rtmp); bzhiq(r8, r8, rtmp);
kmovql(k2, r8); kmovql(k2, r8);
evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit); evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, true, Assembler::AVX_256bit);
addq(to, rtmp); addq(to, rtmp);
shrq(rtmp, shift); shrq(rtmp, shift);
subq(count, rtmp); subq(count, rtmp);
@ -9065,7 +9057,7 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
mov64(r8, -1L); mov64(r8, -1L);
bzhiq(r8, r8, rtmp); bzhiq(r8, r8, rtmp);
kmovql(k2, r8); kmovql(k2, r8);
evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit); evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, true, Assembler::AVX_512bit);
addq(to, rtmp); addq(to, rtmp);
shrq(rtmp, shift); shrq(rtmp, shift);
subq(count, rtmp); subq(count, rtmp);

View file

@ -1147,49 +1147,64 @@ public:
// AVX512 Unaligned // AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqub(dst, src, vector_len); }
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdqub(dst, mask, src, merge, vector_len);
}
}
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } void evmovdquw(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } void evmovdquw(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquw(dst, src, vector_len); }
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdquw(dst, mask, src, merge, vector_len);
}
}
void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() == src->encoding()) return; if (dst->encoding() != src->encoding()) {
Assembler::evmovdqul(dst, src, vector_len); Assembler::evmovdqul(dst, src, vector_len);
} }
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } }
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() == src->encoding() && mask == k0) return; if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdqul(dst, mask, src, merge, vector_len); Assembler::evmovdqul(dst, mask, src, merge, vector_len);
} }
}
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() != src->encoding()) {
Assembler::evmovdquq(dst, src, vector_len);
}
}
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() == src->encoding()) return;
Assembler::evmovdquq(dst, src, vector_len);
}
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() == src->encoding() && mask == k0) return; if (dst->encoding() != src->encoding() || mask != k0) {
Assembler::evmovdquq(dst, mask, src, merge, vector_len); Assembler::evmovdquq(dst, mask, src, merge, vector_len);
} }
}
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
// Move Aligned Double Quadword // Move Aligned Double Quadword

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, 2021, Intel Corporation. All rights reserved. * Copyright (c) 2020, 2022, Intel Corporation. All rights reserved.
* *
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
@ -199,8 +199,8 @@ void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister x
mov64(temp, -1L); mov64(temp, -1L);
bzhiq(temp, temp, length); bzhiq(temp, temp, length);
kmovql(mask, temp); kmovql(mask, temp);
evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_512bit); evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), false, Assembler::AVX_512bit);
evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit); evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, true, Assembler::AVX_512bit);
} }
} }
@ -214,8 +214,8 @@ void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister x
mov64(temp, -1L); mov64(temp, -1L);
bzhiq(temp, temp, length); bzhiq(temp, temp, length);
kmovql(mask, temp); kmovql(mask, temp);
evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_256bit); evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), false, Assembler::AVX_256bit);
evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_256bit); evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, true, Assembler::AVX_256bit);
} }

View file

@ -8817,7 +8817,7 @@ instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
ins_encode %{ ins_encode %{
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(this); int vector_len = vector_length_encoding(this);
__ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len); __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
@ -8850,7 +8850,7 @@ instruct vmasked_store64(memory mem, vec src, kReg mask) %{
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(src_node); int vector_len = vector_length_encoding(src_node);
__ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len); __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vector_len);
%} %}
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}