mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-16 17:14:41 +02:00
8279282: [vectorapi] Matcher::supports_vector_comparison_unsigned is not needed on x86
Reviewed-by: kvn, sviswanathan, vlivanov
This commit is contained in:
parent
3419ff7ba7
commit
02390c79b1
8 changed files with 104 additions and 138 deletions
|
@ -2473,6 +2473,16 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
|
||||||
emit_int16(0x12, 0xC0 | encode);
|
emit_int16(0x12, 0xC0 | encode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Assembler::vmovddup(XMMRegister dst, Address src, int vector_len) {
|
||||||
|
assert(VM_Version::supports_avx(), "");
|
||||||
|
InstructionMark im(this);
|
||||||
|
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
|
attributes.set_rex_vex_w_reverted();
|
||||||
|
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||||
|
emit_int8(0x12);
|
||||||
|
emit_operand(dst, src);
|
||||||
|
}
|
||||||
|
|
||||||
void Assembler::kmovbl(KRegister dst, KRegister src) {
|
void Assembler::kmovbl(KRegister dst, KRegister src) {
|
||||||
assert(VM_Version::supports_avx512dq(), "");
|
assert(VM_Version::supports_avx512dq(), "");
|
||||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||||
|
|
|
@ -1467,6 +1467,7 @@ private:
|
||||||
void movb(Register dst, Address src);
|
void movb(Register dst, Address src);
|
||||||
|
|
||||||
void movddup(XMMRegister dst, XMMRegister src);
|
void movddup(XMMRegister dst, XMMRegister src);
|
||||||
|
void vmovddup(XMMRegister dst, Address src, int vector_len);
|
||||||
|
|
||||||
void kandbl(KRegister dst, KRegister src1, KRegister src2);
|
void kandbl(KRegister dst, KRegister src1, KRegister src2);
|
||||||
void kandwl(KRegister dst, KRegister src1, KRegister src2);
|
void kandwl(KRegister dst, KRegister src1, KRegister src2);
|
||||||
|
|
|
@ -2192,84 +2192,6 @@ void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison,
|
|
||||||
int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) {
|
|
||||||
int vlen_enc = vector_length_encoding(vlen_in_bytes*2);
|
|
||||||
switch (typ) {
|
|
||||||
case T_BYTE:
|
|
||||||
vpmovzxbw(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxbw(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
|
|
||||||
vpacksswb(dst, dst, dst, vlen_enc);
|
|
||||||
break;
|
|
||||||
case T_SHORT:
|
|
||||||
vpmovzxwd(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxwd(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
|
|
||||||
vpackssdw(dst, dst, dst, vlen_enc);
|
|
||||||
break;
|
|
||||||
case T_INT:
|
|
||||||
vpmovzxdq(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxdq(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
|
|
||||||
vpermilps(dst, dst, 8, vlen_enc);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(false, "Should not reach here");
|
|
||||||
}
|
|
||||||
if (vlen_in_bytes == 16) {
|
|
||||||
vpermpd(dst, dst, 0x8, vlen_enc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
|
|
||||||
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) {
|
|
||||||
int vlen_enc = vector_length_encoding(vlen_in_bytes);
|
|
||||||
switch (typ) {
|
|
||||||
case T_BYTE:
|
|
||||||
vpmovzxbw(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxbw(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
|
|
||||||
vextracti128(vtmp1, src1, 1);
|
|
||||||
vextracti128(vtmp2, src2, 1);
|
|
||||||
vpmovzxbw(vtmp1, vtmp1, vlen_enc);
|
|
||||||
vpmovzxbw(vtmp2, vtmp2, vlen_enc);
|
|
||||||
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
|
|
||||||
vpacksswb(dst, dst, vtmp3, vlen_enc);
|
|
||||||
vpermpd(dst, dst, 0xd8, vlen_enc);
|
|
||||||
break;
|
|
||||||
case T_SHORT:
|
|
||||||
vpmovzxwd(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxwd(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
|
|
||||||
vextracti128(vtmp1, src1, 1);
|
|
||||||
vextracti128(vtmp2, src2, 1);
|
|
||||||
vpmovzxwd(vtmp1, vtmp1, vlen_enc);
|
|
||||||
vpmovzxwd(vtmp2, vtmp2, vlen_enc);
|
|
||||||
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
|
|
||||||
vpackssdw(dst, dst, vtmp3, vlen_enc);
|
|
||||||
vpermpd(dst, dst, 0xd8, vlen_enc);
|
|
||||||
break;
|
|
||||||
case T_INT:
|
|
||||||
vpmovzxdq(vtmp1, src1, vlen_enc);
|
|
||||||
vpmovzxdq(vtmp2, src2, vlen_enc);
|
|
||||||
vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
|
|
||||||
vpshufd(dst, dst, 8, vlen_enc);
|
|
||||||
vpermq(dst, dst, 8, vlen_enc);
|
|
||||||
vextracti128(vtmp1, src1, 1);
|
|
||||||
vextracti128(vtmp2, src2, 1);
|
|
||||||
vpmovzxdq(vtmp1, vtmp1, vlen_enc);
|
|
||||||
vpmovzxdq(vtmp2, vtmp2, vlen_enc);
|
|
||||||
vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
|
|
||||||
vpshufd(vtmp3, vtmp3, 8, vlen_enc);
|
|
||||||
vpermq(vtmp3, vtmp3, 0x80, vlen_enc);
|
|
||||||
vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(false, "Should not reach here");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
|
void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
|
||||||
switch(typ) {
|
switch(typ) {
|
||||||
case T_BYTE:
|
case T_BYTE:
|
||||||
|
|
|
@ -146,12 +146,6 @@ public:
|
||||||
|
|
||||||
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
|
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
|
||||||
|
|
||||||
// vector compare
|
|
||||||
void vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
|
|
||||||
XMMRegister vtmp1, XMMRegister vtmp2, Register scratch);
|
|
||||||
void vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
|
|
||||||
XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch);
|
|
||||||
|
|
||||||
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
|
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
|
||||||
|
|
||||||
// dst = src1 reduce(op, src2) using vtmp as temps
|
// dst = src1 reduce(op, src2) using vtmp as temps
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -2702,6 +2702,15 @@ void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MacroAssembler::vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
|
||||||
|
if (reachable(src)) {
|
||||||
|
Assembler::vmovddup(dst, as_Address(src), vector_len);
|
||||||
|
} else {
|
||||||
|
lea(rscratch, src);
|
||||||
|
Assembler::vmovddup(dst, Address(rscratch, 0), vector_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
|
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
|
||||||
if (reachable(src)) {
|
if (reachable(src)) {
|
||||||
Assembler::mulsd(dst, as_Address(src));
|
Assembler::mulsd(dst, as_Address(src));
|
||||||
|
@ -3151,6 +3160,15 @@ void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_l
|
||||||
Assembler::vpbroadcastw(dst, src, vector_len);
|
Assembler::vpbroadcastw(dst, src, vector_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MacroAssembler::vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
|
||||||
|
if (reachable(src)) {
|
||||||
|
Assembler::vbroadcastsd(dst, as_Address(src), vector_len);
|
||||||
|
} else {
|
||||||
|
lea(rscratch, src);
|
||||||
|
Assembler::vbroadcastsd(dst, Address(rscratch, 0), vector_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||||
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||||
Assembler::vpcmpeqb(dst, nds, src, vector_len);
|
Assembler::vpcmpeqb(dst, nds, src, vector_len);
|
||||||
|
@ -3219,7 +3237,7 @@ void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
|
void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister xtmp, ComparisonPredicate cond, Width width, int vector_len) {
|
||||||
int eq_cond_enc = 0x29;
|
int eq_cond_enc = 0x29;
|
||||||
int gt_cond_enc = 0x37;
|
int gt_cond_enc = 0x37;
|
||||||
if (width != Assembler::Q) {
|
if (width != Assembler::Q) {
|
||||||
|
@ -3232,15 +3250,18 @@ void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||||
break;
|
break;
|
||||||
case neq:
|
case neq:
|
||||||
vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
|
vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
|
||||||
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
|
vallones(xtmp, vector_len);
|
||||||
|
vpxor(dst, xtmp, dst, vector_len);
|
||||||
break;
|
break;
|
||||||
case le:
|
case le:
|
||||||
vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
|
vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
|
||||||
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
|
vallones(xtmp, vector_len);
|
||||||
|
vpxor(dst, xtmp, dst, vector_len);
|
||||||
break;
|
break;
|
||||||
case nlt:
|
case nlt:
|
||||||
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
|
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
|
||||||
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
|
vallones(xtmp, vector_len);
|
||||||
|
vpxor(dst, xtmp, dst, vector_len);
|
||||||
break;
|
break;
|
||||||
case lt:
|
case lt:
|
||||||
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
|
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
|
||||||
|
|
|
@ -1176,6 +1176,9 @@ public:
|
||||||
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
|
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
|
||||||
void movsd(XMMRegister dst, AddressLiteral src);
|
void movsd(XMMRegister dst, AddressLiteral src);
|
||||||
|
|
||||||
|
using Assembler::vmovddup;
|
||||||
|
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
|
||||||
|
|
||||||
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
|
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
|
||||||
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
|
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
|
||||||
void mulpd(XMMRegister dst, AddressLiteral src);
|
void mulpd(XMMRegister dst, AddressLiteral src);
|
||||||
|
@ -1284,6 +1287,9 @@ public:
|
||||||
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
|
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
|
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
|
||||||
|
|
||||||
|
using Assembler::vbroadcastsd;
|
||||||
|
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
|
||||||
|
|
||||||
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
|
||||||
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
|
@ -1310,7 +1316,7 @@ public:
|
||||||
void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
|
void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
|
||||||
|
|
||||||
// Emit comparison instruction for the specified comparison predicate.
|
// Emit comparison instruction for the specified comparison predicate.
|
||||||
void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
|
void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister xtmp, ComparisonPredicate cond, Width width, int vector_len);
|
||||||
void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
|
void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
|
||||||
|
|
||||||
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
|
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -166,20 +166,7 @@
|
||||||
|
|
||||||
// Does the CPU supports vector unsigned comparison instructions?
|
// Does the CPU supports vector unsigned comparison instructions?
|
||||||
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
|
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
|
||||||
int vlen_in_bytes = vlen * type2aelembytes(bt);
|
return true;
|
||||||
if ((UseAVX > 2) && (VM_Version::supports_avx512vl() || vlen_in_bytes == 64))
|
|
||||||
return true;
|
|
||||||
else {
|
|
||||||
// instruction set supports only signed comparison
|
|
||||||
// so need to zero extend to higher integral type and perform comparison
|
|
||||||
// cannot cast long to higher integral type
|
|
||||||
// and on avx1 cannot cast 128 bit integral vectors to higher size
|
|
||||||
|
|
||||||
if ((bt != T_LONG) &&
|
|
||||||
((UseAVX >= 2) || (vlen_in_bytes <= 8)))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some microarchitectures have mask registers used on vectors
|
// Some microarchitectures have mask registers used on vectors
|
||||||
|
|
|
@ -2560,6 +2560,18 @@ static inline jlong replicate8_imm(int con, int width) {
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline jlong high_bit_set(BasicType bt) {
|
||||||
|
switch (bt) {
|
||||||
|
case T_BYTE: return 0x8080808080808080;
|
||||||
|
case T_SHORT: return 0x8000800080008000;
|
||||||
|
case T_INT: return 0x8000000080000000;
|
||||||
|
case T_LONG: return 0x8000000000000000;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
|
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
|
||||||
st->print("nop \t# %d bytes pad for loops and calls", _count);
|
st->print("nop \t# %d bytes pad for loops and calls", _count);
|
||||||
|
@ -7313,62 +7325,75 @@ instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
|
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
||||||
|
(n->in(2)->get_int() == BoolTest::eq ||
|
||||||
|
n->in(2)->get_int() == BoolTest::lt ||
|
||||||
|
n->in(2)->get_int() == BoolTest::gt)); // cond
|
||||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||||
effect(TEMP scratch);
|
format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
|
||||||
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
|
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src1);
|
int vlen_enc = vector_length_encoding(this, $src1);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
|
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
|
||||||
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
|
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{
|
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
||||||
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
|
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
||||||
|
(n->in(2)->get_int() == BoolTest::ne ||
|
||||||
|
n->in(2)->get_int() == BoolTest::le ||
|
||||||
|
n->in(2)->get_int() == BoolTest::ge)); // cond
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||||
|
effect(TEMP dst, TEMP xtmp);
|
||||||
|
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vlen_enc = vector_length_encoding(this, $src1);
|
||||||
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
|
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
|
||||||
|
__ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
||||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||||
effect(TEMP vtmp1, TEMP vtmp2, TEMP scratch);
|
effect(TEMP dst, TEMP xtmp);
|
||||||
format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
|
format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
|
||||||
|
int vlen_enc = vector_length_encoding(this, $src1);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
|
Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
|
||||||
__ vpcmpu(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
|
|
||||||
$vtmp2$$XMMRegister, $scratch$$Register);
|
if (vlen_enc == Assembler::AVX_128bit) {
|
||||||
|
__ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
|
||||||
|
} else {
|
||||||
|
__ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
|
||||||
|
}
|
||||||
|
__ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
|
||||||
|
__ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
|
||||||
|
__ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{
|
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
|
||||||
is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1
|
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
|
||||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
|
||||||
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP scratch);
|
|
||||||
format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
|
|
||||||
ins_encode %{
|
|
||||||
int vlen = Matcher::vector_length_in_bytes(this, $src1);
|
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src1);
|
|
||||||
__ vpcmpu32(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
|
|
||||||
$vtmp2$$XMMRegister, $vtmp3$$XMMRegister, $scratch$$Register);
|
|
||||||
%}
|
|
||||||
ins_pipe( pipe_slow );
|
|
||||||
%}
|
|
||||||
|
|
||||||
instruct vcmpu64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
|
|
||||||
predicate((n->bottom_type()->isa_vectmask() == NULL &&
|
predicate((n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue