8271515: Integration of JEP 417: Vector API (Third Incubator)

Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Co-authored-by: Xiaohong Gong <xgong@openjdk.org>
Co-authored-by: Eric Liu <eliu@openjdk.org>
Co-authored-by: Jie Fu <jiefu@openjdk.org>
Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: John R Rose <jrose@openjdk.org>
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Rado Smogura <mail@smogura.eu>
Reviewed-by: kvn, sviswanathan, ngasson
Paul Sandoz 2021-11-15 21:48:38 +00:00
parent 9326eb1461
commit a59c9b2ac2
104 changed files with 20106 additions and 5976 deletions


@ -2059,7 +2059,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
if (src_hi != OptoReg::Bad) {
if (src_hi != OptoReg::Bad && !bottom_type()->isa_vectmask()) {
assert((src_lo&1)==0 && src_lo+1==src_hi &&
(dst_lo&1)==0 && dst_lo+1==dst_hi,
"expected aligned-adjacent pairs");
@ -2074,7 +2074,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
int src_offset = ra_->reg2offset(src_lo);
int dst_offset = ra_->reg2offset(dst_lo);
if (bottom_type()->isa_vect() != NULL) {
if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
uint ireg = ideal_reg();
if (ireg == Op_VecA && cbuf) {
C2_MacroAssembler _masm(cbuf);
@ -2180,11 +2180,30 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
} else if (dst_lo_rc == rc_float) { // stack --> fpr load
__ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
is64 ? __ D : __ S, src_offset);
} else if (dst_lo_rc == rc_predicate) {
__ unspill_sve_predicate(as_PRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
Matcher::scalable_vector_reg_size(T_BYTE) >> 3);
} else { // stack --> stack copy
assert(dst_lo_rc == rc_stack, "spill to bad register class");
if (ideal_reg() == Op_RegVectMask) {
__ spill_copy_sve_predicate_stack_to_stack(src_offset, dst_offset,
Matcher::scalable_vector_reg_size(T_BYTE) >> 3);
} else {
__ unspill(rscratch1, is64, src_offset);
__ spill(rscratch1, is64, dst_offset);
}
}
break;
case rc_predicate:
if (dst_lo_rc == rc_predicate) {
__ sve_mov(as_PRegister(Matcher::_regEncode[dst_lo]), as_PRegister(Matcher::_regEncode[src_lo]));
} else if (dst_lo_rc == rc_stack) {
__ spill_sve_predicate(as_PRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
Matcher::scalable_vector_reg_size(T_BYTE) >> 3);
} else {
assert(false, "bad src and dst rc_class combination.");
ShouldNotReachHere();
}
break;
default:
assert(false, "bad rc_class for spill");
@ -2204,7 +2223,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
} else {
st->print("%s", Matcher::regName[dst_lo]);
}
if (bottom_type()->isa_vect() != NULL) {
if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
int vsize = 0;
switch (ideal_reg()) {
case Op_VecD:
@ -2221,6 +2240,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
ShouldNotReachHere();
}
st->print("\t# vector spill size = %d", vsize);
} else if (ideal_reg() == Op_RegVectMask) {
assert(Matcher::supports_scalable_vector(), "bad register type for spill");
int vsize = Matcher::scalable_predicate_reg_slots() * 32;
st->print("\t# predicate spill size = %d", vsize);
} else {
st->print("\t# spill size = %d", is64 ? 64 : 32);
}
@ -2382,6 +2405,18 @@ const bool Matcher::match_rule_supported(int opcode) {
ret_value = false;
}
break;
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
case Op_LoadVectorGatherMasked:
case Op_StoreVectorScatterMasked:
case Op_MaskAll:
case Op_AndVMask:
case Op_OrVMask:
case Op_XorVMask:
if (UseSVE == 0) {
ret_value = false;
}
break;
}
return ret_value; // Per default match rules are supported.
@ -2430,6 +2465,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return vector_size_supported(bt, vlen);
}
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// Only SVE supports masked operations.
if (UseSVE == 0) {
return false;
}
return match_rule_supported(opcode) &&
masked_op_sve_supported(opcode, vlen, bt);
}
const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}
@ -2643,10 +2687,14 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
mstack.push(m, Visit); // m = ShiftCntV
// ShiftV src (ShiftCntV con)
// StoreVector (VectorStoreMask src)
if (is_vshift_con_pattern(n, m) ||
(UseSVE > 0 && m->Opcode() == Op_VectorStoreMask && n->Opcode() == Op_StoreVector)) {
mstack.push(m, Visit);
return true;
}
return false;
}
@ -5505,6 +5553,7 @@ operand pReg()
%{
constraint(ALLOC_IN_RC(pr_reg));
match(RegVectMask);
match(pRegGov);
op_cost(0);
format %{ %}
interface(REG_INTER);
@ -8854,6 +8903,17 @@ instruct castVV(vReg dst)
ins_pipe(pipe_class_empty);
%}
instruct castVVMask(pRegGov dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// ============================================================================
// Atomic operation instructions
//

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -583,7 +583,7 @@ class Address {
static bool offset_ok_for_immed(int64_t offset, uint shift);
static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
if (offset % vl == 0) {
// Convert address offset into sve imm offset (MUL VL).
int sve_offset = offset / vl;
@ -2976,6 +2976,32 @@ private:
pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
}
void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR,
int& tszh, int& tszl_imm) {
/* The encodings for the tszh:tszl:imm3 fields
* for shift right are calculated as:
* 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3)
* 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3)
* 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3)
* 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3)
* and for shift left as:
* 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8
* 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16
* 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32
* 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64
*/
assert(T != Q, "Invalid register variant");
if (isSHR) {
assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value");
} else {
assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value");
}
int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));
int encodedShift = isSHR ? cVal - shift : cVal + shift;
tszh = encodedShift >> 5;
tszl_imm = encodedShift & 0x1f;
}
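
To make the encoding above concrete, a standalone sketch (not part of the commit) that reproduces the right-shift case for T = S and shift = 3, assuming the B=0, H=1, S=2, D=3 values of SIMD_RegVariant implied by 1 << (T + 3):

#include <cassert>

// Minimal model of sve_shift_imm_encoding for a shift right (isSHR = true),
// T = S (2), shift = 3: element size is 1 << (T + 3) = 32 bits.
int main() {
  int T = 2, shift = 3;
  int cVal = 1 << ((T + 3) + 1);        // 64 for a 32-bit shift right
  int encodedShift = cVal - shift;      // 61 = 0b0111101
  int tszh = encodedShift >> 5;         // 0b01, matching "01xx xxx S"
  int tszl_imm = encodedShift & 0x1f;   // 0b11101
  assert(tszh == 1 && tszl_imm == 29);
  // Decoding back: shift = 64 - UInt(tszh:tszl:imm3) = 64 - 61 = 3.
  return 0;
}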
public:
// SVE integer arithmetic - predicate
@ -2987,16 +3013,19 @@ public:
INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
INSN(sve_add, 0b00000100, 0b000000000); // vector add
INSN(sve_and, 0b00000100, 0b011010000); // vector and
INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits
INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits
INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
INSN(sve_eor, 0b00000100, 0b011001000); // vector eor
INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
INSN(sve_orr, 0b00000100, 0b011000000); // vector or
INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
@ -3039,10 +3068,11 @@ public:
f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
}
INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm
INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
INSN(sve_fmad, 0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
#undef INSN
@ -3064,28 +3094,8 @@ public:
#define INSN(NAME, opc, isSHR) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
starti; \
/* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
* for shift right is calculated as: \
* 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
* 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
* 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
* 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
* for shift left is calculated as: \
* 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
* 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
* 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
* 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
*/ \
assert(T != Q, "Invalid register variant"); \
if (isSHR) { \
assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
} else { \
assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
} \
int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
int encodedShift = isSHR ? cVal - shift : cVal + shift; \
int tszh = encodedShift >> 5; \
int tszl_imm = encodedShift & 0x1f; \
int tszh, tszl_imm; \
sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm); \
f(0b00000100, 31, 24); \
f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
@ -3096,6 +3106,21 @@ public:
INSN(sve_lsr, 0b101, /* isSHR = */ true);
#undef INSN
// SVE bitwise shift by immediate (predicated)
#define INSN(NAME, opc, isSHR) \
void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) { \
starti; \
int tszh, tszl_imm; \
sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm); \
f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16); \
f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0); \
}
INSN(sve_asr, 0b0000, /* isSHR = */ true);
INSN(sve_lsl, 0b0011, /* isSHR = */ false);
INSN(sve_lsr, 0b0001, /* isSHR = */ true);
#undef INSN
private:
// Scalar base + immediate index
@ -3207,6 +3232,24 @@ public:
INSN(sve_dec, 1);
#undef INSN
// SVE predicate logical operations
#define INSN(NAME, op1, op2, op3) \
void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \
starti; \
f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20); \
prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9); \
prf(Pn, 5), f(op3, 4), prf(Pd, 0); \
}
INSN(sve_and, 0b00, 0b0, 0b0);
INSN(sve_ands, 0b01, 0b0, 0b0);
INSN(sve_eor, 0b00, 0b1, 0b0);
INSN(sve_eors, 0b01, 0b1, 0b0);
INSN(sve_orr, 0b10, 0b0, 0b0);
INSN(sve_orrs, 0b11, 0b0, 0b0);
INSN(sve_bic, 0b00, 0b0, 0b1);
#undef INSN
// SVE increment register by predicate count
void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
starti;
@ -3240,12 +3283,47 @@ public:
f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
}
// SVE predicate test
void sve_ptest(PRegister Pg, PRegister Pn) {
starti;
f(0b001001010101000011, 31, 14), prf(Pg, 10), f(0, 9), prf(Pn, 5), f(0, 4, 0);
}
// SVE predicate initialize
void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
starti;
f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
}
// SVE predicate zero
void sve_pfalse(PRegister pd) {
starti;
f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b011000111001, 21, 10);
f(0b000000, 9, 4), prf(pd, 0);
}
// SVE load/store predicate register
#define INSN(NAME, op1) \
void NAME(PRegister Pt, const Address &a) { \
starti; \
assert(a.index() == noreg, "invalid address variant"); \
f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), \
f(0, 4), prf(Pt, 0); \
}
INSN(sve_ldr, 0b100); // LDR (predicate)
INSN(sve_str, 0b111); // STR (predicate)
#undef INSN
// SVE move predicate register
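// (this is the MOV alias of ORR Pd.B, Pn/Z, Pn.B, Pn.B, hence Pn is encoded three times below)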
void sve_mov(PRegister Pd, PRegister Pn) {
starti;
f(0b001001011000, 31, 20), prf(Pn, 16), f(0b01, 15, 14), prf(Pn, 10);
f(0, 9), prf(Pn, 5), f(0, 4), prf(Pd, 0);
}
// SVE copy general-purpose register to vector elements (predicated)
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
starti;
@ -3348,6 +3426,18 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
INSN(sve_sunpklo, 0b00); // Unsigned unpack and extend half of vector - low half
#undef INSN
// SVE unpack predicate elements
#define INSN(NAME, op) \
void NAME(PRegister Pd, PRegister Pn) { \
starti; \
f(0b000001010011000, 31, 17), f(op, 16), f(0b0100000, 15, 9); \
prf(Pn, 5), f(0b0, 4), prf(Pd, 0); \
}
INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate
INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate
#undef INSN
// SVE permute vector elements
#define INSN(NAME, op) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
@ -3361,6 +3451,19 @@ void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
#undef INSN
// SVE permute predicate elements
#define INSN(NAME, op) \
void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) { \
starti; \
assert(T != Q, "invalid size"); \
f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16); \
f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0); \
}
INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates
INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates
#undef INSN
// Predicate counted loop (SVE) (32-bit variants are not included)
#define INSN(NAME, decode) \
void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \


@ -972,28 +972,219 @@ void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg,
}
}
void C2_MacroAssembler::sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src,
PRegister pg, PRegister pn, int length) {
assert(pg->is_governing(), "This register has to be a governing predicate register");
// The conditional flags will be clobbered by this function
sve_cmp(Assembler::NE, pn, size, pg, src, 0);
switch (opc) {
case Op_VectorMaskTrueCount:
sve_cntp(dst, size, ptrue, pn);
break;
case Op_VectorMaskFirstTrue:
sve_brkb(pn, pg, pn, false);
sve_cntp(dst, size, ptrue, pn);
break;
case Op_VectorMaskLastTrue:
sve_rev(pn, size, pn);
sve_brkb(pn, ptrue, pn, false);
sve_cntp(dst, size, ptrue, pn);
movw(rscratch1, length - 1);
// Get index of the last mask lane that is set
void C2_MacroAssembler::sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp) {
SIMD_RegVariant size = elemType_to_regVariant(bt);
sve_rev(ptmp, size, src);
sve_brkb(ptmp, ptrue, ptmp, false);
sve_cntp(dst, size, ptrue, ptmp);
movw(rscratch1, MaxVectorSize / type2aelembytes(bt) - 1);
subw(dst, rscratch1, dst);
}
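
A scalar model (not part of the commit) of what the rev/brkb/cntp sequence computes: brkb keeps the lanes of the reversed mask that precede its first set lane, cntp counts them, and the final subtraction turns that count into an index from the front (yielding -1 for an all-false mask):

#include <cassert>

// Scalar model of sve_vmask_lasttrue for a mask of `lanes` elements.
static int vmask_lasttrue_model(const bool* mask, int lanes) {
  int before_first_true_in_reversed = 0;
  for (int i = lanes - 1; i >= 0; i--) {  // scan the reversed mask
    if (mask[i]) break;
    before_first_true_in_reversed++;
  }
  return (lanes - 1) - before_first_true_in_reversed;
}

int main() {
  bool m[8] = {false, true, false, true, false, false, false, false};
  assert(vmask_lasttrue_model(m, 8) == 3);   // last set lane is index 3
  bool none[8] = {};
  assert(vmask_lasttrue_model(none, 8) == -1);
  return 0;
}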
void C2_MacroAssembler::sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
FloatRegister src, SIMD_RegVariant src_size) {
assert(dst_size > src_size && dst_size <= D && src_size <= S, "invalid element size");
if (src_size == B) {
switch (dst_size) {
case H:
sve_sunpklo(dst, H, src);
break;
case S:
sve_sunpklo(dst, H, src);
sve_sunpklo(dst, S, dst);
break;
case D:
sve_sunpklo(dst, H, src);
sve_sunpklo(dst, S, dst);
sve_sunpklo(dst, D, dst);
break;
default:
ShouldNotReachHere();
}
} else if (src_size == H) {
if (dst_size == S) {
sve_sunpklo(dst, S, src);
} else { // D
sve_sunpklo(dst, S, src);
sve_sunpklo(dst, D, dst);
}
} else if (src_size == S) {
sve_sunpklo(dst, D, src);
}
}
// Narrow the vector from src to dst with the specified element sizes.
// The high part of the dst vector will be filled with zero.
void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
FloatRegister src, SIMD_RegVariant src_size,
FloatRegister tmp) {
assert(dst_size < src_size && dst_size <= S && src_size <= D, "invalid element size");
sve_dup(tmp, src_size, 0);
if (src_size == D) {
switch (dst_size) {
case S:
sve_uzp1(dst, S, src, tmp);
break;
case H:
sve_uzp1(dst, S, src, tmp);
sve_uzp1(dst, H, dst, tmp);
break;
case B:
sve_uzp1(dst, S, src, tmp);
sve_uzp1(dst, H, dst, tmp);
sve_uzp1(dst, B, dst, tmp);
break;
default:
ShouldNotReachHere();
}
} else if (src_size == S) {
if (dst_size == H) {
sve_uzp1(dst, H, src, tmp);
} else { // B
sve_uzp1(dst, H, src, tmp);
sve_uzp1(dst, B, dst, tmp);
}
} else if (src_size == H) {
sve_uzp1(dst, B, src, tmp);
}
}
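
Because tmp is zeroed first, each sve_uzp1 step above selects the even-indexed elements of src followed by the even-indexed elements of the zero vector, so the truncated values land in the low half of dst and the high half becomes zero. A scalar sketch of one D-to-S step on a 128-bit vector (not part of the commit):

#include <cassert>
#include <cstdint>

// Model of sve_uzp1(dst, S, src, tmp) with tmp == 0: src holds two 64-bit
// elements viewed as four 32-bit elements; dst collects the even-indexed
// 32-bit elements of src (the low halves, i.e. the truncated values), then
// the even-indexed 32-bit elements of the zero vector.
int main() {
  uint32_t src[4]  = {0x11111111, 0xdeadbeef, 0x22222222, 0xcafebabe};
  uint32_t zero[4] = {0, 0, 0, 0};
  uint32_t dst[4];
  for (int i = 0; i < 2; i++) dst[i] = src[2 * i];       // even elements of src
  for (int i = 0; i < 2; i++) dst[2 + i] = zero[2 * i];  // even elements of tmp
  assert(dst[0] == 0x11111111 && dst[1] == 0x22222222);
  assert(dst[2] == 0 && dst[3] == 0);
  return 0;
}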
// Extend src predicate to dst predicate with the same lane count but larger
// element size, e.g. 64Byte -> 512Long
void C2_MacroAssembler::sve_vmaskcast_extend(PRegister dst, PRegister src,
uint dst_element_length_in_bytes,
uint src_element_length_in_bytes) {
if (dst_element_length_in_bytes == 2 * src_element_length_in_bytes) {
sve_punpklo(dst, src);
} else if (dst_element_length_in_bytes == 4 * src_element_length_in_bytes) {
sve_punpklo(dst, src);
sve_punpklo(dst, dst);
} else if (dst_element_length_in_bytes == 8 * src_element_length_in_bytes) {
sve_punpklo(dst, src);
sve_punpklo(dst, dst);
sve_punpklo(dst, dst);
} else {
assert(false, "unsupported");
ShouldNotReachHere();
}
}
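
An SVE predicate carries one bit per vector byte, so each punpklo call above doubles the element size of the low-half lanes: the bit for low-half lane i moves from position i to position 2*i. A bit-level sketch of a single step (not part of the commit):

#include <cassert>
#include <cstdint>

// Model of PUNPKLO on a predicate: each bit i of the low half of the source
// becomes bit 2*i of the destination; the odd bits are zero.
static uint64_t punpklo_model(uint64_t src, int vl_bytes) {
  uint64_t dst = 0;
  for (int i = 0; i < vl_bytes / 2; i++) {
    if ((src >> i) & 1) dst |= uint64_t(1) << (2 * i);
  }
  return dst;
}

int main() {
  // 4 active byte lanes (bits 0..3) widen to 4 active halfword lanes
  // (bits 0, 2, 4, 6): same lane count at twice the element size.
  assert(punpklo_model(0b1111, 16) == 0b01010101);
  return 0;
}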
// Narrow src predicate to dst predicate with the same lane count but
// smaller element size, e.g. 512Long -> 64Byte
void C2_MacroAssembler::sve_vmaskcast_narrow(PRegister dst, PRegister src,
uint dst_element_length_in_bytes, uint src_element_length_in_bytes) {
// The insignificant bits in src predicate are expected to be zero.
if (dst_element_length_in_bytes * 2 == src_element_length_in_bytes) {
sve_uzp1(dst, B, src, src);
} else if (dst_element_length_in_bytes * 4 == src_element_length_in_bytes) {
sve_uzp1(dst, H, src, src);
sve_uzp1(dst, B, dst, dst);
} else if (dst_element_length_in_bytes * 8 == src_element_length_in_bytes) {
sve_uzp1(dst, S, src, src);
sve_uzp1(dst, H, dst, dst);
sve_uzp1(dst, B, dst, dst);
} else {
assert(false, "unsupported");
ShouldNotReachHere();
}
}
void C2_MacroAssembler::sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
FloatRegister src2, PRegister pg, FloatRegister tmp) {
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
assert(pg->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(src1, dst);
// Registers "dst" and "tmp" will be clobbered, while "src1" and "src2" must be preserved.
Assembler::SIMD_RegVariant size = elemType_to_regVariant(bt);
switch (opc) {
case Op_AddReductionVI: {
sve_uaddv(tmp, size, pg, src2);
smov(dst, tmp, size, 0);
if (bt == T_BYTE) {
addw(dst, src1, dst, ext::sxtb);
} else if (bt == T_SHORT) {
addw(dst, src1, dst, ext::sxth);
} else {
addw(dst, dst, src1);
}
break;
}
case Op_AddReductionVL: {
sve_uaddv(tmp, size, pg, src2);
umov(dst, tmp, size, 0);
add(dst, dst, src1);
break;
}
case Op_AndReductionV: {
sve_andv(tmp, size, pg, src2);
if (bt == T_LONG) {
umov(dst, tmp, size, 0);
andr(dst, dst, src1);
} else {
smov(dst, tmp, size, 0);
andw(dst, dst, src1);
}
break;
}
case Op_OrReductionV: {
sve_orv(tmp, size, pg, src2);
if (bt == T_LONG) {
umov(dst, tmp, size, 0);
orr(dst, dst, src1);
} else {
smov(dst, tmp, size, 0);
orrw(dst, dst, src1);
}
break;
}
case Op_XorReductionV: {
sve_eorv(tmp, size, pg, src2);
if (bt == T_LONG) {
umov(dst, tmp, size, 0);
eor(dst, dst, src1);
} else {
smov(dst, tmp, size, 0);
eorw(dst, dst, src1);
}
break;
}
case Op_MaxReductionV: {
sve_smaxv(tmp, size, pg, src2);
if (bt == T_LONG) {
umov(dst, tmp, size, 0);
cmp(dst, src1);
csel(dst, dst, src1, Assembler::GT);
} else {
smov(dst, tmp, size, 0);
cmpw(dst, src1);
cselw(dst, dst, src1, Assembler::GT);
}
break;
}
case Op_MinReductionV: {
sve_sminv(tmp, size, pg, src2);
if (bt == T_LONG) {
umov(dst, tmp, size, 0);
cmp(dst, src1);
csel(dst, dst, src1, Assembler::LT);
} else {
smov(dst, tmp, size, 0);
cmpw(dst, src1);
cselw(dst, dst, src1, Assembler::LT);
}
break;
}
default:
assert(false, "unsupported");
ShouldNotReachHere();
}
if (opc == Op_AndReductionV || opc == Op_OrReductionV || opc == Op_XorReductionV) {
if (bt == T_BYTE) {
sxtb(dst, dst);
} else if (bt == T_SHORT) {
sxth(dst, dst);
}
}
}


@ -61,8 +61,22 @@
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
FloatRegister zn, FloatRegister zm, int cond);
void sve_vmask_reduction(int opc, Register dst, SIMD_RegVariant size, FloatRegister src,
PRegister pg, PRegister pn, int length = MaxVectorSize);
void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);
void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
FloatRegister src, SIMD_RegVariant src_size);
void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp);
void sve_vmaskcast_extend(PRegister dst, PRegister src,
uint dst_element_length_in_bytes, uint src_element_length_in_bytes);
void sve_vmaskcast_narrow(PRegister dst, PRegister src,
uint dst_element_length_in_bytes, uint src_element_length_in_bytes);
void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
FloatRegister src2, PRegister pg, FloatRegister tmp);
// Generate predicate through whilelo, by comparing ZR with an unsigned
// immediate. rscratch1 will be clobbered.


@ -315,6 +315,7 @@ private:
MacroAssembler* const _masm;
RegSet _gp_regs;
FloatRegSet _fp_regs;
PRegSet _p_regs;
public:
void initialize(ZLoadBarrierStubC2* stub) {
@ -328,6 +329,8 @@ public:
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
_fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
} else if (vm_reg->is_PRegister()) {
_p_regs += PRegSet::of(vm_reg->as_PRegister());
} else {
fatal("Unknown register type");
}
@ -341,7 +344,8 @@ public:
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_regs(),
_fp_regs() {
_fp_regs(),
_p_regs() {
// Figure out what registers to save/restore
initialize(stub);
@ -349,10 +353,12 @@ public:
// Save registers
__ push(_gp_regs, sp);
__ push_fp(_fp_regs, sp);
__ push_p(_p_regs, sp);
}
~ZSaveLiveRegisters() {
// Restore registers
__ pop_p(_p_regs, sp);
__ pop_fp(_fp_regs, sp);
// External runtime call may clobber ptrue reg


@ -1978,7 +1978,7 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return count * 2;
}
// Return the number of dwords poped
// Return the number of dwords popped
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;
bool use_sve = false;
@ -2037,6 +2037,80 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return count * 2;
}
// Return the number of dwords pushed
int MacroAssembler::push_p(unsigned int bitset, Register stack) {
bool use_sve = false;
int sve_predicate_size_in_slots = 0;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
if (use_sve) {
sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots();
}
#endif
if (!use_sve) {
return 0;
}
unsigned char regs[PRegisterImpl::number_of_saved_registers];
int count = 0;
for (int reg = 0; reg < PRegisterImpl::number_of_saved_registers; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
if (count == 0) {
return 0;
}
int total_push_bytes = align_up(sve_predicate_size_in_slots *
VMRegImpl::stack_slot_size * count, 16);
sub(stack, stack, total_push_bytes);
for (int i = 0; i < count; i++) {
sve_str(as_PRegister(regs[i]), Address(stack, i));
}
return total_push_bytes / 8;
}
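
The bit-scan loop above maps bit i of the bitset to predicate register p<i>. A small sketch of that decoding (not part of the commit), with number_of_saved_registers taken as 7 (p0-p6) per the register declaration in this commit:

#include <cassert>

// Sketch of the bitset decoding in push_p/pop_p: bit i selects p<i>.
int main() {
  unsigned int bitset = 0b0101;           // select p0 and p2
  unsigned char regs[7];
  int count = 0;
  for (int reg = 0; reg < 7; reg++) {
    if (1 & bitset)
      regs[count++] = (unsigned char)reg;
    bitset >>= 1;
  }
  assert(count == 2 && regs[0] == 0 && regs[1] == 2);
  return 0;
}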
// Return the number of dwords popped
int MacroAssembler::pop_p(unsigned int bitset, Register stack) {
bool use_sve = false;
int sve_predicate_size_in_slots = 0;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
if (use_sve) {
sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots();
}
#endif
if (!use_sve) {
return 0;
}
unsigned char regs[PRegisterImpl::number_of_saved_registers];
int count = 0;
for (int reg = 0; reg < PRegisterImpl::number_of_saved_registers; reg++) {
if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
if (count == 0) {
return 0;
}
int total_pop_bytes = align_up(sve_predicate_size_in_slots *
VMRegImpl::stack_slot_size * count, 16);
for (int i = count - 1; i >= 0; i--) {
sve_ldr(as_PRegister(regs[i]), Address(stack, i));
}
add(stack, stack, total_pop_bytes);
return total_pop_bytes / 8;
}
#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
#if 0
@ -2495,7 +2569,7 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
}
void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
int sve_vector_size_in_bytes) {
int sve_vector_size_in_bytes, int total_predicate_in_bytes) {
push(RegSet::range(r0, r29), sp); // integer registers except lr & sp
if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
@ -2512,10 +2586,22 @@ void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
}
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
if (save_vectors && use_sve && total_predicate_in_bytes > 0) {
sub(sp, sp, total_predicate_in_bytes);
for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) {
sve_str(as_PRegister(i), Address(sp, i));
}
}
}
void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
int sve_vector_size_in_bytes) {
int sve_vector_size_in_bytes, int total_predicate_in_bytes) {
if (restore_vectors && use_sve && total_predicate_in_bytes > 0) {
for (int i = PRegisterImpl::number_of_saved_registers - 1; i >= 0; i--) {
sve_ldr(as_PRegister(i), Address(sp, i));
}
add(sp, sp, total_predicate_in_bytes);
}
if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
sve_ldr(as_FloatRegister(i), Address(sp, i));


@ -455,6 +455,9 @@ private:
int push_fp(unsigned int bitset, Register stack);
int pop_fp(unsigned int bitset, Register stack);
int push_p(unsigned int bitset, Register stack);
int pop_p(unsigned int bitset, Register stack);
void mov(Register dst, Address a);
public:
@ -466,6 +469,9 @@ public:
static RegSet call_clobbered_registers();
void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); }
void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); }
// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
// scratch, so we don't have to protect them.) Only save the lower
@ -865,9 +871,9 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);)
void push_CPU_state(bool save_vectors = false, bool use_sve = false,
int sve_vector_size_in_bytes = 0);
int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);
void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
int sve_vector_size_in_bytes = 0);
int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);
// Round up to a power of two
void round_to(Register reg, int modulus);
@ -1361,9 +1367,14 @@ public:
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
str(Vx, T, spill_address(1 << (int)T, offset));
}
void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
}
void spill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
sve_str(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
}
void unspill(Register Rx, bool is64, int offset) {
if (is64) {
ldr(Rx, spill_address(8, offset));
@ -1374,9 +1385,14 @@ public:
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
ldr(Vx, T, spill_address(1 << (int)T, offset));
}
void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
}
void unspill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
sve_ldr(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
}
void spill_copy128(int src_offset, int dst_offset,
Register tmp1=rscratch1, Register tmp2=rscratch2) {
if (src_offset < 512 && (src_offset & 7) == 0 &&
@ -1399,6 +1415,12 @@ public:
dst_offset += 16;
}
}
void spill_copy_sve_predicate_stack_to_stack(int src_offset, int dst_offset,
int sve_predicate_reg_size_in_bytes) {
sve_ldr(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, src_offset));
sve_str(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, dst_offset));
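// ptrue is used as a scratch predicate for the copy above, so its
// all-true value must be restored before returning to compiled code.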
reinitialize_ptrue();
}
void cache_wb(Address line);
void cache_wbsync(bool is_pre);


@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,8 @@ const int ConcreteRegisterImpl::max_fpr
FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
const int ConcreteRegisterImpl::max_pr
= ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
= ConcreteRegisterImpl::max_fpr +
PRegisterImpl::number_of_registers * PRegisterImpl::max_slots_per_register;
const char* RegisterImpl::name() const {
const char* names[number_of_registers] = {


@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -243,6 +243,11 @@ class PRegisterImpl: public AbstractRegisterImpl {
enum {
number_of_registers = 16,
number_of_governing_registers = 8,
// p0-p7 are governing predicates for load/store and arithmetic, but p7 is
// preserved as an all-true predicate in OpenJDK. And since we don't support
// allocation of non-governing predicate registers for non-temp registers,
// the predicate registers to be saved are p0-p6.
number_of_saved_registers = number_of_governing_registers - 1,
max_slots_per_register = 1
};
@ -377,6 +382,7 @@ public:
typedef AbstractRegSet<Register> RegSet;
typedef AbstractRegSet<FloatRegister> FloatRegSet;
typedef AbstractRegSet<PRegister> PRegSet;
template <class RegImpl>
class RegSetIterator {


@ -101,7 +101,10 @@ class RegisterSaver {
int reg_offset_in_bytes(Register r);
int r0_offset_in_bytes() { return reg_offset_in_bytes(r0); }
int rscratch1_offset_in_bytes() { return reg_offset_in_bytes(rscratch1); }
int v0_offset_in_bytes(void) { return 0; }
int v0_offset_in_bytes();
// Total stack size in bytes for saving sve predicate registers.
int total_sve_predicate_in_bytes();
// Capture info about frame layout
// Note this is only correct when not saving full vectors.
@ -139,24 +142,49 @@ int RegisterSaver::reg_offset_in_bytes(Register r) {
}
#endif
int r0_offset = (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt;
int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt;
return r0_offset + r->encoding() * wordSize;
}
int RegisterSaver::v0_offset_in_bytes() {
// The floating-point registers are located above the predicate registers in
// the stack frame pushed by save_live_registers(), if predicates are present.
// So the offset depends on the total size of the saved predicate registers.
return (total_sve_predicate_in_bytes() / VMRegImpl::stack_slot_size) * BytesPerInt;
}
int RegisterSaver::total_sve_predicate_in_bytes() {
#ifdef COMPILER2
if (_save_vectors && Matcher::supports_scalable_vector()) {
// The total number of predicate bytes is unlikely to be a multiple
// of 16 bytes, so we align it up manually.
return align_up(Matcher::scalable_predicate_reg_slots() *
VMRegImpl::stack_slot_size *
PRegisterImpl::number_of_saved_registers, 16);
}
#endif
return 0;
}
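
Putting the helpers above together: when vectors and predicates are both saved, the frame pushed by push_CPU_state is laid out as below (a sketch inferred from the offsets in this commit, lower addresses first):

// sp -> +--------------------------------------+
//       | p0..p6   SVE predicate save area     |  total_sve_predicate_in_bytes()
//       +--------------------------------------+  <- v0_offset_in_bytes()
//       | v0..v31  vector save area            |
//       +--------------------------------------+  <- reg_offset_in_bytes(r0)
//       | r0..r29  integer register save area  |
//       +--------------------------------------+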
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
int sve_vector_size_in_slots = 0;
int sve_predicate_size_in_slots = 0;
int total_predicate_in_bytes = total_sve_predicate_in_bytes();
int total_predicate_in_slots = total_predicate_in_bytes / VMRegImpl::stack_slot_size;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
if (use_sve) {
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots();
}
#endif
#if COMPILER2_OR_JVMCI
if (_save_vectors) {
int vect_words = 0;
int extra_save_slots_per_register = 0;
// Save upper half of vector registers
if (use_sve) {
@ -164,9 +192,10 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
} else {
extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
}
vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
VMRegImpl::slots_per_word;
additional_frame_words += vect_words;
int extra_vector_bytes = extra_save_slots_per_register *
VMRegImpl::stack_slot_size *
FloatRegisterImpl::number_of_registers;
additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize);
}
#else
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
@ -184,7 +213,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save Integer and Float registers.
__ enter();
__ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes);
__ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@ -201,8 +230,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Register slots are 8 bytes wide, 32 floating-point registers.
int sp_offset = RegisterImpl::max_slots_per_register * i +
FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
r->as_VMReg());
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg());
}
}
@ -210,13 +238,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
FloatRegister r = as_FloatRegister(i);
int sp_offset = 0;
if (_save_vectors) {
sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
sp_offset = use_sve ? (total_predicate_in_slots + sve_vector_size_in_slots * i) :
(FloatRegisterImpl::slots_per_neon_register * i);
} else {
sp_offset = FloatRegisterImpl::save_slots_per_register * i;
}
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
}
if (_save_vectors && use_sve) {
for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) {
PRegister r = as_PRegister(i);
int sp_offset = sve_predicate_size_in_slots * i;
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
}
}
return oop_map;
@ -225,7 +260,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
#ifdef COMPILER2
__ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
Matcher::scalable_vector_reg_size(T_BYTE));
Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
#else
#if !INCLUDE_JVMCI
assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
@ -238,8 +273,10 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
// Is the vector size (in bytes) bigger than the size saved by default?
// 8-byte vector registers are saved by default on AArch64.
// The minimum vector size SVE supports is 8 bytes, and we need to save
// the predicate registers when the vector size is 8 bytes as well.
bool SharedRuntime::is_wide_vector(int size) {
return size > 8;
return size > 8 || (UseSVE > 0 && size >= 8);
}
// The java_calling_convention describes stack locations as ideal slots on


@ -983,6 +983,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return ret_value; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
return false;
}
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}


@ -2177,6 +2177,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
return false;
}
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}


@ -1536,6 +1536,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
return false;
}
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}

File diff suppressed because it is too large


@ -1462,8 +1462,23 @@ private:
void movddup(XMMRegister dst, XMMRegister src);
void kandbl(KRegister dst, KRegister src1, KRegister src2);
void kandwl(KRegister dst, KRegister src1, KRegister src2);
void kanddl(KRegister dst, KRegister src1, KRegister src2);
void kandql(KRegister dst, KRegister src1, KRegister src2);
void korbl(KRegister dst, KRegister src1, KRegister src2);
void korwl(KRegister dst, KRegister src1, KRegister src2);
void kordl(KRegister dst, KRegister src1, KRegister src2);
void korql(KRegister dst, KRegister src1, KRegister src2);
void kxorbl(KRegister dst, KRegister src1, KRegister src2);
void kxorwl(KRegister dst, KRegister src1, KRegister src2);
void kxordl(KRegister dst, KRegister src1, KRegister src2);
void kxorql(KRegister dst, KRegister src1, KRegister src2);
void kmovbl(KRegister dst, Register src);
void kmovbl(Register dst, KRegister src);
void kmovbl(KRegister dst, KRegister src);
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
@ -1477,7 +1492,9 @@ private:
void kmovql(KRegister dst, Register src);
void kmovql(Register dst, KRegister src);
void knotbl(KRegister dst, KRegister src);
void knotwl(KRegister dst, KRegister src);
void knotdl(KRegister dst, KRegister src);
void knotql(KRegister dst, KRegister src);
void kortestbl(KRegister dst, KRegister src);
@ -1485,10 +1502,19 @@ private:
void kortestdl(KRegister dst, KRegister src);
void kortestql(KRegister dst, KRegister src);
void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
void kshiftlbl(KRegister dst, KRegister src, int imm8);
void kshiftrbl(KRegister dst, KRegister src, int imm8);
void kshiftrwl(KRegister dst, KRegister src, int imm8);
void kshiftrdl(KRegister dst, KRegister src, int imm8);
void kshiftrql(KRegister dst, KRegister src, int imm8);
void ktestq(KRegister src1, KRegister src2);
void ktestd(KRegister src1, KRegister src2);
void ktestql(KRegister dst, KRegister src);
void ktestdl(KRegister dst, KRegister src);
void ktestwl(KRegister dst, KRegister src);
void ktestbl(KRegister dst, KRegister src);
void movdl(XMMRegister dst, Register src);
void movdl(Register dst, XMMRegister src);
@ -2152,9 +2178,6 @@ private:
void bzhiq(Register dst, Register src1, Register src2);
//====================VECTOR ARITHMETIC=====================================
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
@ -2246,6 +2269,136 @@ private:
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Leaf level assembler routines for masked operations.
void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
// Sub packed integers
void psubb(XMMRegister dst, XMMRegister src);
void psubw(XMMRegister dst, XMMRegister src);
@ -2364,7 +2517,6 @@ private:
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
@ -2377,15 +2529,11 @@ private:
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
@ -2527,6 +2675,13 @@ private:
int comparison, bool is_signed, int vector_len);
void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);
void evpmovw2m(KRegister dst, XMMRegister src, int vector_len);
void evpmovd2m(KRegister dst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister dst, XMMRegister src, int vector_len);
void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
// Vector blends
void blendvps(XMMRegister dst, XMMRegister src);

View file

@ -1461,6 +1461,19 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v
}
}
void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp,
Register tmp, bool novlbwdq, int vlen_enc) {
if (novlbwdq) {
vpmovsxbd(xtmp, src, vlen_enc);
evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()),
Assembler::eq, true, vlen_enc, tmp);
} else {
vpxor(xtmp, xtmp, xtmp, vlen_enc);
vpsubb(xtmp, xtmp, src, vlen_enc);
evpmovb2m(dst, xtmp, vlen_enc);
}
}
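// Both paths above produce one opmask bit per boolean byte lane of src
// (lanes hold 0 or 1):
// - the novlbwdq path sign-extends the bytes to dwords and compares them
//   for equality with the 0x00000001 pattern published as
//   StubRoutines::x86::vector_int_mask_cmp_bits();
// - otherwise 0 - src turns each 1 into 0xFF and evpmovb2m copies the
//   byte sign bits straight into the k-register.
// For example, src lanes {1, 0, 0, 1} yield dst bits 0b1001.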
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
if (vlen_in_bytes == 4) {
@ -3827,14 +3840,231 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
}
}
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, int imm8, bool merge, int vlen_enc) {
switch(ideal_opc) {
case Op_LShiftVS:
Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_LShiftVI:
Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_LShiftVL:
Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_RShiftVS:
Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_RShiftVI:
Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_RShiftVL:
Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_URShiftVS:
Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_URShiftVI:
Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_URShiftVL:
Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_RotateRightV:
evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
case Op_RotateLeftV:
evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
default:
fatal("Unsupported masked operation"); break;
}
}
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc,
bool is_varshift) {
switch (ideal_opc) {
case Op_AddVB:
evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVS:
evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVI:
evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVL:
evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVF:
evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVD:
evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVB:
evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVS:
evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVI:
evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVL:
evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVF:
evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVD:
evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVS:
evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVI:
evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVL:
evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVF:
evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVD:
evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_DivVF:
evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_DivVD:
evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SqrtVF:
evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SqrtVD:
evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AbsVB:
evpabsb(dst, mask, src2, merge, vlen_enc); break;
case Op_AbsVS:
evpabsw(dst, mask, src2, merge, vlen_enc); break;
case Op_AbsVI:
evpabsd(dst, mask, src2, merge, vlen_enc); break;
case Op_AbsVL:
evpabsq(dst, mask, src2, merge, vlen_enc); break;
case Op_FmaVF:
evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_FmaVD:
evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_VectorRearrange:
evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break;
case Op_LShiftVS:
evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_LShiftVI:
evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_LShiftVL:
evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_RShiftVS:
evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_RShiftVI:
evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_RShiftVL:
evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_URShiftVS:
evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_URShiftVI:
evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_URShiftVL:
evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
case Op_RotateLeftV:
evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_RotateRightV:
evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MaxV:
evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MinV:
evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_XorV:
evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_OrV:
evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AndV:
evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
default:
fatal("Unsupported masked operation"); break;
}
}
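// evmasked_op gives the .ad file a single dispatch point from C2 ideal
// opcodes to AVX-512 masked instructions; `merge` selects merge-masking
// (inactive lanes keep dst's old value) rather than zero-masking. A
// hypothetical use from an instruct encoding block could look like:
//   __ evmasked_op(Op_AddVI, T_INT, $mask$$KRegister, $dst$$XMMRegister,
//                  $src1$$XMMRegister, $src2$$XMMRegister,
//                  /*merge*/ true, vlen_enc);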
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, Address src2, bool merge, int vlen_enc) {
switch (ideal_opc) {
case Op_AddVB:
evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVS:
evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVI:
evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVL:
evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVF:
evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AddVD:
evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVB:
evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVS:
evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVI:
evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVL:
evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVF:
evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_SubVD:
evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVS:
evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVI:
evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVL:
evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVF:
evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MulVD:
evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_DivVF:
evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_DivVD:
evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_FmaVF:
evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_FmaVD:
evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MaxV:
evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_MinV:
evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_XorV:
evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_OrV:
evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
case Op_AndV:
evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
default:
fatal("Unsupported masked operation"); break;
}
}
void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
KRegister src1, KRegister src2) {
BasicType etype = T_ILLEGAL;
switch(mask_len) {
case 2:
case 4:
case 8: etype = T_BYTE; break;
case 16: etype = T_SHORT; break;
case 32: etype = T_INT; break;
case 64: etype = T_LONG; break;
default: fatal("Unsupported type"); break;
}
assert(etype != T_ILLEGAL, "");
switch(ideal_opc) {
case Op_AndVMask:
kand(etype, dst, src1, src2); break;
case Op_OrVMask:
kor(etype, dst, src1, src2); break;
case Op_XorVMask:
kxor(etype, dst, src1, src2); break;
default:
fatal("Unsupported masked operation"); break;
}
}
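// The mask length selects the narrowest k-instruction width that covers
// it: the byte forms handle 8 bits, so 2-, 4- and 8-bit masks all map to
// T_BYTE, while 16-, 32- and 64-bit masks use the w/d/q forms. For
// example, masked_op(Op_AndVMask, 16, dst, src1, src2) emits
// kandwl(dst, src1, src2).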
#ifdef _LP64
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
Register tmp, KRegister ktmp, int masklen, int vec_enc) {
assert(VM_Version::supports_avx512vlbw(), "");
vpxor(xtmp, xtmp, xtmp, vec_enc);
vpsubb(xtmp, xtmp, mask, vec_enc);
evpmovb2m(ktmp, xtmp, vec_enc);
kmovql(tmp, ktmp);
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask,
Register tmp, int masklen, int masksize,
int vec_enc) {
if (VM_Version::supports_avx512bw()) {
kmovql(tmp, mask);
} else {
assert(masklen <= 16, "");
kmovwl(tmp, mask);
}
if (masksize < 16) {
andq(tmp, (((jlong)1 << masklen) - 1));
}
switch(opc) {
case Op_VectorMaskTrueCount:
popcntq(dst, tmp);
@ -3854,12 +4084,13 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
}
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
XMMRegister xtmp1, Register tmp, int masklen, int vec_enc) {
XMMRegister xtmp1, Register tmp, int masklen, int masksize,
int vec_enc) {
assert(VM_Version::supports_avx(), "");
vpxor(xtmp, xtmp, xtmp, vec_enc);
vpsubb(xtmp, xtmp, mask, vec_enc);
vpmovmskb(tmp, xtmp, vec_enc);
if (masklen < 64) {
if (masksize < 16) {
andq(tmp, (((jlong)1 << masklen) - 1));
}
switch(opc) {

View file

@ -142,6 +142,8 @@ public:
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
// vector compare
@ -222,11 +224,10 @@ public:
public:
#ifdef _LP64
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp,
KRegister ktmp, int masklen, int vec_enc);
void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1,
Register tmp, int masklen, int vec_enc);
Register tmp, int masklen, int masksize, int vec_enc);
#endif
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
@ -273,4 +274,18 @@ public:
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool merge, int vlen_enc, bool is_varshift = false);
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, Address src2,
bool merge, int vlen_enc);
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, int imm8, bool merge, int vlen_enc);
void masked_op(int ideal_opc, int mask_len, KRegister dst,
KRegister src1, KRegister src2);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP

View file

@ -8267,6 +8267,379 @@ void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMR
}
}
void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) {
switch(masklen) {
case 2:
knotbl(dst, src);
movl(rtmp, 3);
kmovbl(ktmp, rtmp);
kandbl(dst, ktmp, dst);
break;
case 4:
knotbl(dst, src);
movl(rtmp, 15);
kmovbl(ktmp, rtmp);
kandbl(dst, ktmp, dst);
break;
case 8:
knotbl(dst, src);
break;
case 16:
knotwl(dst, src);
break;
case 32:
knotdl(dst, src);
break;
case 64:
knotql(dst, src);
break;
default:
fatal("Unexpected vector length %d", masklen);
break;
}
}
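// For masklen < 8 the byte-wide knot also flips the unused high bits, so
// the result is ANDed with (1 << masklen) - 1 (3 for masklen 2, 15 for
// masklen 4) to clear them again. For example, with masklen == 4 and
// src == 0b0101: knotbl yields 0b11111010 and the 0b1111 constant trims
// it to 0b1010.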
void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
switch(type) {
case T_BOOLEAN:
case T_BYTE:
kandbl(dst, src1, src2);
break;
case T_CHAR:
case T_SHORT:
kandwl(dst, src1, src2);
break;
case T_INT:
case T_FLOAT:
kanddl(dst, src1, src2);
break;
case T_LONG:
case T_DOUBLE:
kandql(dst, src1, src2);
break;
default:
fatal("Unexpected type argument %s", type2name(type));
break;
}
}
void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
switch(type) {
case T_BOOLEAN:
case T_BYTE:
korbl(dst, src1, src2);
break;
case T_CHAR:
case T_SHORT:
korwl(dst, src1, src2);
break;
case T_INT:
case T_FLOAT:
kordl(dst, src1, src2);
break;
case T_LONG:
case T_DOUBLE:
korql(dst, src1, src2);
break;
default:
fatal("Unexpected type argument %s", type2name(type));
break;
}
}
void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
switch(type) {
case T_BOOLEAN:
case T_BYTE:
kxorbl(dst, src1, src2);
break;
case T_CHAR:
case T_SHORT:
kxorwl(dst, src1, src2);
break;
case T_INT:
case T_FLOAT:
kxordl(dst, src1, src2);
break;
case T_LONG:
case T_DOUBLE:
kxorql(dst, src1, src2);
break;
default:
fatal("Unexpected type argument %s", type2name(type));
break;
}
}
void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_BOOLEAN:
case T_BYTE:
evpermb(dst, mask, nds, src, merge, vector_len); break;
case T_CHAR:
case T_SHORT:
evpermw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
case T_FLOAT:
evpermd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
case T_DOUBLE:
evpermq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_BOOLEAN:
case T_BYTE:
evpermb(dst, mask, nds, src, merge, vector_len); break;
case T_CHAR:
case T_SHORT:
evpermw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
case T_FLOAT:
evpermd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
case T_DOUBLE:
evpermq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_BYTE:
evpminsb(dst, mask, nds, src, merge, vector_len); break;
case T_SHORT:
evpminsw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
evpminsd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpminsq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_BYTE:
evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
case T_SHORT:
evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_BYTE:
evpminsb(dst, mask, nds, src, merge, vector_len); break;
case T_SHORT:
evpminsw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
evpminsd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpminsq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_BYTE:
evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
case T_SHORT:
evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
case T_INT:
evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_INT:
evpxord(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpxorq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_INT:
evpxord(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpxorq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_INT:
Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evporq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_INT:
Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evporq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
switch(type) {
case T_INT:
evpandd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpandq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
switch(type) {
case T_INT:
evpandd(dst, mask, nds, src, merge, vector_len); break;
case T_LONG:
evpandq(dst, mask, nds, src, merge, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
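// Unlike the k-register helpers above, these masked vector logic helpers
// accept only T_INT and T_LONG: EVEX provides vpxord/vpxorq (and the
// matching and/or forms) whose masking granularity is a dword or qword,
// so byte/short masked logic has no direct encoding and falls through to
// fatal().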
void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) {
masklen = masklen < 8 ? 8 : masklen;
ktest(masklen, src1, src2);
setb(Assembler::notZero, dst);
movzbl(dst, dst);
}
void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) {
if (masklen < 8) {
knotbl(kscratch, src2);
kortestbl(src1, kscratch);
setb(Assembler::carrySet, dst);
movzbl(dst, dst);
} else {
ktest(masklen, src1, src2);
setb(Assembler::carrySet, dst);
movzbl(dst, dst);
}
}
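// anytrue/alltrue read the flags set by the k-register tests: ktest sets
// ZF when (src1 & src2) == 0 and CF when (src1 & ~src2) == 0, while
// kortest ORs its operands and sets CF only when the result is all ones.
// So anytrue's setb(notZero) reports "some common bit set", and alltrue's
// masklen < 8 path complements src2 into kscratch so that kortestbl's
// all-ones check succeeds exactly when src1 covers every lane src2 marks
// active.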
void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
switch(masklen) {
case 8:
kortestbl(src1, src2);
break;
case 16:
kortestwl(src1, src2);
break;
case 32:
kortestdl(src1, src2);
break;
case 64:
kortestql(src1, src2);
break;
default:
fatal("Unexpected mask length %d", masklen);
break;
}
}
void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) {
switch(masklen) {
case 8:
ktestbl(src1, src2);
break;
case 16:
ktestwl(src1, src2);
break;
case 32:
ktestdl(src1, src2);
break;
case 64:
ktestql(src1, src2);
break;
default:
fatal("Unexpected mask length %d", masklen);
break;
}
}
void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
switch(type) {
case T_INT:
evprold(dst, mask, src, shift, merge, vlen_enc); break;
case T_LONG:
evprolq(dst, mask, src, shift, merge, vlen_enc); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
break;
}
}
void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
switch(type) {
case T_INT:
evprord(dst, mask, src, shift, merge, vlen_enc); break;
case T_LONG:
evprorq(dst, mask, src, shift, merge, vlen_enc); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
switch(type) {
case T_INT:
evprolvd(dst, mask, src1, src2, merge, vlen_enc); break;
case T_LONG:
evprolvq(dst, mask, src1, src2, merge, vlen_enc); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
switch(type) {
case T_INT:
evprorvd(dst, mask, src1, src2, merge, vlen_enc); break;
case T_LONG:
evprorvq(dst, mask, src1, src2, merge, vlen_enc); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
}
#if COMPILER2_OR_JVMCI
void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,

View file

@ -1338,6 +1338,75 @@ public:
void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
}
}
void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
}
}
void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
}
}
void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
}
}
void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
}
}
void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
}
}
void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
}
}
void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
}
}
void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
if (!is_varshift) {
Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
} else {
Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
}
}
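// These wrappers keep evmasked_op shape-agnostic: when the shift count is
// a broadcast scalar the uniform evpsll/evpsrl/evpsra forms are used,
// while is_varshift routes per-lane counts to the AVX-512 variable-shift
// encodings (evpsllv*, evpsrlv*, evpsrav*).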
void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
@ -1627,7 +1696,33 @@ public:
Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
}
// Data
// AVX-512 mask operations.
void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
void kortest(uint masklen, KRegister src1, KRegister src2);
void ktest(uint masklen, KRegister src1, KRegister src2);
void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
void cmov32( Condition cc, Register dst, Address src);
void cmov32( Condition cc, Register dst, Register src);

View file

@ -4001,6 +4001,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
// support for verify_oop (must happen after universe_init)

View file

@ -7676,6 +7676,7 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x0000000100000001);
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);

View file

@ -48,6 +48,7 @@ address StubRoutines::x86::_vector_int_to_byte_mask = NULL;
address StubRoutines::x86::_vector_int_to_short_mask = NULL;
address StubRoutines::x86::_vector_all_bits_set = NULL;
address StubRoutines::x86::_vector_byte_shuffle_mask = NULL;
address StubRoutines::x86::_vector_int_mask_cmp_bits = NULL;
address StubRoutines::x86::_vector_short_shuffle_mask = NULL;
address StubRoutines::x86::_vector_int_shuffle_mask = NULL;
address StubRoutines::x86::_vector_long_shuffle_mask = NULL;

View file

@ -165,6 +165,7 @@ class x86 {
static address _vector_double_sign_flip;
static address _vector_long_sign_mask;
static address _vector_all_bits_set;
static address _vector_int_mask_cmp_bits;
static address _vector_byte_perm_mask;
static address _vector_int_to_byte_mask;
static address _vector_int_to_short_mask;
@ -289,6 +290,10 @@ class x86 {
return _vector_all_bits_set;
}
static address vector_int_mask_cmp_bits() {
return _vector_int_mask_cmp_bits;
}
static address vector_byte_perm_mask() {
return _vector_byte_perm_mask;
}

View file

@ -884,6 +884,7 @@ public:
static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
supports_avx512bw() && supports_avx512dq()); }
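// These composed predicates let a single check guard code that needs
// several EVEX extensions at once; for instance, the KRegister
// load_vector_mask path above takes the evpmovb2m route only when
// AVX512BW (byte/word mask ops) and AVX512VL (sub-512-bit EVEX) are both
// present, and otherwise falls back to the novlbwdq sequence.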

File diff suppressed because it is too large

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -269,6 +269,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
if( strcmp(opType,"LoadS")==0 ) return Form::idealS;
if( strcmp(opType,"LoadVector")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorGatherMasked")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorMasked")==0 ) return Form::idealV;
assert( strcmp(opType,"Load") != 0, "Must type Loads" );
return Form::none;
@ -287,6 +288,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass;
if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorScatterMasked")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorMasked")==0 ) return Form::idealV;
assert( strcmp(opType,"Store") != 0, "Must type Stores" );
return Form::none;

View file

@ -2281,6 +2281,7 @@ bool OperandForm::is_bound_register() const {
if (strcmp(name, "RegD") == 0) size = 2;
if (strcmp(name, "RegL") == 0) size = 2;
if (strcmp(name, "RegN") == 0) size = 1;
if (strcmp(name, "RegVectMask") == 0) size = globalAD->get_preproc_def("AARCH64") ? 1 : 2;
if (strcmp(name, "VecX") == 0) size = 4;
if (strcmp(name, "VecY") == 0) size = 8;
if (strcmp(name, "VecZ") == 0) size = 16;
@ -3514,7 +3515,8 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"StoreB","StoreC","Store" ,"StoreFP",
"LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" ,
"LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
"StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter", "LoadVectorMasked", "StoreVectorMasked",
"StoreVector", "LoadVector", "LoadVectorMasked", "StoreVectorMasked",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
"LoadPLocked",
"StorePConditional", "StoreIConditional", "StoreLConditional",
@ -3818,41 +3820,66 @@ bool MatchNode::equivalent(FormDict &globals, MatchNode *mNode2) {
return true;
}
//-------------------------- has_commutative_op -------------------------------
//-------------------------- count_commutative_op -------------------------------
// Recursively check for commutative operations with subtree operands
// which could be swapped.
void MatchNode::count_commutative_op(int& count) {
static const char *commut_op_list[] = {
"AddI","AddL","AddF","AddD",
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
"AndI","AndL",
"AndV",
"MaxI","MinI","MaxF","MinF","MaxD","MinD",
"MaxV", "MinV",
"MulI","MulL","MulF","MulD",
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"OrI","OrL",
"OrV",
"XorI","XorL",
"XorV"
"XorI","XorL"
};
int cnt = sizeof(commut_op_list)/sizeof(char*);
if( _lChild && _rChild && (_lChild->_lChild || _rChild->_lChild) ) {
static const char *commut_vector_op_list[] = {
"AddVB", "AddVS", "AddVI", "AddVL", "AddVF", "AddVD",
"MulVB", "MulVS", "MulVI", "MulVL", "MulVF", "MulVD",
"AndV", "OrV", "XorV",
"MaxV", "MinV"
};
if (_lChild && _rChild && (_lChild->_lChild || _rChild->_lChild)) {
// Don't swap if right operand is an immediate constant.
bool is_const = false;
if( _rChild->_lChild == NULL && _rChild->_rChild == NULL ) {
if (_rChild->_lChild == NULL && _rChild->_rChild == NULL) {
FormDict &globals = _AD.globalNames();
const Form *form = globals[_rChild->_opType];
if ( form ) {
if (form) {
OperandForm *oper = form->is_operand();
if( oper && oper->interface_type(globals) == Form::constant_interface )
if (oper && oper->interface_type(globals) == Form::constant_interface)
is_const = true;
}
}
if( !is_const ) {
for( int i=0; i<cnt; i++ ) {
if( strcmp(_opType, commut_op_list[i]) == 0 ) {
if (!is_const) {
int scalar_cnt = sizeof(commut_op_list)/sizeof(char*);
int vector_cnt = sizeof(commut_vector_op_list)/sizeof(char*);
bool matched = false;
// Check the commutative vector op first. It's noncommutative if
// the current node is a masked vector op, since a mask value
// is added to the original vector node's input list and the original
// first two inputs are packed into one BinaryNode. So don't swap
// if one of the operands is a BinaryNode.
for (int i = 0; i < vector_cnt; i++) {
if (strcmp(_opType, commut_vector_op_list[i]) == 0) {
if (strcmp(_lChild->_opType, "Binary") != 0 &&
strcmp(_rChild->_opType, "Binary") != 0) {
count++;
_commutative_id = count; // id should be > 0
}
matched = true;
break;
}
}
// Then check the scalar op if the current op is not in
// the commut_vector_op_list.
if (!matched) {
for (int i = 0; i < scalar_cnt; i++) {
if (strcmp(_opType, commut_op_list[i]) == 0) {
count++;
_commutative_id = count; // id should be > 0
break;
@ -3860,9 +3887,10 @@ void MatchNode::count_commutative_op(int& count) {
}
}
}
if( _lChild )
}
if (_lChild)
_lChild->count_commutative_op(count);
if( _rChild )
if (_rChild)
_rChild->count_commutative_op(count);
}
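// Example of the Binary guard above: a masked vector add matches as
// (AddVB (Binary src1 src2) mask), so swapping the AddVB's children would
// exchange the Binary subtree with the mask rather than src1 with src2.
// Only unmasked shapes, where neither child is a Binary, get a
// _commutative_id and may be swapped by the matcher.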
@ -4088,6 +4116,7 @@ int MatchRule::is_expensive() const {
strcmp(opType,"AndReductionV")==0 ||
strcmp(opType,"OrReductionV")==0 ||
strcmp(opType,"XorReductionV")==0 ||
strcmp(opType,"MaskAll")==0 ||
0 /* 0 to line up columns nicely */ )
return 1;
}
@ -4200,17 +4229,18 @@ bool MatchRule::is_vector() const {
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
"VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
"FmaVD", "FmaVF","PopCountVI",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD",
"VectorMaskCast"
"ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD"
};
int cnt = sizeof(vector_list)/sizeof(char*);
if (_rChild) {

View file

@ -832,121 +832,278 @@ class methodHandle;
/* Vector API intrinsification support */ \
\
do_intrinsic(_VectorUnaryOp, jdk_internal_vm_vector_VectorSupport, vector_unary_op_name, vector_unary_op_sig, F_S) \
do_signature(vector_unary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/util/function/Function;)Ljava/lang/Object;") \
do_signature(vector_unary_op_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$UnaryOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_unary_op_name, "unaryOp") \
\
do_intrinsic(_VectorBinaryOp, jdk_internal_vm_vector_VectorSupport, vector_binary_op_name, vector_binary_op_sig, F_S) \
do_signature(vector_binary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
"Ljava/util/function/BiFunction;)Ljava/lang/Object;") \
do_signature(vector_binary_op_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$BinaryOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_binary_op_name, "binaryOp") \
\
do_intrinsic(_VectorTernaryOp, jdk_internal_vm_vector_VectorSupport, vector_ternary_op_name, vector_ternary_op_sig, F_S) \
do_signature(vector_ternary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
"Ljava/lang/Object;Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)Ljava/lang/Object;") \
do_signature(vector_ternary_op_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_ternary_op_name, "ternaryOp") \
\
do_intrinsic(_VectorBroadcastCoerced, jdk_internal_vm_vector_VectorSupport, vector_broadcast_coerced_name, vector_broadcast_coerced_sig, F_S)\
do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;Ljava/lang/Class;IJLjdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)Ljava/lang/Object;") \
do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_broadcast_coerced_name, "broadcastCoerced") \
\
do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"IIIILjdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"IIII" \
"Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
do_name(vector_shuffle_step_iota_name, "shuffleIota") \
\
do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)Ljava/lang/Object;") \
do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjava/lang/Object;" \
"ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljava/lang/Object;") \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Object;" \
"J" \
"Ljava/lang/Object;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_load_op_name, "load") \
\
do_intrinsic(_VectorLoadMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_load_masked_op_name, vector_load_masked_op_sig, F_S) \
do_signature(vector_load_masked_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_load_masked_op_name, "loadMasked") \
\
do_intrinsic(_VectorStoreOp, jdk_internal_vm_vector_VectorSupport, vector_store_op_name, vector_store_op_sig, F_S) \
do_signature(vector_store_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \
do_signature(vector_store_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \
"V") \
do_name(vector_store_op_name, "store") \
\
do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S) \
do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljava/util/function/Function;)J") \
do_intrinsic(_VectorStoreMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_store_masked_op_name, vector_store_masked_op_sig, F_S) \
do_signature(vector_store_masked_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)" \
"V") \
do_name(vector_store_masked_op_name, "storeMasked") \
\
do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S)\
do_signature(vector_reduction_coerced_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$ReductionOperation;)" \
"J") \
do_name(vector_reduction_coerced_name, "reductionCoerced") \
\
do_intrinsic(_VectorTest, jdk_internal_vm_vector_VectorSupport, vector_test_name, vector_test_sig, F_S) \
do_signature(vector_test_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;Ljava/util/function/BiFunction;)Z") \
do_signature(vector_test_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/util/function/BiFunction;)" \
"Z") \
do_name(vector_test_name, "test") \
\
do_intrinsic(_VectorBlend, jdk_internal_vm_vector_VectorSupport, vector_blend_name, vector_blend_sig, F_S) \
do_signature(vector_blend_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_signature(vector_blend_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_blend_name, "blend") \
\
do_intrinsic(_VectorCompare, jdk_internal_vm_vector_VectorSupport, vector_compare_name, vector_compare_sig, F_S) \
do_signature(vector_compare_sig, "(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;" ")" "Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \
do_signature(vector_compare_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \
do_name(vector_compare_name, "compare") \
\
do_intrinsic(_VectorRearrange, jdk_internal_vm_vector_VectorSupport, vector_rearrange_name, vector_rearrange_sig, F_S) \
do_signature(vector_rearrange_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_signature(vector_rearrange_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_rearrange_name, "rearrangeOp") \
\
do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
do_signature(vector_extract_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
"Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)J") \
do_signature(vector_extract_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)" \
"J") \
do_name(vector_extract_name, "extract") \
\
do_intrinsic(_VectorInsert, jdk_internal_vm_vector_VectorSupport, vector_insert_name, vector_insert_sig, F_S) \
do_signature(vector_insert_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;IJ" \
"Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_signature(vector_insert_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"IJ" \
"Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_insert_name, "insert") \
\
do_intrinsic(_VectorBroadcastInt, jdk_internal_vm_vector_VectorSupport, vector_broadcast_int_name, vector_broadcast_int_sig, F_S) \
do_signature(vector_broadcast_int_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_signature(vector_broadcast_int_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_broadcast_int_name, "broadcastInt") \
\
do_intrinsic(_VectorConvert, jdk_internal_vm_vector_VectorSupport, vector_convert_name, vector_convert_sig, F_S) \
do_signature(vector_convert_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
"Ljava/lang/Class;Ljava/lang/Class;I" \
do_signature(vector_convert_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
"Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_convert_name, "convert") \
\
do_intrinsic(_VectorGatherOp, jdk_internal_vm_vector_VectorSupport, vector_gather_name, vector_gather_sig, F_S) \
do_signature(vector_gather_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
"Ljava/lang/Object;J" \
do_signature(vector_gather_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Class;" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;I[II" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;" \
"I[II" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorOperationWithMap;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_gather_name, "loadWithMap") \
\
do_intrinsic(_VectorScatterOp, jdk_internal_vm_vector_VectorSupport, vector_scatter_name, vector_scatter_sig, F_S) \
do_signature(vector_scatter_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
"Ljava/lang/Object;J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;I[II" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)V") \
do_signature(vector_scatter_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljava/lang/Class;" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljava/lang/Object;" \
"I[II" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)" \
"V") \
do_name(vector_scatter_name, "storeWithMap") \
\
do_intrinsic(_VectorRebox, jdk_internal_vm_vector_VectorSupport, vector_rebox_name, vector_rebox_sig, F_S) \
do_alias(vector_rebox_sig, object_object_signature) \
do_signature(vector_rebox_sig, "(Ljdk/internal/vm/vector/VectorSupport$VectorPayload;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_rebox_name, "maybeRebox") \
\
do_intrinsic(_VectorMaskOp, jdk_internal_vm_vector_VectorSupport, vector_mask_oper_name, vector_mask_oper_sig, F_S) \
do_signature(vector_mask_oper_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)I") \
do_signature(vector_mask_oper_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)" \
"J") \
do_name(vector_mask_oper_name, "maskReductionCoerced") \
\
/* (2) Bytecode intrinsics */ \

View file

@ -694,7 +694,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_VectorShuffleIota:
case vmIntrinsics::_VectorShuffleToVector:
case vmIntrinsics::_VectorLoadOp:
case vmIntrinsics::_VectorLoadMaskedOp:
case vmIntrinsics::_VectorStoreOp:
case vmIntrinsics::_VectorStoreMaskedOp:
case vmIntrinsics::_VectorGatherOp:
case vmIntrinsics::_VectorScatterOp:
case vmIntrinsics::_VectorReductionCoerced:

View file

@ -2398,49 +2398,105 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
#endif
// Phi (VB ... VB) => VB (Phi ...) (Phi ...)
if (EnableVectorReboxing && can_reshape && progress == NULL) {
PhaseIterGVN* igvn = phase->is_IterGVN();
bool all_inputs_are_equiv_vboxes = true;
for (uint i = 1; i < req(); ++i) {
Node* n = in(i);
if (in(i)->Opcode() != Op_VectorBox) {
all_inputs_are_equiv_vboxes = false;
break;
}
// Check that vector type of vboxes is equivalent
if (i != 1) {
if (Type::cmp(in(i-0)->in(VectorBoxNode::Value)->bottom_type(),
in(i-1)->in(VectorBoxNode::Value)->bottom_type()) != 0) {
all_inputs_are_equiv_vboxes = false;
break;
}
if (Type::cmp(in(i-0)->in(VectorBoxNode::Box)->bottom_type(),
in(i-1)->in(VectorBoxNode::Box)->bottom_type()) != 0) {
all_inputs_are_equiv_vboxes = false;
break;
}
}
}
if (all_inputs_are_equiv_vboxes) {
VectorBoxNode* vbox = static_cast<VectorBoxNode*>(in(1));
PhiNode* new_vbox_phi = new PhiNode(r, vbox->box_type());
PhiNode* new_vect_phi = new PhiNode(r, vbox->vec_type());
for (uint i = 1; i < req(); ++i) {
VectorBoxNode* old_vbox = static_cast<VectorBoxNode*>(in(i));
new_vbox_phi->set_req(i, old_vbox->in(VectorBoxNode::Box));
new_vect_phi->set_req(i, old_vbox->in(VectorBoxNode::Value));
}
igvn->register_new_node_with_optimizer(new_vbox_phi, this);
igvn->register_new_node_with_optimizer(new_vect_phi, this);
progress = new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, vbox->box_type(), vbox->vec_type());
}
if (EnableVectorReboxing && can_reshape && progress == NULL && type()->isa_oopptr()) {
progress = merge_through_phi(this, phase->is_IterGVN());
}
return progress; // Return any progress
}
Node* PhiNode::clone_through_phi(Node* root_phi, const Type* t, uint c, PhaseIterGVN* igvn) {
Node_Stack stack(1);
VectorSet visited;
Node_List node_map;
stack.push(root_phi, 1); // ignore control
visited.set(root_phi->_idx);
Node* new_phi = new PhiNode(root_phi->in(0), t);
node_map.map(root_phi->_idx, new_phi);
while (stack.is_nonempty()) {
Node* n = stack.node();
uint idx = stack.index();
assert(n->is_Phi(), "not a phi");
if (idx < n->req()) {
stack.set_index(idx + 1);
Node* def = n->in(idx);
if (def == NULL) {
continue; // ignore dead path
} else if (def->is_Phi()) { // inner node
Node* new_phi = node_map[n->_idx];
if (!visited.test_set(def->_idx)) { // not visited yet
node_map.map(def->_idx, new PhiNode(def->in(0), t));
stack.push(def, 1); // ignore control
}
Node* new_in = node_map[def->_idx];
new_phi->set_req(idx, new_in);
} else if (def->Opcode() == Op_VectorBox) { // leaf
assert(n->is_Phi(), "not a phi");
Node* new_phi = node_map[n->_idx];
new_phi->set_req(idx, def->in(c));
} else {
assert(false, "not optimizeable");
return NULL;
}
} else {
Node* new_phi = node_map[n->_idx];
igvn->register_new_node_with_optimizer(new_phi, n);
stack.pop();
}
}
return new_phi;
}
Node* PhiNode::merge_through_phi(Node* root_phi, PhaseIterGVN* igvn) {
Node_Stack stack(1);
VectorSet visited;
stack.push(root_phi, 1); // ignore control
visited.set(root_phi->_idx);
VectorBoxNode* cached_vbox = NULL;
while (stack.is_nonempty()) {
Node* n = stack.node();
uint idx = stack.index();
if (idx < n->req()) {
stack.set_index(idx + 1);
Node* in = n->in(idx);
if (in == NULL) {
continue; // ignore dead path
} else if (in->isa_Phi()) {
if (!visited.test_set(in->_idx)) {
stack.push(in, 1); // ignore control
}
} else if (in->Opcode() == Op_VectorBox) {
VectorBoxNode* vbox = static_cast<VectorBoxNode*>(in);
if (cached_vbox == NULL) {
cached_vbox = vbox;
} else if (vbox->vec_type() != cached_vbox->vec_type()) {
// TODO: vector type mismatch can be handled with additional reinterpret casts
assert(Type::cmp(vbox->vec_type(), cached_vbox->vec_type()) != 0, "inconsistent");
return NULL; // not optimizable: vector type mismatch
} else if (vbox->box_type() != cached_vbox->box_type()) {
assert(Type::cmp(vbox->box_type(), cached_vbox->box_type()) != 0, "inconsistent");
return NULL; // not optimizable: box type mismatch
}
} else {
return NULL; // not optimizable: neither Phi nor VectorBox
}
} else {
stack.pop();
}
}
assert(cached_vbox != NULL, "sanity");
const TypeInstPtr* btype = cached_vbox->box_type();
const TypeVect* vtype = cached_vbox->vec_type();
Node* new_vbox_phi = clone_through_phi(root_phi, btype, VectorBoxNode::Box, igvn);
Node* new_vect_phi = clone_through_phi(root_phi, vtype, VectorBoxNode::Value, igvn);
return new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, btype, vtype);
}
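A schematic of what the two helpers above accomplish together (editor's illustration; node names follow the code, the concrete shapes are assumptions):
// Before: every leaf reachable through the phi web is a VectorBox.
//   Phi(region, VectorBox(box1, val1), VectorBox(box2, val2))
// After: clone_through_phi merges the Box and Value edges through
// separate phis, and merge_through_phi rebuilds a single VectorBox:
//   VectorBox(Phi(region, box1, box2), Phi(region, val1, val2))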
bool PhiNode::is_data_loop(RegionNode* r, Node* uin, const PhaseGVN* phase) {
// First, take the short cut when we know it is a loop and the EntryControl data path is dead.
// The loop node may only have one input because the entry path was removed in PhaseIdealLoop::Dominators().

View file

@ -143,6 +143,9 @@ class PhiNode : public TypeNode {
bool wait_for_region_igvn(PhaseGVN* phase);
bool is_data_loop(RegionNode* r, Node* uin, const PhaseGVN* phase);
static Node* clone_through_phi(Node* root_phi, const Type* t, uint c, PhaseIterGVN* igvn);
static Node* merge_through_phi(Node* root_phi, PhaseIterGVN* igvn);
public:
// Node layout (parallels RegionNode):
enum { Region, // Control input is the Phi's region.

View file

@ -77,6 +77,7 @@ void LRG::dump() const {
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
if( _is_vector ) tty->print("Vector ");
if( _is_predicate ) tty->print("Predicate ");
if( _is_scalable ) tty->print("Scalable ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
@ -638,7 +639,8 @@ void PhaseChaitin::Register_Allocate() {
LRG &lrg = lrgs(_lrg_map.live_range_id(i));
if (!lrg.alive()) {
set_bad(i);
} else if (lrg.num_regs() == 1) {
} else if ((lrg.num_regs() == 1 && !lrg.is_scalable()) ||
(lrg.is_scalable() && lrg.scalable_reg_slots() == 1)) {
set1(i, lrg.reg());
} else { // Must be a register-set
if (!lrg._fat_proj) { // Must be aligned adjacent register set
@ -653,15 +655,19 @@ void PhaseChaitin::Register_Allocate() {
// num_regs, which reflects the physical length of scalable registers.
num_regs = lrg.scalable_reg_slots();
}
OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors because
if (num_regs == 1) {
set1(i, hi);
} else {
OptoReg::Name lo = OptoReg::add(hi, (1 - num_regs)); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors/vmasks because
// the rest of code generation works only with pairs. It is safe
// since for registers encoding only 'lo' is used.
// Second reg from pair is used in ScheduleAndBundle on SPARC where
// vector max size is 8 which corresponds to registers pair.
// Second reg from the pair is used in ScheduleAndBundle with vector max
// size 8, which corresponds to a register pair.
// It is also used in BuildOopMaps but oop operations are not
// vectorized.
set2(i, lo);
}
} else { // Misaligned; extract 2 bits
OptoReg::Name hi = lrg.reg(); // Get hi register
lrg.Remove(hi); // Yank from mask
@ -824,6 +830,20 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
}
}
if (ireg == Op_RegVectMask) {
assert(Matcher::has_predicated_vectors(), "predicated vector should be supported");
lrg._is_predicate = 1;
if (Matcher::supports_scalable_vector()) {
lrg._is_scalable = 1;
// For a scalable predicate, when it is allocated in a physical register,
// num_regs is RegMask::SlotsPerRegVectMask for the reg mask,
// which may not be the actual physical register size.
// If it is allocated on the stack, we need to get the actual
// physical length of the scalable predicate register.
lrg.set_scalable_reg_slots(Matcher::scalable_predicate_reg_slots());
}
}
assert(n_type->isa_vect() == NULL || lrg._is_vector ||
ireg == Op_RegD || ireg == Op_RegL || ireg == Op_RegVectMask,
"vector must be in vector registers");
@ -919,6 +939,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
}
break;
case Op_RegVectMask:
assert(Matcher::has_predicated_vectors(), "sanity");
assert(RegMask::num_registers(Op_RegVectMask) == RegMask::SlotsPerRegVectMask, "sanity");
lrg.set_num_regs(RegMask::SlotsPerRegVectMask);
lrg.set_reg_pressure(1);
break;
@ -1371,6 +1393,11 @@ static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
}
}
return OptoReg::Bad; // will cause chunk change, and retry next chunk
} else if (lrg._is_predicate) {
assert(num_regs == RegMask::SlotsPerRegVectMask, "scalable predicate register");
num_regs = lrg.scalable_reg_slots();
mask.clear_to_sets(num_regs);
return mask.find_first_set(lrg, num_regs);
}
}
@ -1417,7 +1444,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
}
// If no bias info exists, just go with the register selection ordering
if (lrg._is_vector || lrg.num_regs() == 2) {
if (lrg._is_vector || lrg.num_regs() == 2 || lrg.is_scalable()) {
// Find an aligned set
return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -163,8 +163,8 @@ public:
bool is_scalable() {
#ifdef ASSERT
if (_is_scalable) {
// Should only be a vector for now, but it could also be a RegVectMask in future.
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA) ||
_is_predicate && (_num_regs == RegMask::SlotsPerRegVectMask), "unexpected scalable reg");
}
#endif
return Matcher::implements_scalable_vector && _is_scalable;
@ -195,6 +195,7 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
_is_vector:1, // True if in vector registers
_is_predicate:1, // True if in mask/predicate registers
_is_scalable:1, // True if register size is scalable
// e.g. Arm SVE vector/predicate registers.
_was_spilled1:1, // True if prior spilling on def

View file

@ -415,8 +415,10 @@ macro(MinReductionV)
macro(MaxReductionV)
macro(LoadVector)
macro(LoadVectorGather)
macro(LoadVectorGatherMasked)
macro(StoreVector)
macro(StoreVectorScatter)
macro(StoreVectorScatterMasked)
macro(LoadVectorMasked)
macro(StoreVectorMasked)
macro(VectorCmpMasked)
@ -425,6 +427,7 @@ macro(VectorMaskOp)
macro(VectorMaskTrueCount)
macro(VectorMaskFirstTrue)
macro(VectorMaskLastTrue)
macro(VectorMaskToLong)
macro(Pack)
macro(PackB)
macro(PackS)
@ -475,3 +478,7 @@ macro(VectorCastL2X)
macro(VectorCastF2X)
macro(VectorCastD2X)
macro(VectorInsert)
macro(MaskAll)
macro(AndVMask)
macro(OrVMask)
macro(XorVMask)

View file

@ -2134,7 +2134,8 @@ void Compile::Optimize() {
if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
Compile::TracePhase tp("", &timers[_t_renumberLive]);
initial_gvn()->replace_with(&igvn);
for_igvn()->clear();
Unique_Node_List* old_worklist = for_igvn();
old_worklist->clear();
Unique_Node_List new_worklist(C->comp_arena());
{
ResourceMark rm;
@ -2144,7 +2145,7 @@ void Compile::Optimize() {
set_for_igvn(&new_worklist);
igvn = PhaseIterGVN(initial_gvn());
igvn.optimize();
set_for_igvn(save_for_igvn);
set_for_igvn(old_worklist); // new_worklist is dead beyond this point
}
// Now that all inlining is over and no PhaseRemoveUseless will run, cut edge from root to loop
@ -2358,6 +2359,7 @@ bool Compile::has_vbox_nodes() {
static bool is_vector_unary_bitwise_op(Node* n) {
return n->Opcode() == Op_XorV &&
n->req() == 2 &&
VectorNode::is_vector_bitwise_not_pattern(n);
}
@ -2365,7 +2367,7 @@ static bool is_vector_binary_bitwise_op(Node* n) {
switch (n->Opcode()) {
case Op_AndV:
case Op_OrV:
return true;
return n->req() == 2;
case Op_XorV:
return !is_vector_unary_bitwise_op(n);
@ -3424,6 +3426,8 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
case Op_StoreVector:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_LoadVectorGatherMasked:
case Op_StoreVectorScatterMasked:
case Op_VectorCmpMasked:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:

View file

@ -702,8 +702,9 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_
case Op_StoreP:
case Op_StoreN:
case Op_StoreVector:
case Op_StoreVectorScatter:
case Op_StoreVectorMasked:
case Op_StoreVectorScatter:
case Op_StoreVectorScatterMasked:
case Op_StoreNKlass:
for (uint k = 1; k < m->req(); k++) {
Node *in = m->in(k);

View file

@ -662,8 +662,12 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorLoadMaskedOp:
return inline_vector_mem_masked_operation(/*is_store=*/false);
case vmIntrinsics::_VectorStoreOp:
return inline_vector_mem_operation(/*is_store=*/true);
case vmIntrinsics::_VectorStoreMaskedOp:
return inline_vector_mem_masked_operation(/*is_store=*/true);
case vmIntrinsics::_VectorGatherOp:
return inline_vector_gather_scatter(/*is_scatter*/ false);
case vmIntrinsics::_VectorScatterOp:

View file

@ -319,6 +319,7 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_shuffle_iota();
bool inline_vector_mask_operation();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_mem_masked_operation(bool is_store);
bool inline_vector_gather_scatter(bool is_scatter);
bool inline_vector_reduction();
bool inline_vector_test();
@ -332,14 +333,15 @@ class LibraryCallKit : public GraphKit {
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
enum VectorMaskUseType {
VecMaskUseLoad,
VecMaskUseStore,
VecMaskUseAll,
VecMaskNotUsed
VecMaskUseLoad = 1 << 0,
VecMaskUseStore = 1 << 1,
VecMaskUseAll = VecMaskUseLoad | VecMaskUseStore,
VecMaskUsePred = 1 << 2,
VecMaskNotUsed = 1 << 3
};
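// Editor's note (not part of the change): with bit-flag values a caller can
// combine usages, e.g. VecMaskUseAll, and test one independently:
//   bool needs_pred = (mask_use_type & VecMaskUsePred) != 0;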
bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false);
bool arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args = false);
bool arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, VectorMaskUseType mask_use_type, bool has_scalar_args = false);
void clear_upper_avx() {
#ifdef X86

View file

@ -434,6 +434,24 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
const int Matcher::scalable_predicate_reg_slots() {
assert(Matcher::has_predicated_vectors() && Matcher::supports_scalable_vector(),
"scalable predicate vector should be supported");
int vector_reg_bit_size = Matcher::scalable_vector_reg_size(T_BYTE) << LogBitsPerByte;
// We assume each predicate register is one-eighth the size of the
// scalable vector register, one mask bit per vector byte.
int predicate_reg_bit_size = vector_reg_bit_size >> 3;
// Compute the number of slots required when a scalable predicate
// register is spilled. E.g. if the scalable vector register is 640 bits,
// the predicate register is 80 bits, which is 2.5 32-bit slots.
// We round the slot count up to a power of 2, as required
// by find_first_set().
int slots = predicate_reg_bit_size & (BitsPerInt - 1)
? (predicate_reg_bit_size >> LogBitsPerInt) + 1
: predicate_reg_bit_size >> LogBitsPerInt;
return round_up_power_of_2(slots);
}
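A standalone sketch of the arithmetic above (editor's illustration in Java, not HotSpot code; it assumes 32-bit stack slots, matching BitsPerInt):

    public class PredicateSlotsDemo {
        // Mirrors scalable_predicate_reg_slots(): one mask bit per vector byte,
        // whole 32-bit slots, rounded up to a power of two for find_first_set().
        static int predicateRegSlots(int vectorRegBits) {
            int predicateBits = vectorRegBits >> 3;     // e.g. 640 -> 80
            int slots = (predicateBits + 31) >> 5;      // 80 bits -> 3 slots
            return Integer.highestOneBit(slots) == slots
                   ? slots
                   : Integer.highestOneBit(slots) << 1; // next power of two
        }
        public static void main(String[] args) {
            System.out.println(predicateRegSlots(640)); // 80 bits -> 3 slots -> 4
            System.out.println(predicateRegSlots(512)); // 64 bits -> 2 slots -> 2
        }
    }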
#define NOF_STACK_MASKS (3*13)
// Create the initial stack mask used by values spilling to the stack.
@ -542,6 +560,8 @@ void Matcher::init_first_stack_mask() {
if (Matcher::has_predicated_vectors()) {
*idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask];
idealreg2spillmask[Op_RegVectMask]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_RegVectMask] = RegMask::Empty;
}
if (Matcher::vector_size_supported(T_BYTE,4)) {
@ -614,6 +634,19 @@ void Matcher::init_first_stack_mask() {
if (Matcher::supports_scalable_vector()) {
int k = 1;
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
// Exclude last input arg stack slots to avoid spilling vector register there,
// otherwise RegVectMask spills could stomp over stack slots in caller frame.
for (; (in >= init_in) && (k < scalable_predicate_reg_slots()); k++) {
scalable_stack_mask.Remove(in);
in = OptoReg::add(in, -1);
}
// For RegVectMask
scalable_stack_mask.clear_to_sets(scalable_predicate_reg_slots());
assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask];
idealreg2spillmask[Op_RegVectMask]->OR(scalable_stack_mask);
// Exclude last input arg stack slots to avoid spilling vector register there,
// otherwise vector spills could stomp over stack slots in caller frame.
for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
@ -2228,6 +2261,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
case Op_MacroLogicV:
case Op_LoadVectorMasked:
case Op_VectorCmpMasked:
case Op_VectorLoadMask:
set_shared(n); // Force result into register (it will be anyways)
break;
case Op_ConP: { // Convert pointers above the centerline to NUL
@ -2273,6 +2307,21 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
}
void Matcher::find_shared_post_visit(Node* n, uint opcode) {
if (n->is_predicated_vector()) {
// Restructure into binary trees for Matching.
if (n->req() == 4) {
n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
n->set_req(2, n->in(3));
n->del_req(3);
} else if (n->req() == 5) {
n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
n->set_req(2, new BinaryNode(n->in(3), n->in(4)));
n->del_req(4);
n->del_req(3);
}
return;
}
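// Editor's illustration (not part of the change): for a predicated binary
// node with req() == 4, inputs (in1, in2, mask) are reshaped to
//   (Binary(in1, in2), mask)
// and for a predicated ternary node with req() == 5, (in1, in2, in3, in4)
// becomes (Binary(in1, in2), Binary(in3, in4)), so the matcher only ever
// walks binary trees.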
switch(opcode) { // Handle some opcodes special
case Op_StorePConditional:
case Op_StoreIConditional:
@ -2412,12 +2461,22 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_LoadVectorGatherMasked:
case Op_StoreVectorScatter: {
Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
n->set_req(MemNode::ValueIn, pair);
n->del_req(MemNode::ValueIn+1);
break;
}
case Op_StoreVectorScatterMasked: {
Node* pair = new BinaryNode(n->in(MemNode::ValueIn+1), n->in(MemNode::ValueIn+2));
n->set_req(MemNode::ValueIn+1, pair);
n->del_req(MemNode::ValueIn+2);
pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
n->set_req(MemNode::ValueIn, pair);
n->del_req(MemNode::ValueIn+1);
break;
}
case Op_VectorMaskCmp: {
n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
n->set_req(2, n->in(3));

View file

@ -329,6 +329,8 @@ public:
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);
static const bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);
static const RegMask* predicate_reg_mask(void);
static const TypeVect* predicate_reg_type(const Type* elemTy, int length);
@ -345,6 +347,8 @@ public:
// Actual max scalable vector register length.
static const int scalable_vector_reg_size(const BasicType bt);
// Actual max scalable predicate register length.
static const int scalable_predicate_reg_slots();
// Vector ideal reg
static const uint vector_ideal_reg(int len);

View file

@ -1136,7 +1136,7 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
return NULL;
}
// LoadVector/StoreVector needs additional check to ensure the types match.
if (store_Opcode() == Op_StoreVector) {
if (st->is_StoreVector()) {
const TypeVect* in_vt = st->as_StoreVector()->vect_type();
const TypeVect* out_vt = as_LoadVector()->vect_type();
if (in_vt != out_vt) {

View file

@ -172,7 +172,9 @@ class LoadVectorGatherNode;
class StoreVectorNode;
class StoreVectorScatterNode;
class VectorMaskCmpNode;
class VectorUnboxNode;
class VectorSet;
class VectorReinterpretNode;
// The type of all node counts and indexes.
// It must hold at least 16 bits, but must also be fast to load and store.
@ -707,6 +709,8 @@ public:
DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
DEFINE_CLASS_ID(Vector, Type, 7)
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
DEFINE_CLASS_ID(VectorUnbox, Vector, 1)
DEFINE_CLASS_ID(VectorReinterpret, Vector, 2)
DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
@ -778,7 +782,8 @@ public:
Flag_is_scheduled = 1 << 12,
Flag_has_vector_mask_set = 1 << 13,
Flag_is_expensive = 1 << 14,
Flag_for_post_loop_opts_igvn = 1 << 15,
Flag_is_predicated_vector = 1 << 15,
Flag_for_post_loop_opts_igvn = 1 << 16,
_last_flag = Flag_for_post_loop_opts_igvn
};
@ -933,11 +938,13 @@ public:
DEFINE_CLASS_QUERY(SubTypeCheck)
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
DEFINE_CLASS_QUERY(VectorMaskCmp)
DEFINE_CLASS_QUERY(VectorUnbox)
DEFINE_CLASS_QUERY(VectorReinterpret);
DEFINE_CLASS_QUERY(LoadVector)
DEFINE_CLASS_QUERY(LoadVectorGather)
DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(StoreVectorScatter)
DEFINE_CLASS_QUERY(VectorMaskCmp)
DEFINE_CLASS_QUERY(Unlock)
#undef DEFINE_CLASS_QUERY
@ -988,6 +995,8 @@ public:
// It must have the loop's phi as input and provide a def to the phi.
bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; }
bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; }
// The node is a CountedLoopEnd with a mask annotation so as to emit a restore context
bool has_vector_mask_set() const { return (_flags & Flag_has_vector_mask_set) != 0; }

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -309,17 +309,16 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
// "val_reg" and "reg". For example, when "val" resides in register
// but "reg" is located in stack.
if (lrgs(val_idx).is_scalable()) {
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
assert(val->ideal_reg() == Op_VecA || val->ideal_reg() == Op_RegVectMask, "scalable register");
if (OptoReg::is_stack(reg)) {
n_regs = lrgs(val_idx).scalable_reg_slots();
} else {
n_regs = RegMask::SlotsPerVecA;
n_regs = lrgs(val_idx)._is_predicate ? RegMask::SlotsPerRegVectMask : RegMask::SlotsPerVecA;
}
}
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
uint last;
if (lrgs(val_idx).is_scalable()) {
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
if (lrgs(val_idx).is_scalable() && val->ideal_reg() == Op_VecA) {
// For scalable vector register, regmask is always SlotsPerVecA bits aligned
last = RegMask::SlotsPerVecA - 1;
} else {

View file

@ -237,7 +237,7 @@ bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
if (lrg.is_scalable()) {
if (lrg.is_scalable() && lrg._is_vector) {
// For scalable vector register, regmask is SlotsPerVecA bits aligned.
assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
} else {

View file

@ -997,7 +997,6 @@ const Type *Type::filter_helper(const Type *kills, bool include_speculative) con
}
//------------------------------xdual------------------------------------------
const Type *Type::xdual() const {
// Note: the base() accessor asserts the sanity of _base.
assert(_type_info[base()].dual_type != Bad, "implement with v-call");
@ -2359,7 +2358,10 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
const TypeVect *TypeVect::VECTMASK = NULL; // predicate/mask vector
//------------------------------make-------------------------------------------
const TypeVect* TypeVect::make(const Type *elem, uint length) {
const TypeVect* TypeVect::make(const Type *elem, uint length, bool is_mask) {
if (is_mask) {
return makemask(elem, length);
}
BasicType elem_bt = elem->array_element_basic_type();
assert(is_java_primitive(elem_bt), "only primitive types in vector");
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
@ -2385,7 +2387,9 @@ const TypeVect* TypeVect::make(const Type *elem, uint length) {
}
const TypeVect *TypeVect::makemask(const Type* elem, uint length) {
if (Matcher::has_predicated_vectors()) {
BasicType elem_bt = elem->array_element_basic_type();
if (Matcher::has_predicated_vectors() &&
Matcher::match_rule_supported_vector_masked(Op_VectorLoadMask, length, elem_bt)) {
const TypeVect* mtype = Matcher::predicate_reg_type(elem, length);
return (TypeVect*)(const_cast<TypeVect*>(mtype))->hashcons();
} else {

View file

@ -804,12 +804,12 @@ public:
virtual bool singleton(void) const; // TRUE if type is a singleton
virtual bool empty(void) const; // TRUE if type is vacuous
static const TypeVect *make(const BasicType elem_bt, uint length) {
static const TypeVect *make(const BasicType elem_bt, uint length, bool is_mask = false) {
// Use bottom primitive type.
return make(get_const_basic_type(elem_bt), length);
return make(get_const_basic_type(elem_bt), length, is_mask);
}
// Used directly by Replicate nodes to construct singleton vector.
static const TypeVect *make(const Type* elem, uint length);
static const TypeVect *make(const Type* elem, uint length, bool is_mask = false);
static const TypeVect *makemask(const BasicType elem_bt, uint length) {
// Use bottom primitive type.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -353,7 +353,10 @@ Node* PhaseVector::expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc,
int num_elem = vect_type->length();
bool is_mask = is_vector_mask(box_klass);
if (is_mask && bt != T_BOOLEAN) {
// If the boxed mask value is present in a predicate register, it must be
// spilled to a vector through a VectorStoreMask operation before the actual
// StoreVector operation to the vector payload field.
if (is_mask && (value->bottom_type()->isa_vectmask() || bt != T_BOOLEAN)) {
value = gvn.transform(VectorStoreMaskNode::make(gvn, value, bt, num_elem));
// Although the type of a mask depends on its definition, in terms of storage everything is stored in a boolean array.
bt = T_BOOLEAN;
@ -469,7 +472,7 @@ void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) {
C->set_max_vector_size(MAX2(C->max_vector_size(), vt->length_in_bytes()));
if (is_vector_mask(from_kls)) {
vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::make(masktype, num_elem)));
vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::makemask(masktype, num_elem)));
} else if (is_vector_shuffle(from_kls) && !vec_unbox->is_shuffle_to_vector()) {
assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency");
vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem)));

File diff suppressed because it is too large

View file

@ -366,8 +366,8 @@ bool VectorNode::is_scalar_rotate(Node* n) {
return false;
}
bool VectorNode::is_vshift_cnt(Node* n) {
switch (n->Opcode()) {
bool VectorNode::is_vshift_cnt_opcode(int opc) {
switch (opc) {
case Op_LShiftCntV:
case Op_RShiftCntV:
return true;
@ -376,6 +376,10 @@ bool VectorNode::is_vshift_cnt(Node* n) {
}
}
bool VectorNode::is_vshift_cnt(Node* n) {
return is_vshift_cnt_opcode(n->Opcode());
}
// Check if input is loop invariant vector.
bool VectorNode::is_invariant_vector(Node* n) {
// Only Replicate vector nodes are loop invariant for now.
@ -442,10 +446,40 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
}
}
VectorNode* VectorNode::make_mask_node(int vopc, Node* n1, Node* n2, uint vlen, BasicType bt) {
guarantee(vopc > 0, "vopc must be > 0");
const TypeVect* vmask_type = TypeVect::makemask(bt, vlen);
switch (vopc) {
case Op_AndV:
if (Matcher::match_rule_supported_vector_masked(Op_AndVMask, vlen, bt)) {
return new AndVMaskNode(n1, n2, vmask_type);
}
return new AndVNode(n1, n2, vmask_type);
case Op_OrV:
if (Matcher::match_rule_supported_vector_masked(Op_OrVMask, vlen, bt)) {
return new OrVMaskNode(n1, n2, vmask_type);
}
return new OrVNode(n1, n2, vmask_type);
case Op_XorV:
if (Matcher::match_rule_supported_vector_masked(Op_XorVMask, vlen, bt)) {
return new XorVMaskNode(n1, n2, vmask_type);
}
return new XorVNode(n1, n2, vmask_type);
default:
fatal("Unsupported mask vector creation for '%s'", NodeClassNames[vopc]);
return NULL;
}
}
// Make a vector node for binary operation
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt) {
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, bool is_mask) {
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, "vopc must be > 0");
if (is_mask) {
return make_mask_node(vopc, n1, n2, vt->length(), vt->element_basic_type());
}
switch (vopc) {
case Op_AddVB: return new AddVBNode(n1, n2, vt);
case Op_AddVS: return new AddVSNode(n1, n2, vt);
@ -552,10 +586,15 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, B
}
// Scalar promotion
VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t, bool is_mask) {
BasicType bt = opd_t->array_element_basic_type();
const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)
: TypeVect::make(bt, vlen);
const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen, is_mask)
: TypeVect::make(bt, vlen, is_mask);
if (is_mask && Matcher::match_rule_supported_vector(Op_MaskAll, vlen, bt)) {
return new MaskAllNode(s, vt);
}
switch (bt) {
case T_BOOLEAN:
case T_BYTE:
@ -1006,9 +1045,10 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi
Node* VectorLoadMaskNode::Identity(PhaseGVN* phase) {
BasicType out_bt = type()->is_vect()->element_basic_type();
if (out_bt == T_BOOLEAN) {
if (!Matcher::has_predicated_vectors() && out_bt == T_BOOLEAN) {
return in(1); // redundant conversion
}
return this;
}
@ -1105,7 +1145,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
case Op_MinReductionV:
switch (bt) {
case T_BYTE:
return gvn.makecon(TypeInt::make(max_jbyte));
case T_SHORT:
return gvn.makecon(TypeInt::make(max_jshort));
case T_INT:
return gvn.makecon(TypeInt::MAX);
case T_LONG:
@ -1120,7 +1162,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt)
case Op_MaxReductionV:
switch (bt) {
case T_BYTE:
return gvn.makecon(TypeInt::make(min_jbyte));
case T_SHORT:
return gvn.makecon(TypeInt::make(min_jshort));
case T_INT:
return gvn.makecon(TypeInt::MIN);
case T_LONG:
@ -1313,16 +1357,17 @@ Node* VectorUnboxNode::Ideal(PhaseGVN* phase, bool can_reshape) {
bool is_vector_mask = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
bool is_vector_shuffle = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
if (is_vector_mask) {
const TypeVect* vmask_type = TypeVect::makemask(out_vt->element_basic_type(), out_vt->length());
if (in_vt->length_in_bytes() == out_vt->length_in_bytes() &&
Matcher::match_rule_supported_vector(Op_VectorMaskCast, out_vt->length(), out_vt->element_basic_type())) {
// Apply "VectorUnbox (VectorBox vmask) ==> VectorMaskCast (vmask)"
// directly. This could avoid the transformation ordering issue from
// "VectorStoreMask (VectorLoadMask vmask) => vmask".
return new VectorMaskCastNode(value, out_vt);
return new VectorMaskCastNode(value, vmask_type);
}
// VectorUnbox (VectorBox vmask) ==> VectorLoadMask (VectorStoreMask vmask)
value = phase->transform(VectorStoreMaskNode::make(*phase, value, in_vt->element_basic_type(), in_vt->length()));
return new VectorLoadMaskNode(value, out_vt);
return new VectorLoadMaskNode(value, vmask_type);
} else if (is_vector_shuffle) {
if (!is_shuffle_to_vector()) {
// VectorUnbox (VectorBox vshuffle) ==> VectorLoadShuffle vshuffle
@ -1380,13 +1425,14 @@ Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
return new VectorMaskLastTrueNode(mask, ty);
case Op_VectorMaskFirstTrue:
return new VectorMaskFirstTrueNode(mask, ty);
case Op_VectorMaskToLong:
return new VectorMaskToLongNode(mask, ty);
default:
assert(false, "Unhandled operation");
}
return NULL;
}
#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
CallStaticJavaNode::dump_spec(st);

View file

@ -66,16 +66,22 @@ class VectorNode : public TypeNode {
virtual int Opcode() const;
virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
virtual uint ideal_reg() const {
return type()->ideal_reg();
}
static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t);
static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t, bool is_mask = false);
static VectorNode* shift_count(int opc, Node* cnt, uint vlen, BasicType bt);
static VectorNode* make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt);
static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt, bool is_mask = false);
static VectorNode* make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt);
static VectorNode* make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt);
static VectorNode* make_mask_node(int vopc, Node* n1, Node* n2, uint vlen, BasicType bt);
static bool is_shift_opcode(int opc);
static bool is_vshift_cnt_opcode(int opc);
static bool is_rotate_opcode(int opc);
static int opcode(int opc, BasicType bt);
@ -808,7 +814,7 @@ class StoreVectorMaskedNode : public StoreVectorNode {
public:
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
: StoreVectorNode(c, mem, dst, at, src) {
assert(mask->bottom_type()->is_vectmask(), "sanity");
assert(mask->bottom_type()->isa_vectmask(), "sanity");
init_class_id(Class_StoreVector);
set_mismatched_access();
add_req(mask);
@ -828,7 +834,7 @@ class LoadVectorMaskedNode : public LoadVectorNode {
public:
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
: LoadVectorNode(c, mem, src, at, vt) {
assert(mask->bottom_type()->is_vectmask(), "sanity");
assert(mask->bottom_type()->isa_vectmask(), "sanity");
init_class_id(Class_LoadVector);
set_mismatched_access();
add_req(mask);
@ -842,6 +848,45 @@ class LoadVectorMaskedNode : public LoadVectorNode {
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
//-------------------------------LoadVectorGatherMaskedNode---------------------------------
// Load a vector from memory via an index map under the influence of a predicate register (mask).
class LoadVectorGatherMaskedNode : public LoadVectorNode {
public:
LoadVectorGatherMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices, Node* mask)
: LoadVectorNode(c, mem, adr, at, vt) {
init_class_id(Class_LoadVector);
assert(indices->bottom_type()->is_vect(), "indices must be in vector");
assert(mask->bottom_type()->isa_vectmask(), "sanity");
add_req(indices);
add_req(mask);
assert(req() == MemNode::ValueIn + 2, "match_edge expects that last input is in MemNode::ValueIn+1");
}
virtual int Opcode() const;
virtual uint match_edge(uint idx) const { return idx == MemNode::Address ||
idx == MemNode::ValueIn ||
idx == MemNode::ValueIn + 1; }
};
//------------------------------StoreVectorScatterMaskedNode--------------------------------
// Store a vector into memory via an index map under the influence of a predicate register (mask).
class StoreVectorScatterMaskedNode : public StoreVectorNode {
public:
StoreVectorScatterMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val, Node* indices, Node* mask)
: StoreVectorNode(c, mem, adr, at, val) {
init_class_id(Class_StoreVector);
assert(indices->bottom_type()->is_vect(), "indices must be in vector");
assert(mask->bottom_type()->isa_vectmask(), "sanity");
add_req(indices);
add_req(mask);
assert(req() == MemNode::ValueIn + 3, "match_edge expects that last input is in MemNode::ValueIn+2");
}
virtual int Opcode() const;
virtual uint match_edge(uint idx) const { return idx == MemNode::Address ||
idx == MemNode::ValueIn ||
idx == MemNode::ValueIn + 1 ||
idx == MemNode::ValueIn + 2; }
};
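From the Java side these two nodes back the masked gather and scatter paths. A minimal, hedged usage sketch (species, array contents, and the demo class are the editor's assumptions; compile with --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.*;

    public class MaskedGatherDemo {
        public static void main(String[] args) {
            VectorSpecies<Integer> SP = IntVector.SPECIES_128;  // 4 int lanes
            int[] data = {10, 20, 30, 40, 50, 60, 70, 80};
            int[] indexMap = {7, 0, 3, 5};                      // per-lane indices
            VectorMask<Integer> m = SP.indexInRange(0, 3);      // lanes 0..2 active
            // Masked gather: inactive lanes are not loaded and read as zero.
            IntVector v = IntVector.fromArray(SP, data, 0, indexMap, 0, m);
            // Masked scatter: only active lanes are written back.
            v.intoArray(data, 0, indexMap, 0, m);
        }
    }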
//------------------------------VectorCmpMaskedNode--------------------------------
// Vector comparison under the influence of a predicate register (mask).
@ -856,7 +901,6 @@ class VectorCmpMaskedNode : public TypeNode {
virtual int Opcode() const;
};
class VectorMaskGenNode : public TypeNode {
public:
VectorMaskGenNode(Node* length, const Type* ty, BasicType ety): TypeNode(ty, 2), _elemType(ety) {
@ -878,7 +922,7 @@ class VectorMaskOpNode : public TypeNode {
public:
VectorMaskOpNode(Node* mask, const Type* ty, int mopc):
TypeNode(ty, 2), _mopc(mopc) {
assert(mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, "");
assert(Matcher::has_predicated_vectors() || mask->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN, "");
init_req(1, mask);
}
@ -913,6 +957,42 @@ class VectorMaskLastTrueNode : public VectorMaskOpNode {
virtual int Opcode() const;
};
class VectorMaskToLongNode : public VectorMaskOpNode {
public:
VectorMaskToLongNode(Node* mask, const Type* ty):
VectorMaskOpNode(mask, ty, Op_VectorMaskToLong) {}
virtual int Opcode() const;
virtual uint ideal_reg() const { return Op_RegL; }
};
//-------------------------- Vector mask broadcast -----------------------------------
class MaskAllNode : public VectorNode {
public:
MaskAllNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
virtual int Opcode() const;
};
//--------------------------- Vector mask logical and --------------------------------
class AndVMaskNode : public VectorNode {
public:
AndVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//--------------------------- Vector mask logical or ---------------------------------
class OrVMaskNode : public VectorNode {
public:
OrVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//--------------------------- Vector mask logical xor --------------------------------
class XorVMaskNode : public VectorNode {
public:
XorVMaskNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
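These node types keep mask logic in predicate registers when the matcher supports it; at the API level that is ordinary VectorMask logic. A hedged sketch (species chosen for illustration):

    import jdk.incubator.vector.*;

    public class MaskLogicDemo {
        public static void main(String[] args) {
            VectorSpecies<Integer> SP = IntVector.SPECIES_128;  // 4 lanes
            VectorMask<Integer> a = SP.indexInRange(0, 3);      // lanes 0..2
            VectorMask<Integer> b = SP.indexInRange(0, 2);      // lanes 0..1
            System.out.println(a.and(b));                       // lanes 0..1
            System.out.println(a.or(b));                        // lanes 0..2
        }
    }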
//=========================Promote_Scalar_to_Vector============================
//------------------------------ReplicateBNode---------------------------------
@ -1184,7 +1264,7 @@ class VectorMaskCmpNode : public VectorNode {
BoolTest::mask _predicate;
protected:
uint size_of() const { return sizeof(*this); }
virtual uint size_of() const { return sizeof(VectorMaskCmpNode); }
public:
VectorMaskCmpNode(BoolTest::mask predicate, Node* in1, Node* in2, ConINode* predicate_node, const TypeVect* vt) :
@ -1194,6 +1274,7 @@ class VectorMaskCmpNode : public VectorNode {
"VectorMaskCmp inputs must have same type for elements");
assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(),
"VectorMaskCmp inputs must have same number of elements");
assert((BoolTest::mask)predicate_node->get_int() == predicate, "Unmatched predicates");
init_class_id(Class_VectorMaskCmp);
}
@ -1305,7 +1386,6 @@ class VectorMaskCastNode : public VectorNode {
VectorMaskCastNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
const TypeVect* in_vt = in->bottom_type()->is_vect();
assert(in_vt->length() == vt->length(), "vector length must match");
assert(type2aelembytes(in_vt->element_basic_type()) == type2aelembytes(vt->element_basic_type()), "element size must match");
}
virtual int Opcode() const;
@ -1315,12 +1395,19 @@ class VectorMaskCastNode : public VectorNode {
class VectorReinterpretNode : public VectorNode {
private:
const TypeVect* _src_vt;
protected:
uint size_of() const { return sizeof(*this); }
uint size_of() const { return sizeof(VectorReinterpretNode); }
public:
VectorReinterpretNode(Node* in, const TypeVect* src_vt, const TypeVect* dst_vt)
: VectorNode(in, dst_vt), _src_vt(src_vt) { }
: VectorNode(in, dst_vt), _src_vt(src_vt) {
assert((!dst_vt->isa_vectmask() && !src_vt->isa_vectmask()) ||
(type2aelembytes(src_vt->element_basic_type()) >= type2aelembytes(dst_vt->element_basic_type())),
"unsupported mask widening reinterpretation");
init_class_id(Class_VectorReinterpret);
}
const TypeVect* src_type() { return _src_vt; }
virtual uint hash() const { return VectorNode::hash() + _src_vt->hash(); }
virtual bool cmp( const Node &n ) const {
return VectorNode::cmp(n) && !Type::cmp(_src_vt,((VectorReinterpretNode&)n)._src_vt);
@ -1453,6 +1540,7 @@ class VectorUnboxNode : public VectorNode {
VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem, bool shuffle_to_vector)
: VectorNode(mem, obj, vec_type) {
_shuffle_to_vector = shuffle_to_vector;
init_class_id(Class_VectorUnbox);
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
@ -1482,5 +1570,4 @@ public:
virtual int Opcode() const;
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
#endif // SHARE_OPTO_VECTORNODE_HPP

View file

@ -430,6 +430,18 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) {
}
break;
}
case VECTOR_OP_MASK_TOLONG: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: // fall-through
case T_LONG: // fall-through
case T_FLOAT: // fall-through
case T_DOUBLE: return Op_VectorMaskToLong;
default: fatal("MASK_TOLONG: %s", type2name(bt));
}
break;
}
case VECTOR_OP_TAN:
case VECTOR_OP_TANH:
case VECTOR_OP_SIN:

View file

@ -82,10 +82,11 @@ class VectorSupport : AllStatic {
VECTOR_OP_MASK_TRUECOUNT = 19,
VECTOR_OP_MASK_FIRSTTRUE = 20,
VECTOR_OP_MASK_LASTTRUE = 21,
VECTOR_OP_MASK_TOLONG = 22,
// Rotate operations
VECTOR_OP_LROTATE = 22,
VECTOR_OP_RROTATE = 23,
VECTOR_OP_LROTATE = 23,
VECTOR_OP_RROTATE = 24,
// Vector Math Library
VECTOR_OP_TAN = 101,

View file

@ -1849,6 +1849,10 @@
declare_c2_type(VectorUnboxNode, VectorNode) \
declare_c2_type(VectorReinterpretNode, VectorNode) \
declare_c2_type(VectorMaskCastNode, VectorNode) \
declare_c2_type(MaskAllNode, VectorNode) \
declare_c2_type(AndVMaskNode, VectorNode) \
declare_c2_type(OrVMaskNode, VectorNode) \
declare_c2_type(XorVMaskNode, VectorNode) \
declare_c2_type(VectorBoxNode, Node) \
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
declare_c2_type(VectorTestNode, Node) \

View file

@ -320,6 +320,17 @@ public:
return min;
}
void truncate_to(int idx) {
for (int i = 0, j = idx; j < length(); i++, j++) {
at_put(i, at(j));
}
trunc_to(length() - idx);
}
void truncate_from(int idx) {
trunc_to(idx);
}
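// Editor's illustration (not part of the change): for {a, b, c, d},
//   truncate_to(1)   keeps the tail starting at index 1  -> {b, c, d}
//   truncate_from(2) keeps the head before index 2       -> {a, b}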
size_t data_size_in_bytes() const {
return _len * sizeof(E);
}

View file

@ -353,6 +353,9 @@ public class ScopedMemoryAccess {
static final long BYTE_BUFFER_HB
= UNSAFE.objectFieldOffset(ByteBuffer.class, "hb");
static final long BYTE_BUFFER_IS_READ_ONLY
= UNSAFE.objectFieldOffset(ByteBuffer.class, "isReadOnly");
@ForceInline
static Object bufferBase(ByteBuffer bb) {
return UNSAFE.getReference(bb, BYTE_BUFFER_HB);
@ -373,13 +376,18 @@ public class ScopedMemoryAccess {
}
}
@ForceInline
public static boolean isReadOnly(ByteBuffer bb) {
return UNSAFE.getBoolean(bb, BufferAccess.BYTE_BUFFER_IS_READ_ONLY);
}
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>>
V loadFromByteBuffer(Class<? extends V> vmClass, Class<E> e, int length,
ByteBuffer bb, int offset,
S s,
VectorSupport.LoadOperation<ByteBuffer, V, E, S> defaultImpl) {
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
try {
return loadFromByteBufferScoped(
BufferAccess.scope(bb),
@ -400,14 +408,59 @@ public class ScopedMemoryAccess {
Class<? extends V> vmClass, Class<E> e, int length,
ByteBuffer bb, int offset,
S s,
VectorSupport.LoadOperation<ByteBuffer, V, E, S> defaultImpl) {
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
try {
if (scope != null) {
scope.checkValidState();
}
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
return VectorSupport.load(vmClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset),
base, BufferAccess.bufferAddress(bb, offset),
bb, offset, s,
defaultImpl);
} finally {
Reference.reachabilityFence(scope);
}
}
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
M extends VectorSupport.VectorMask<E>>
V loadFromByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, ByteBuffer bb, int offset, M m, S s,
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
try {
return loadFromByteBufferMaskedScoped(
BufferAccess.scope(bb),
vmClass, maskClass, e, length,
bb, offset, m,
s,
defaultImpl);
} catch (ScopedMemoryAccess.Scope.ScopedAccessError ex) {
throw new IllegalStateException("This segment is already closed");
}
}
@Scoped
@ForceInline
private static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
M extends VectorSupport.VectorMask<E>>
V loadFromByteBufferMaskedScoped(ScopedMemoryAccess.Scope scope, Class<? extends V> vmClass,
Class<M> maskClass, Class<E> e, int length,
ByteBuffer bb, int offset, M m,
S s,
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
try {
if (scope != null) {
scope.checkValidState();
}
return VectorSupport.loadMasked(vmClass, maskClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset), m,
bb, offset, s,
defaultImpl);
} finally {
@ -448,8 +501,10 @@ public class ScopedMemoryAccess {
scope.checkValidState();
}
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
VectorSupport.store(vmClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset),
base, BufferAccess.bufferAddress(bb, offset),
v,
bb, offset,
defaultImpl);
@ -458,6 +513,48 @@ public class ScopedMemoryAccess {
}
}
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
void storeIntoByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, V v, M m,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
try {
storeIntoByteBufferMaskedScoped(
BufferAccess.scope(bb),
vmClass, maskClass, e, length,
v, m,
bb, offset,
defaultImpl);
} catch (ScopedMemoryAccess.Scope.ScopedAccessError ex) {
throw new IllegalStateException("This segment is already closed");
}
}
@Scoped
@ForceInline
private static
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
void storeIntoByteBufferMaskedScoped(ScopedMemoryAccess.Scope scope,
Class<? extends V> vmClass, Class<M> maskClass,
Class<E> e, int length, V v, M m,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
try {
if (scope != null) {
scope.checkValidState();
}
VectorSupport.storeMasked(vmClass, maskClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset),
v, m,
bb, offset,
defaultImpl);
} finally {
Reference.reachabilityFence(scope);
}
}
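The scoped entry points above funnel the public masked ByteBuffer accessors into the new intrinsics. A hedged user-side sketch (species and buffer size are the editor's assumptions; compile with --add-modules jdk.incubator.vector):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import jdk.incubator.vector.*;

    public class MaskedBufferDemo {
        public static void main(String[] args) {
            VectorSpecies<Integer> SP = IntVector.SPECIES_128;   // 4 int lanes
            ByteBuffer bb = ByteBuffer.allocateDirect(SP.vectorByteSize());
            VectorMask<Integer> m = SP.indexInRange(0, 3);       // lanes 0..2 active
            // Masked load: inactive lanes are zero and are not read.
            IntVector v = IntVector.fromByteBuffer(SP, bb, 0, ByteOrder.nativeOrder(), m);
            // Masked store: only active lanes are written.
            v.add(1).intoByteBuffer(bb, 0, ByteOrder.nativeOrder(), m);
        }
    }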
// typed-ops here

View file

@ -69,10 +69,11 @@ public class VectorSupport {
public static final int VECTOR_OP_MASK_TRUECOUNT = 19;
public static final int VECTOR_OP_MASK_FIRSTTRUE = 20;
public static final int VECTOR_OP_MASK_LASTTRUE = 21;
public static final int VECTOR_OP_MASK_TOLONG = 22;
// Rotate operations
public static final int VECTOR_OP_LROTATE = 22;
public static final int VECTOR_OP_RROTATE = 23;
public static final int VECTOR_OP_LROTATE = 23;
public static final int VECTOR_OP_RROTATE = 24;
// Math routines
public static final int VECTOR_OP_TAN = 101;
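The new constant shifts the rotate values by one, so this file and vectorSupport.hpp above must renumber in lockstep. On the public API side it backs VectorMask.toLong(); a hedged sketch (species chosen for illustration):

    import jdk.incubator.vector.*;

    public class MaskToLongDemo {
        public static void main(String[] args) {
            VectorSpecies<Integer> SP = IntVector.SPECIES_128;   // 4 lanes
            VectorMask<Integer> m =
                VectorMask.fromValues(SP, true, false, true, true);
            System.out.println(m.toLong());  // lane i -> bit i: 0b1101 == 13
        }
    }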
@ -156,274 +157,142 @@ public class VectorSupport {
}
/* ============================================================================ */
public interface BroadcastOperation<VM, E, S extends VectorSpecies<E>> {
public interface BroadcastOperation<VM extends VectorPayload,
S extends VectorSpecies<?>> {
VM broadcast(long l, S s);
}
@IntrinsicCandidate
public static
<VM, E, S extends VectorSpecies<E>>
VM broadcastCoerced(Class<? extends VM> vmClass, Class<E> E, int length,
<VM extends VectorPayload,
S extends VectorSpecies<E>,
E>
VM broadcastCoerced(Class<? extends VM> vmClass, Class<E> eClass,
int length,
long bits, S s,
BroadcastOperation<VM, E, S> defaultImpl) {
BroadcastOperation<VM, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.broadcast(bits, s);
}
/* ============================================================================ */
public interface ShuffleIotaOperation<E, S extends VectorSpecies<E>> {
VectorShuffle<E> apply(int length, int start, int step, S s);
public interface ShuffleIotaOperation<S extends VectorSpecies<?>,
SH extends VectorShuffle<?>> {
SH apply(int length, int start, int step, S s);
}
@IntrinsicCandidate
public static
<E, S extends VectorSpecies<E>>
VectorShuffle<E> shuffleIota(Class<?> E, Class<?> ShuffleClass, S s, int length,
int start, int step, int wrap, ShuffleIotaOperation<E, S> defaultImpl) {
<E,
S extends VectorSpecies<E>,
SH extends VectorShuffle<E>>
SH shuffleIota(Class<E> eClass, Class<? extends SH> shClass, S s,
int length,
int start, int step, int wrap,
ShuffleIotaOperation<S, SH> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(length, start, step, s);
}
public interface ShuffleToVectorOperation<VM, Sh, E> {
VM apply(Sh s);
public interface ShuffleToVectorOperation<V extends Vector<?>,
SH extends VectorShuffle<?>> {
V apply(SH sh);
}
@IntrinsicCandidate
public static
<VM ,Sh extends VectorShuffle<E>, E>
VM shuffleToVector(Class<?> VM, Class<?>E , Class<?> ShuffleClass, Sh s, int length,
ShuffleToVectorOperation<VM,Sh,E> defaultImpl) {
<V extends Vector<E>,
SH extends VectorShuffle<E>,
E>
V shuffleToVector(Class<? extends Vector<E>> vClass, Class<E> eClass, Class<? extends SH> shClass, SH sh,
int length,
ShuffleToVectorOperation<V, SH> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(s);
return defaultImpl.apply(sh);
}
/* ============================================================================ */
public interface IndexOperation<V extends Vector<E>, E, S extends VectorSpecies<E>> {
public interface IndexOperation<V extends Vector<?>,
S extends VectorSpecies<?>> {
V index(V v, int step, S s);
}
//FIXME @IntrinsicCandidate
public static
<V extends Vector<E>, E, S extends VectorSpecies<E>>
V indexVector(Class<? extends V> vClass, Class<E> E, int length,
<V extends Vector<E>,
E,
S extends VectorSpecies<E>>
V indexVector(Class<? extends V> vClass, Class<E> eClass,
int length,
V v, int step, S s,
IndexOperation<V, E, S> defaultImpl) {
IndexOperation<V, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.index(v, step, s);
}
/* ============================================================================ */
public interface ReductionOperation<V extends Vector<?>,
M extends VectorMask<?>> {
long apply(V v, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
long reductionCoerced(int oprId, Class<?> vectorClass, Class<?> elementType, int length,
V v,
Function<V,Long> defaultImpl) {
<V extends Vector<E>,
M extends VectorMask<E>,
E>
long reductionCoerced(int oprId,
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
int length,
V v, M m,
ReductionOperation<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v);
return defaultImpl.apply(v, m);
}
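At the API level the mask-accepting reduction shows up as the masked reduceLanes overloads; a hedged sketch (species and values are the editor's assumptions):

    import jdk.incubator.vector.*;

    public class MaskedReduceDemo {
        public static void main(String[] args) {
            VectorSpecies<Integer> SP = IntVector.SPECIES_128;   // 4 lanes
            IntVector v = IntVector.fromArray(SP, new int[]{1, 2, 3, 4}, 0);
            VectorMask<Integer> m = SP.indexInRange(0, 3);       // lanes 0..2
            // Inactive lanes contribute the identity (0 for ADD): 1 + 2 + 3.
            System.out.println(v.reduceLanes(VectorOperators.ADD, m));
        }
    }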
/* ============================================================================ */
public interface VecExtractOp<V> {
long apply(V v1, int idx);
public interface VecExtractOp<V extends Vector<?>> {
long apply(V v, int i);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
long extract(Class<?> vectorClass, Class<?> elementType, int vlen,
V vec, int ix,
<V extends Vector<E>,
E>
long extract(Class<? extends V> vClass, Class<E> eClass,
int length,
V v, int i,
VecExtractOp<V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vec, ix);
return defaultImpl.apply(v, i);
}
/* ============================================================================ */
public interface VecInsertOp<V> {
V apply(V v1, int idx, long val);
public interface VecInsertOp<V extends Vector<?>> {
V apply(V v, int i, long val);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
V insert(Class<? extends V> vectorClass, Class<?> elementType, int vlen,
V vec, int ix, long val,
<V extends Vector<E>,
E>
V insert(Class<? extends V> vClass, Class<E> eClass,
int length,
V v, int i, long val,
VecInsertOp<V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vec, ix, val);
return defaultImpl.apply(v, i, val);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
VM unaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm,
Function<VM, VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
VM binaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2,
BiFunction<VM, VM, VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2);
}
/* ============================================================================ */
public interface TernaryOperation<V> {
V apply(V v1, V v2, V v3);
}
@IntrinsicCandidate
public static
<VM>
VM ternaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2, VM vm3,
TernaryOperation<VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2, vm3);
}
/* ============================================================================ */
// Memory operations
public interface LoadOperation<C, V, E, S extends VectorSpecies<E>> {
V load(C container, int index, S s);
}
@IntrinsicCandidate
public static
<C, VM, E, S extends VectorSpecies<E>>
VM load(Class<? extends VM> vmClass, Class<E> E, int length,
Object base, long offset, // Unsafe addressing
C container, int index, S s, // Arguments for default implementation
LoadOperation<C, VM, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s);
}
/* ============================================================================ */
public interface LoadVectorOperationWithMap<C, V extends Vector<?>, E, S extends VectorSpecies<E>> {
V loadWithMap(C container, int index, int[] indexMap, int indexM, S s);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>, W extends Vector<Integer>, E, S extends VectorSpecies<E>>
V loadWithMap(Class<?> vectorClass, Class<E> E, int length, Class<?> vectorIndexClass,
Object base, long offset, // Unsafe addressing
W index_vector,
C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
LoadVectorOperationWithMap<C, V, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.loadWithMap(container, index, indexMap, indexM, s);
}
/* ============================================================================ */
public interface StoreVectorOperation<C, V extends Vector<?>> {
void store(C container, int index, V v);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>>
void store(Class<?> vectorClass, Class<?> elementType, int length,
Object base, long offset, // Unsafe addressing
V v,
C container, int index, // Arguments for default implementation
StoreVectorOperation<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v);
}
/* ============================================================================ */
public interface StoreVectorOperationWithMap<C, V extends Vector<?>> {
void storeWithMap(C container, int index, V v, int[] indexMap, int indexM);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>, W extends Vector<Integer>>
void storeWithMap(Class<?> vectorClass, Class<?> elementType, int length, Class<?> vectorIndexClass,
Object base, long offset, // Unsafe addressing
W index_vector, V v,
C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
StoreVectorOperationWithMap<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.storeWithMap(container, index, v, indexMap, indexM);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
boolean test(int cond, Class<?> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2,
BiFunction<VM, VM, Boolean> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2);
}
/* ============================================================================ */
public interface VectorCompareOp<V,M> {
M apply(int cond, V v1, V v2);
}
@IntrinsicCandidate
public static <V extends Vector<E>,
M extends VectorMask<E>,
E>
M compare(int cond, Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
V v1, V v2,
VectorCompareOp<V,M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(cond, v1, v2);
}
/* ============================================================================ */
public interface VectorRearrangeOp<V extends Vector<E>,
Sh extends VectorShuffle<E>,
E> {
V apply(V v1, Sh shuffle);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
Sh extends VectorShuffle<E>,
E>
V rearrangeOp(Class<? extends V> vectorClass, Class<Sh> shuffleClass, Class<?> elementType, int vlen,
V v1, Sh sh,
VectorRearrangeOp<V,Sh, E> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v1, sh);
}
/* ============================================================================ */
public interface VectorBlendOp<V extends Vector<E>,
M extends VectorMask<E>,
E> {
V apply(V v1, V v2, M mask);
public interface UnaryOperation<V extends Vector<?>,
M extends VectorMask<?>> {
V apply(V v, M m);
}
@IntrinsicCandidate
@ -431,33 +300,306 @@ public class VectorSupport {
<V extends Vector<E>,
M extends VectorMask<E>,
E>
V blend(Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
V v1, V v2, M m,
VectorBlendOp<V,M, E> defaultImpl) {
V unaryOp(int oprId,
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
int length,
V v, M m,
UnaryOperation<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, m);
}
/* ============================================================================ */
public interface BinaryOperation<VM extends VectorPayload,
M extends VectorMask<?>> {
VM apply(VM v1, VM v2, M m);
}
@IntrinsicCandidate
public static
<VM extends VectorPayload,
M extends VectorMask<E>,
E>
VM binaryOp(int oprId,
Class<? extends VM> vmClass, Class<? extends M> mClass, Class<E> eClass,
int length,
VM v1, VM v2, M m,
BinaryOperation<VM, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v1, v2, m);
}
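For context, the new mask argument on binaryOp is what a masked lanewise operation in the public incubator API lowers to. A minimal sketch assuming the jdk.incubator.vector API of this release; the species choice and array names are illustrative:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class MaskedLanewiseSketch {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

        // r[i] = a[i] + b[i], with the loop tail handled by a mask instead
        // of a scalar cleanup loop.
        static void addMasked(int[] a, int[] b, int[] r) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                VectorMask<Integer> m = SPECIES.indexInRange(i, a.length);
                IntVector va = IntVector.fromArray(SPECIES, a, i, m);
                IntVector vb = IntVector.fromArray(SPECIES, b, i, m);
                // Lowers to the masked binaryOp entry point above when the
                // platform supports predicated ADD; otherwise defaultImpl runs.
                va.lanewise(VectorOperators.ADD, vb, m).intoArray(r, i, m);
            }
        }
    }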
/* ============================================================================ */
public interface VectorBroadcastIntOp<V extends Vector<?>> {
V apply(V v, int n);
}
public interface TernaryOperation<V extends Vector<?>,
M extends VectorMask<?>> {
V apply(V v1, V v2, V v3, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
V broadcastInt(int opr, Class<? extends V> vectorClass, Class<?> elementType, int length,
V v, int n,
VectorBroadcastIntOp<V> defaultImpl) {
<V extends Vector<E>,
M extends VectorMask<E>,
E>
V ternaryOp(int oprId,
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
int length,
V v1, V v2, V v3, M m,
TernaryOperation<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, n);
return defaultImpl.apply(v1, v2, v3, m);
}
/* ============================================================================ */
public interface VectorConvertOp<VOUT, VIN, S> {
VOUT apply(VIN v, S species);
}
// Memory operations
public interface LoadOperation<C,
VM extends VectorPayload,
S extends VectorSpecies<?>> {
VM load(C container, int index, S s);
}
@IntrinsicCandidate
public static
<C,
VM extends VectorPayload,
E,
S extends VectorSpecies<E>>
VM load(Class<? extends VM> vmClass, Class<E> eClass,
int length,
Object base, long offset,
C container, int index, S s,
LoadOperation<C, VM, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s);
}
/* ============================================================================ */
public interface LoadVectorMaskedOperation<C,
V extends Vector<?>,
S extends VectorSpecies<?>,
M extends VectorMask<?>> {
V load(C container, int index, S s, M m);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<?>,
E,
S extends VectorSpecies<E>,
M extends VectorMask<E>>
V loadMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Object base, long offset,
M m, C container, int index, S s,
LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s, m);
}
/* ============================================================================ */
public interface LoadVectorOperationWithMap<C,
V extends Vector<?>,
S extends VectorSpecies<?>,
M extends VectorMask<?>> {
V loadWithMap(C container, int index, int[] indexMap, int indexM, S s, M m);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<?>,
W extends Vector<Integer>,
S extends VectorSpecies<E>,
M extends VectorMask<E>,
E>
V loadWithMap(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Class<? extends Vector<Integer>> vectorIndexClass,
Object base, long offset,
W index_vector,
M m, C container, int index, int[] indexMap, int indexM, S s,
LoadVectorOperationWithMap<C, V, S, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.loadWithMap(container, index, indexMap, indexM, s, m);
}
/* ============================================================================ */
public interface StoreVectorOperation<C,
V extends Vector<?>> {
void store(C container, int index, V v);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<?>>
void store(Class<?> vClass, Class<?> eClass,
int length,
Object base, long offset,
V v, C container, int index,
StoreVectorOperation<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v);
}
public interface StoreVectorMaskedOperation<C,
V extends Vector<?>,
M extends VectorMask<?>> {
void store(C container, int index, V v, M m);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<E>,
M extends VectorMask<E>,
E>
void storeMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Object base, long offset,
V v, M m, C container, int index,
StoreVectorMaskedOperation<C, V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v, m);
}
/* ============================================================================ */
public interface StoreVectorOperationWithMap<C,
V extends Vector<?>,
M extends VectorMask<?>> {
void storeWithMap(C container, int index, V v, int[] indexMap, int indexM, M m);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<E>,
W extends Vector<Integer>,
M extends VectorMask<E>,
E>
void storeWithMap(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Class<? extends Vector<Integer>> vectorIndexClass,
Object base, long offset,
W index_vector,
V v, M m, C container, int index, int[] indexMap, int indexM,
StoreVectorOperationWithMap<C, V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.storeWithMap(container, index, v, indexMap, indexM, m);
}
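The masked loadWithMap/storeWithMap pairs above back the indexMap (gather/scatter) overloads of the public API. A sketch under the same assumptions as the earlier example; data, indexMap, and the mask bits are made-up values:

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    class GatherScatterSketch {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128; // 4 int lanes

        public static void main(String[] args) {
            int[] data = {10, 20, 30, 40, 50, 60, 70, 80};
            int[] indexMap = {7, 0, 3, 5};                  // one index per lane
            VectorMask<Integer> m = VectorMask.fromLong(SPECIES, 0b1011); // lanes 0, 1, 3

            // Gather: set lanes read data[indexMap[lane]]; unset lanes are zero.
            IntVector v = IntVector.fromArray(SPECIES, data, 0, indexMap, 0, m);

            // Scatter: only set lanes write back.
            v.intoArray(data, 0, indexMap, 0, m);
        }
    }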
/* ============================================================================ */
@IntrinsicCandidate
public static
<M extends VectorMask<E>,
E>
boolean test(int cond,
Class<?> mClass, Class<?> eClass,
int length,
M m1, M m2,
BiFunction<M, M, Boolean> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(m1, m2);
}
/* ============================================================================ */
public interface VectorCompareOp<V extends Vector<?>,
M extends VectorMask<?>> {
M apply(int cond, V v1, V v2, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
M extends VectorMask<E>,
E>
M compare(int cond,
Class<? extends V> vectorClass, Class<M> mClass, Class<E> eClass,
int length,
V v1, V v2, M m,
VectorCompareOp<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(cond, v1, v2, m);
}
/* ============================================================================ */
public interface VectorRearrangeOp<V extends Vector<?>,
SH extends VectorShuffle<?>,
M extends VectorMask<?>> {
V apply(V v, SH sh, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
SH extends VectorShuffle<E>,
M extends VectorMask<E>,
E>
V rearrangeOp(Class<? extends V> vClass, Class<SH> shClass, Class<M> mClass, Class<E> eClass,
int length,
V v, SH sh, M m,
VectorRearrangeOp<V, SH, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, sh, m);
}
/* ============================================================================ */
public interface VectorBlendOp<V extends Vector<?>,
M extends VectorMask<?>> {
V apply(V v1, V v2, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
M extends VectorMask<E>,
E>
V blend(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
V v1, V v2, M m,
VectorBlendOp<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v1, v2, m);
}
/* ============================================================================ */
public interface VectorBroadcastIntOp<V extends Vector<?>,
M extends VectorMask<?>> {
V apply(V v, int n, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
M extends VectorMask<E>,
E>
V broadcastInt(int opr,
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
int length,
V v, int n, M m,
VectorBroadcastIntOp<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, n, m);
}
/* ============================================================================ */
public interface VectorConvertOp<VOUT extends VectorPayload,
VIN extends VectorPayload,
S extends VectorSpecies<?>> {
VOUT apply(VIN v, S s);
}
// Users of this intrinsic assume that it respects
@ -469,8 +611,8 @@ public class VectorSupport {
VIN extends VectorPayload,
S extends VectorSpecies<?>>
VOUT convert(int oprId,
Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
Class<?> toVectorClass, Class<?> toElementType, int toVLen,
Class<?> fromVectorClass, Class<?> fromeClass, int fromVLen,
Class<?> toVectorClass, Class<?> toeClass, int toVLen,
VIN v, S s,
VectorConvertOp<VOUT, VIN, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
@ -480,7 +622,9 @@ public class VectorSupport {
/* ============================================================================ */
@IntrinsicCandidate
public static <V> V maybeRebox(V v) {
public static
<VP extends VectorPayload>
VP maybeRebox(VP v) {
// The fence is added here to avoid memory aliasing problems in C2 between scalar & vector accesses.
// TODO: move the fence generation into C2. Generate only when reboxing is taking place.
U.loadFence();
@ -488,14 +632,18 @@ public class VectorSupport {
}
/* ============================================================================ */
public interface VectorMaskOp<M> {
int apply(M m);
}
public interface VectorMaskOp<M extends VectorMask<?>> {
long apply(M m);
}
@IntrinsicCandidate
public static
<E, M>
int maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass, int length, M m,
<M extends VectorMask<E>,
E>
long maskReductionCoerced(int oper,
Class<? extends M> mClass, Class<?> eClass,
int length,
M m,
VectorMaskOp<M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(m);
}

View file

@ -24,6 +24,8 @@
*/
package jdk.incubator.vector;
import java.util.Objects;
import jdk.internal.vm.annotation.ForceInline;
import static jdk.incubator.vector.VectorOperators.*;
@ -62,24 +64,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
}
@Override
@ForceInline
public boolean laneIsSet(int i) {
int length = length();
Objects.checkIndex(i, length);
if (length <= Long.SIZE) {
return ((toLong() >>> i) & 1L) == 1;
} else {
return getBits()[i];
}
}
@Override
public long toLong() {
// FIXME: This should be an intrinsic.
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
long res = 0;
long set = 1;
boolean[] bits = getBits();
for (int i = 0; i < bits.length; i++) {
res = bits[i] ? res | set : res;
set = set << 1;
}
return res;
}
@Override
@ -114,6 +107,23 @@ abstract class AbstractMask<E> extends VectorMask<E> {
return (VectorMask<F>) this;
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
<F> VectorMask<F> check(Class<? extends VectorMask<F>> maskClass, Vector<F> vector) {
if (!sameSpecies(maskClass, vector)) {
throw AbstractSpecies.checkFailed(this, vector);
}
return (VectorMask<F>) this;
}
@ForceInline
private <F> boolean sameSpecies(Class<? extends VectorMask<F>> maskClass, Vector<F> vector) {
boolean same = getClass() == maskClass;
assert (same == (vectorSpecies() == vector.species())) : same;
return same;
}
@Override
public VectorMask<E> andNot(VectorMask<E> m) {
return and(m.not());
@ -162,6 +172,17 @@ abstract class AbstractMask<E> extends VectorMask<E> {
return -1;
}
/*package-private*/
static long toLongHelper(boolean[] bits) {
long res = 0;
long set = 1;
for (int i = 0; i < bits.length; i++) {
res = bits[i] ? res | set : res;
set = set << 1;
}
return res;
}
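toLongHelper packs bits[i] into bit i, so lane 0 lands in the least significant bit. A worked example with assumed lane values:

    // bits = {true, false, true, true}
    //   i = 0: res = 0b0001
    //   i = 1: res unchanged (lane clear)
    //   i = 2: res = 0b0101
    //   i = 3: res = 0b1101
    // toLongHelper(bits) == 0b1101 == 13L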
@Override
@ForceInline
public VectorMask<E> indexInRange(int offset, int limit) {
@ -215,14 +236,10 @@ abstract class AbstractMask<E> extends VectorMask<E> {
int elemCount = Math.min(vlength, (alength - offset) / esize);
badMask = checkIndex0(0, elemCount, iota, vlength);
} else {
// This requires a split test.
int clipOffset = Math.max(offset, -(vlength * esize));
int elemCount = Math.min(vlength, (alength - clipOffset) / esize);
badMask = checkIndex0(0, elemCount, iota, vlength);
clipOffset &= (esize - 1); // power of two, so OK
VectorMask<E> badMask2 = checkIndex0(clipOffset / esize, vlength,
iota, vlength);
badMask = badMask.or(badMask2);
badMask = checkIndex0(clipOffset, alength,
iota.lanewise(VectorOperators.MUL, esize),
vlength * esize);
}
badMask = badMask.and(this);
if (badMask.anyTrue()) {
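The rewritten else branch above collapses the old split test into one range check in byte units: scaling iota by esize lets a single checkIndex0 call flag every lane whose byte position clipOffset + lane*esize falls outside the array. A worked example, assuming checkIndex0(start, limit, indices, bound) flags lanes where start + indices[lane] is not within [0, limit):

    // Assumed: 4 int lanes (esize = 4), alength = 10 bytes, offset = -2.
    //   clipOffset     = Math.max(-2, -(4 * 4)) = -2
    //   iota * esize   = {0, 4, 8, 12}
    //   byte positions = clipOffset + iota*esize = {-2, 2, 6, 10}
    //   flagged bad    = lanes 0 (-2 < 0) and 3 (10 >= 10)
    //   in bounds      = lanes 1 and 2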

View file

@ -236,8 +236,8 @@ final class Byte128Vector extends ByteVector {
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
byte rOp(byte v, VectorMask<Byte> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Byte128Vector extends ByteVector {
return (Byte128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte128Vector lanewise(Unary op, VectorMask<Byte> m) {
return (Byte128Vector) super.lanewiseTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Byte128Vector lanewise(Binary op, Vector<Byte> v, VectorMask<Byte> m) {
return (Byte128Vector) super.lanewiseTemplate(op, Byte128Mask.class, v, (Byte128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Byte128Vector
@ -286,15 +298,30 @@ final class Byte128Vector extends ByteVector {
return (Byte128Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Byte128Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m) {
return (Byte128Vector) super.lanewiseShiftTemplate(op, Byte128Mask.class, e, (Byte128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte128Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte128Vector
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m) {
return (Byte128Vector) super.lanewiseTemplate(op, Byte128Mask.class, v1, v2, (Byte128Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Byte128Vector extends ByteVector {
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Byte128Vector extends ByteVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Byte128Vector extends ByteVector {
return super.compareTemplate(Byte128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte128Mask compare(Comparison op, Vector<Byte> v, VectorMask<Byte> m) {
return super.compareTemplate(Byte128Mask.class, op, v, (Byte128Mask) m);
}
@Override
@ForceInline
public Byte128Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
@ -419,6 +453,7 @@ final class Byte128Vector extends ByteVector {
VectorMask<Byte> m) {
return (Byte128Vector)
super.rearrangeTemplate(Byte128Shuffle.class,
Byte128Mask.class,
(Byte128Shuffle) shuffle,
(Byte128Mask) m); // specialize
}
@ -612,16 +647,12 @@ final class Byte128Vector extends ByteVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Byte128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
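With this change the mask cast always goes through a single VECTOR_OP_CAST intrinsic; the old VSIZE == species.vectorBitSize() special case and the REINTERPRET path are gone. When the intrinsic does not apply, the fallback lambda rebuilds the mask lane by lane in the target species. In effect (a sketch of the fallback, not a new API):

    // Fallback, in effect:
    //   boolean[] bits = m.toArray();          // source lane flags
    //   return s.maskFactory(bits).check(s);   // same flags, target species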
@Override
@ -647,9 +678,9 @@ final class Byte128Vector extends ByteVector {
public Byte128Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte128Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -657,9 +688,9 @@ final class Byte128Vector extends ByteVector {
public Byte128Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte128Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -667,9 +698,9 @@ final class Byte128Vector extends ByteVector {
Byte128Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte128Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -677,22 +708,32 @@ final class Byte128Vector extends ByteVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(((Byte128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(((Byte128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(((Byte128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Byte128Mask.class, byte.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -803,6 +844,14 @@ final class Byte128Vector extends ByteVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
@ -811,6 +860,13 @@ final class Byte128Vector extends ByteVector {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -818,6 +874,13 @@ final class Byte128Vector extends ByteVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -825,6 +888,13 @@ final class Byte128Vector extends ByteVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -832,6 +902,21 @@ final class Byte128Vector extends ByteVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m);
}
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m);
}
@ForceInline
@Override
final
@ -839,6 +924,21 @@ final class Byte128Vector extends ByteVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class Byte256Vector extends ByteVector {
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
byte rOp(byte v, VectorMask<Byte> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Byte256Vector extends ByteVector {
return (Byte256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte256Vector lanewise(Unary op, VectorMask<Byte> m) {
return (Byte256Vector) super.lanewiseTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Byte256Vector lanewise(Binary op, Vector<Byte> v, VectorMask<Byte> m) {
return (Byte256Vector) super.lanewiseTemplate(op, Byte256Mask.class, v, (Byte256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Byte256Vector
@ -286,15 +298,30 @@ final class Byte256Vector extends ByteVector {
return (Byte256Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Byte256Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m) {
return (Byte256Vector) super.lanewiseShiftTemplate(op, Byte256Mask.class, e, (Byte256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte256Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte256Vector
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m) {
return (Byte256Vector) super.lanewiseTemplate(op, Byte256Mask.class, v1, v2, (Byte256Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Byte256Vector extends ByteVector {
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Byte256Vector extends ByteVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Byte256Vector extends ByteVector {
return super.compareTemplate(Byte256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte256Mask compare(Comparison op, Vector<Byte> v, VectorMask<Byte> m) {
return super.compareTemplate(Byte256Mask.class, op, v, (Byte256Mask) m);
}
@Override
@ForceInline
public Byte256Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
@ -419,6 +453,7 @@ final class Byte256Vector extends ByteVector {
VectorMask<Byte> m) {
return (Byte256Vector)
super.rearrangeTemplate(Byte256Shuffle.class,
Byte256Mask.class,
(Byte256Shuffle) shuffle,
(Byte256Mask) m); // specialize
}
@ -644,16 +679,12 @@ final class Byte256Vector extends ByteVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Byte256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -679,9 +710,9 @@ final class Byte256Vector extends ByteVector {
public Byte256Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte256Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -689,9 +720,9 @@ final class Byte256Vector extends ByteVector {
public Byte256Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte256Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -699,9 +730,9 @@ final class Byte256Vector extends ByteVector {
Byte256Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte256Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -709,22 +740,32 @@ final class Byte256Vector extends ByteVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(((Byte256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(((Byte256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(((Byte256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Byte256Mask.class, byte.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -835,6 +876,14 @@ final class Byte256Vector extends ByteVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
@ -843,6 +892,13 @@ final class Byte256Vector extends ByteVector {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -850,6 +906,13 @@ final class Byte256Vector extends ByteVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -857,6 +920,13 @@ final class Byte256Vector extends ByteVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -864,6 +934,21 @@ final class Byte256Vector extends ByteVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m);
}
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m);
}
@ForceInline
@Override
final
@ -871,6 +956,21 @@ final class Byte256Vector extends ByteVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class Byte512Vector extends ByteVector {
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
byte rOp(byte v, VectorMask<Byte> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Byte512Vector extends ByteVector {
return (Byte512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte512Vector lanewise(Unary op, VectorMask<Byte> m) {
return (Byte512Vector) super.lanewiseTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Byte512Vector lanewise(Binary op, Vector<Byte> v, VectorMask<Byte> m) {
return (Byte512Vector) super.lanewiseTemplate(op, Byte512Mask.class, v, (Byte512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Byte512Vector
@ -286,15 +298,30 @@ final class Byte512Vector extends ByteVector {
return (Byte512Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Byte512Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m) {
return (Byte512Vector) super.lanewiseShiftTemplate(op, Byte512Mask.class, e, (Byte512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte512Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte512Vector
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m) {
return (Byte512Vector) super.lanewiseTemplate(op, Byte512Mask.class, v1, v2, (Byte512Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Byte512Vector extends ByteVector {
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Byte512Vector extends ByteVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Byte512Vector extends ByteVector {
return super.compareTemplate(Byte512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte512Mask compare(Comparison op, Vector<Byte> v, VectorMask<Byte> m) {
return super.compareTemplate(Byte512Mask.class, op, v, (Byte512Mask) m);
}
@Override
@ForceInline
public Byte512Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
@ -419,6 +453,7 @@ final class Byte512Vector extends ByteVector {
VectorMask<Byte> m) {
return (Byte512Vector)
super.rearrangeTemplate(Byte512Shuffle.class,
Byte512Mask.class,
(Byte512Shuffle) shuffle,
(Byte512Mask) m); // specialize
}
@ -708,16 +743,12 @@ final class Byte512Vector extends ByteVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Byte512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -743,9 +774,9 @@ final class Byte512Vector extends ByteVector {
public Byte512Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte512Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -753,9 +784,9 @@ final class Byte512Vector extends ByteVector {
public Byte512Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte512Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -763,9 +794,9 @@ final class Byte512Vector extends ByteVector {
Byte512Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte512Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -773,22 +804,32 @@ final class Byte512Vector extends ByteVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(((Byte512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(((Byte512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(((Byte512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Byte512Mask.class, byte.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -899,6 +940,14 @@ final class Byte512Vector extends ByteVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
@ -907,6 +956,13 @@ final class Byte512Vector extends ByteVector {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -914,6 +970,13 @@ final class Byte512Vector extends ByteVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -921,6 +984,13 @@ final class Byte512Vector extends ByteVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -928,6 +998,21 @@ final class Byte512Vector extends ByteVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m);
}
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m);
}
@ForceInline
@Override
final
@ -935,6 +1020,21 @@ final class Byte512Vector extends ByteVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class Byte64Vector extends ByteVector {
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
byte rOp(byte v, VectorMask<Byte> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Byte64Vector extends ByteVector {
return (Byte64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte64Vector lanewise(Unary op, VectorMask<Byte> m) {
return (Byte64Vector) super.lanewiseTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Byte64Vector lanewise(Binary op, Vector<Byte> v, VectorMask<Byte> m) {
return (Byte64Vector) super.lanewiseTemplate(op, Byte64Mask.class, v, (Byte64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Byte64Vector
@ -286,15 +298,30 @@ final class Byte64Vector extends ByteVector {
return (Byte64Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Byte64Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m) {
return (Byte64Vector) super.lanewiseShiftTemplate(op, Byte64Mask.class, e, (Byte64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte64Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte64Vector
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m) {
return (Byte64Vector) super.lanewiseTemplate(op, Byte64Mask.class, v1, v2, (Byte64Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Byte64Vector extends ByteVector {
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Byte64Vector extends ByteVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Byte64Vector extends ByteVector {
return super.compareTemplate(Byte64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte64Mask compare(Comparison op, Vector<Byte> v, VectorMask<Byte> m) {
return super.compareTemplate(Byte64Mask.class, op, v, (Byte64Mask) m);
}
@Override
@ForceInline
public Byte64Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
@ -419,6 +453,7 @@ final class Byte64Vector extends ByteVector {
VectorMask<Byte> m) {
return (Byte64Vector)
super.rearrangeTemplate(Byte64Shuffle.class,
Byte64Mask.class,
(Byte64Shuffle) shuffle,
(Byte64Mask) m); // specialize
}
@ -596,16 +631,12 @@ final class Byte64Vector extends ByteVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Byte64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -631,9 +662,9 @@ final class Byte64Vector extends ByteVector {
public Byte64Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte64Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -641,9 +672,9 @@ final class Byte64Vector extends ByteVector {
public Byte64Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte64Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -651,9 +682,9 @@ final class Byte64Vector extends ByteVector {
Byte64Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte64Mask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -661,22 +692,32 @@ final class Byte64Vector extends ByteVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(((Byte64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(((Byte64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(((Byte64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Byte64Mask.class, byte.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -787,6 +828,14 @@ final class Byte64Vector extends ByteVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
@ -795,6 +844,13 @@ final class Byte64Vector extends ByteVector {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -802,6 +858,13 @@ final class Byte64Vector extends ByteVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -809,6 +872,13 @@ final class Byte64Vector extends ByteVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -816,6 +886,21 @@ final class Byte64Vector extends ByteVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m);
}
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m);
}
@ForceInline
@Override
final
@ -823,6 +908,21 @@ final class Byte64Vector extends ByteVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class ByteMaxVector extends ByteVector {
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
byte rOp(byte v, VectorMask<Byte> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class ByteMaxVector extends ByteVector {
return (ByteMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public ByteMaxVector lanewise(Unary op, VectorMask<Byte> m) {
return (ByteMaxVector) super.lanewiseTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector lanewise(Binary op, Vector<Byte> v) {
return (ByteMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public ByteMaxVector lanewise(Binary op, Vector<Byte> v, VectorMask<Byte> m) {
return (ByteMaxVector) super.lanewiseTemplate(op, ByteMaxMask.class, v, (ByteMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline ByteMaxVector
@ -286,15 +298,30 @@ final class ByteMaxVector extends ByteVector {
return (ByteMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline ByteMaxVector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m) {
return (ByteMaxVector) super.lanewiseShiftTemplate(op, ByteMaxMask.class, e, (ByteMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
ByteMaxVector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (ByteMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
ByteMaxVector
lanewise(Ternary op, Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m) {
return (ByteMaxVector) super.lanewiseTemplate(op, ByteMaxMask.class, v1, v2, (ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class ByteMaxVector extends ByteVector {
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class ByteMaxVector extends ByteVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class ByteMaxVector extends ByteVector {
return super.compareTemplate(ByteMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final ByteMaxMask compare(Comparison op, Vector<Byte> v, VectorMask<Byte> m) {
return super.compareTemplate(ByteMaxMask.class, op, v, (ByteMaxMask) m);
}
@Override
@ForceInline
public ByteMaxVector blend(Vector<Byte> v, VectorMask<Byte> m) {
@ -419,6 +453,7 @@ final class ByteMaxVector extends ByteVector {
VectorMask<Byte> m) {
return (ByteMaxVector)
super.rearrangeTemplate(ByteMaxShuffle.class,
ByteMaxMask.class,
(ByteMaxShuffle) shuffle,
(ByteMaxMask) m); // specialize
}
@ -582,16 +617,12 @@ final class ByteMaxVector extends ByteVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
ByteMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -617,9 +648,9 @@ final class ByteMaxVector extends ByteVector {
public ByteMaxMask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, ByteMaxMask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -627,9 +658,9 @@ final class ByteMaxVector extends ByteVector {
public ByteMaxMask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, ByteMaxMask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -637,9 +668,9 @@ final class ByteMaxVector extends ByteVector {
ByteMaxMask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, ByteMaxMask.class, null, byte.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -647,22 +678,32 @@ final class ByteMaxVector extends ByteVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(((ByteMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(((ByteMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(((ByteMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, ByteMaxMask.class, byte.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -773,6 +814,14 @@ final class ByteMaxVector extends ByteVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
@ -781,6 +830,13 @@ final class ByteMaxVector extends ByteVector {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
return super.fromBooleanArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -788,6 +844,13 @@ final class ByteMaxVector extends ByteVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -795,6 +858,13 @@ final class ByteMaxVector extends ByteVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -802,6 +872,21 @@ final class ByteMaxVector extends ByteVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m);
}
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m) {
super.intoBooleanArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m);
}
@ForceInline
@Override
final
@ -809,6 +894,21 @@ final class ByteMaxVector extends ByteVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================
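The masked fromArray0/fromBooleanArray0/fromByteArray0/fromByteBuffer0 loads and their intoArray0/intoByteBuffer0 store counterparts added above give the JIT an intrinsic for the usual tail-handling idiom, which previously dropped to scalar code. A usage sketch in public-API terms (array sizes are illustrative):

VectorSpecies<Byte> sp = ByteVector.SPECIES_PREFERRED;
byte[] src = new byte[19];  // deliberately not a multiple of the lane count
byte[] dst = new byte[19];
for (int i = 0; i < src.length; i += sp.length()) {
    VectorMask<Byte> m = sp.indexInRange(i, src.length);  // partial final step
    ByteVector v = ByteVector.fromArray(sp, src, i, m);   // masked load
    v.intoArray(dst, i, m);                               // masked store
}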
View file
@ -236,8 +236,8 @@ final class Double128Vector extends DoubleVector {
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
double rOp(double v, VectorMask<Double> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Double128Vector extends DoubleVector {
return (Double128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double128Vector lanewise(Unary op, VectorMask<Double> m) {
return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, (Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector lanewise(Binary op, Vector<Double> v) {
return (Double128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Double128Vector lanewise(Binary op, Vector<Double> v, VectorMask<Double> m) {
return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, v, (Double128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double128Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double128Vector
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2, VectorMask<Double> m) {
return (Double128Vector) super.lanewiseTemplate(op, Double128Mask.class, v1, v2, (Double128Mask) m); // specialize
}
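Each masked lanewise overload now hands the concrete mask class to the template, so the operation can be intrinsified with the mask applied directly rather than computed unmasked and blended afterwards. A sketch of the ternary form (values are illustrative; unset lanes keep the receiver's value):

VectorSpecies<Double> sp = DoubleVector.SPECIES_128;  // 2 lanes
DoubleVector x = DoubleVector.broadcast(sp, 2.0);
DoubleVector y = DoubleVector.broadcast(sp, 3.0);
DoubleVector z = DoubleVector.broadcast(sp, 1.0);
VectorMask<Double> m = VectorMask.fromLong(sp, 0b01);
DoubleVector r = x.lanewise(VectorOperators.FMA, y, z, m);  // {2*3+1, 2} = {7.0, 2.0}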
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Double128Vector extends DoubleVector {
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Double128Vector extends DoubleVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized
}
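Masked reductions route the mask class through reduceLanesTemplate the same way. In public-API terms, unset lanes simply do not contribute to the result:

VectorSpecies<Double> sp = DoubleVector.SPECIES_128;  // 2 lanes
DoubleVector v = DoubleVector.fromArray(sp, new double[] {10.0, 32.0}, 0);
VectorMask<Double> m = VectorMask.fromLong(sp, 0b01);   // lane 0 only
double sum = v.reduceLanes(VectorOperators.ADD, m);     // 10.0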
@ForceInline
@ -357,6 +377,13 @@ final class Double128Vector extends DoubleVector {
return super.compareTemplate(Double128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double128Mask compare(Comparison op, Vector<Double> v, VectorMask<Double> m) {
return super.compareTemplate(Double128Mask.class, op, v, (Double128Mask) m);
}
@Override
@ForceInline
public Double128Vector blend(Vector<Double> v, VectorMask<Double> m) {
@ -413,6 +440,7 @@ final class Double128Vector extends DoubleVector {
VectorMask<Double> m) {
return (Double128Vector)
super.rearrangeTemplate(Double128Shuffle.class,
Double128Mask.class,
(Double128Shuffle) shuffle,
(Double128Mask) m); // specialize
}
@ -580,16 +608,12 @@ final class Double128Vector extends DoubleVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
Double128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -615,9 +639,9 @@ final class Double128Vector extends DoubleVector {
public Double128Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -625,9 +649,9 @@ final class Double128Vector extends DoubleVector {
public Double128Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -635,9 +659,9 @@ final class Double128Vector extends DoubleVector {
Double128Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -645,22 +669,32 @@ final class Double128Vector extends DoubleVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double128Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Double128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double128Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double128Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Double128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double128Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double128Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Double128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double128Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
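The (int) casts above follow from maskReductionCoerced now reporting its result as a long (the same entry point also serves toLong below). For reference, the query semantics on a small mask:

VectorSpecies<Double> sp = DoubleVector.SPECIES_512;       // 8 lanes
VectorMask<Double> m = VectorMask.fromLong(sp, 0b0110);    // lanes 1 and 2
int count = m.trueCount();  // 2
int first = m.firstTrue();  // 1
int last  = m.lastTrue();   // 2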
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double128Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -771,6 +805,20 @@ final class Double128Vector extends DoubleVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, VectorMask<Double> m) {
return super.fromArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
return super.fromArray0Template(Double128Mask.class, a, offset, indexMap, mapOffset, (Double128Mask) m);
}
@ForceInline
@ -780,6 +828,13 @@ final class Double128Vector extends DoubleVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -787,6 +842,13 @@ final class Double128Vector extends DoubleVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -794,6 +856,21 @@ final class Double128Vector extends DoubleVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, VectorMask<Double> m) {
super.intoArray0Template(Double128Mask.class, a, offset, (Double128Mask) m);
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
super.intoArray0Template(Double128Mask.class, a, offset, indexMap, mapOffset, (Double128Mask) m);
}
@ForceInline
@Override
final
@ -801,6 +878,21 @@ final class Double128Vector extends DoubleVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
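Beyond the contiguous cases, the double vectors also gain masked gather/scatter overloads taking an index map (the fromArray0/intoArray0 variants with indexMap above). A usage sketch; the indices and mask are illustrative:

VectorSpecies<Double> sp = DoubleVector.SPECIES_128;  // 2 lanes
double[] data = new double[16];
int[] indexMap = {3, 7};                              // one index per lane
VectorMask<Double> m = VectorMask.fromLong(sp, 0b11);
DoubleVector g = DoubleVector.fromArray(sp, data, 0, indexMap, 0, m);  // gather
g.intoArray(data, 0, indexMap, 0, m);                                  // scatter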
View file
@ -236,8 +236,8 @@ final class Double256Vector extends DoubleVector {
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
double rOp(double v, VectorMask<Double> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Double256Vector extends DoubleVector {
return (Double256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double256Vector lanewise(Unary op, VectorMask<Double> m) {
return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, (Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector lanewise(Binary op, Vector<Double> v) {
return (Double256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Double256Vector lanewise(Binary op, Vector<Double> v, VectorMask<Double> m) {
return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, v, (Double256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double256Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double256Vector
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2, VectorMask<Double> m) {
return (Double256Vector) super.lanewiseTemplate(op, Double256Mask.class, v1, v2, (Double256Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Double256Vector extends DoubleVector {
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Double256Vector extends DoubleVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Double256Vector extends DoubleVector {
return super.compareTemplate(Double256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double256Mask compare(Comparison op, Vector<Double> v, VectorMask<Double> m) {
return super.compareTemplate(Double256Mask.class, op, v, (Double256Mask) m);
}
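The masked compare overload folds the governing mask into the comparison itself, effectively compare(op, v).and(m) but eligible for a single masked instruction. Illustrative use:

VectorSpecies<Double> sp = DoubleVector.SPECIES_256;  // 4 lanes
DoubleVector a = DoubleVector.broadcast(sp, 1.0);
DoubleVector b = DoubleVector.broadcast(sp, 2.0);
VectorMask<Double> m = VectorMask.fromLong(sp, 0b0011);
VectorMask<Double> lt = a.compare(VectorOperators.LT, b, m);  // lanes 0 and 1 true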
@Override
@ForceInline
public Double256Vector blend(Vector<Double> v, VectorMask<Double> m) {
@ -413,6 +440,7 @@ final class Double256Vector extends DoubleVector {
VectorMask<Double> m) {
return (Double256Vector)
super.rearrangeTemplate(Double256Shuffle.class,
Double256Mask.class,
(Double256Shuffle) shuffle,
(Double256Mask) m); // specialize
}
@ -584,16 +612,12 @@ final class Double256Vector extends DoubleVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
Double256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -619,9 +643,9 @@ final class Double256Vector extends DoubleVector {
public Double256Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -629,9 +653,9 @@ final class Double256Vector extends DoubleVector {
public Double256Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -639,9 +663,9 @@ final class Double256Vector extends DoubleVector {
Double256Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -649,22 +673,32 @@ final class Double256Vector extends DoubleVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double256Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Double256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double256Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double256Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Double256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double256Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double256Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Double256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double256Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double256Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -775,6 +809,20 @@ final class Double256Vector extends DoubleVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, VectorMask<Double> m) {
return super.fromArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
return super.fromArray0Template(Double256Mask.class, a, offset, indexMap, mapOffset, (Double256Mask) m);
}
@ForceInline
@ -784,6 +832,13 @@ final class Double256Vector extends DoubleVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -791,6 +846,13 @@ final class Double256Vector extends DoubleVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -798,6 +860,21 @@ final class Double256Vector extends DoubleVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, VectorMask<Double> m) {
super.intoArray0Template(Double256Mask.class, a, offset, (Double256Mask) m);
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
super.intoArray0Template(Double256Mask.class, a, offset, indexMap, mapOffset, (Double256Mask) m);
}
@ForceInline
@Override
final
@ -805,6 +882,21 @@ final class Double256Vector extends DoubleVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
View file
@ -236,8 +236,8 @@ final class Double512Vector extends DoubleVector {
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
double rOp(double v, VectorMask<Double> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Double512Vector extends DoubleVector {
return (Double512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double512Vector lanewise(Unary op, VectorMask<Double> m) {
return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, (Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector lanewise(Binary op, Vector<Double> v) {
return (Double512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Double512Vector lanewise(Binary op, Vector<Double> v, VectorMask<Double> m) {
return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, v, (Double512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double512Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double512Vector
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2, VectorMask<Double> m) {
return (Double512Vector) super.lanewiseTemplate(op, Double512Mask.class, v1, v2, (Double512Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Double512Vector extends DoubleVector {
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Double512Vector extends DoubleVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Double512Vector extends DoubleVector {
return super.compareTemplate(Double512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double512Mask compare(Comparison op, Vector<Double> v, VectorMask<Double> m) {
return super.compareTemplate(Double512Mask.class, op, v, (Double512Mask) m);
}
@Override
@ForceInline
public Double512Vector blend(Vector<Double> v, VectorMask<Double> m) {
@ -413,6 +440,7 @@ final class Double512Vector extends DoubleVector {
VectorMask<Double> m) {
return (Double512Vector)
super.rearrangeTemplate(Double512Shuffle.class,
Double512Mask.class,
(Double512Shuffle) shuffle,
(Double512Mask) m); // specialize
}
@ -592,16 +620,12 @@ final class Double512Vector extends DoubleVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
Double512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -627,9 +651,9 @@ final class Double512Vector extends DoubleVector {
public Double512Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -637,9 +661,9 @@ final class Double512Vector extends DoubleVector {
public Double512Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -647,9 +671,9 @@ final class Double512Vector extends DoubleVector {
Double512Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -657,22 +681,32 @@ final class Double512Vector extends DoubleVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double512Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Double512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double512Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double512Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Double512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double512Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double512Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Double512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double512Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double512Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -783,6 +817,20 @@ final class Double512Vector extends DoubleVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, VectorMask<Double> m) {
return super.fromArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
return super.fromArray0Template(Double512Mask.class, a, offset, indexMap, mapOffset, (Double512Mask) m);
}
@ForceInline
@ -792,6 +840,13 @@ final class Double512Vector extends DoubleVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -799,6 +854,13 @@ final class Double512Vector extends DoubleVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -806,6 +868,21 @@ final class Double512Vector extends DoubleVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, VectorMask<Double> m) {
super.intoArray0Template(Double512Mask.class, a, offset, (Double512Mask) m);
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
super.intoArray0Template(Double512Mask.class, a, offset, indexMap, mapOffset, (Double512Mask) m);
}
@ForceInline
@Override
final
@ -813,6 +890,21 @@ final class Double512Vector extends DoubleVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
View file
@ -236,8 +236,8 @@ final class Double64Vector extends DoubleVector {
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
double rOp(double v, VectorMask<Double> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Double64Vector extends DoubleVector {
return (Double64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double64Vector lanewise(Unary op, VectorMask<Double> m) {
return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, (Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector lanewise(Binary op, Vector<Double> v) {
return (Double64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Double64Vector lanewise(Binary op, Vector<Double> v, VectorMask<Double> m) {
return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, v, (Double64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double64Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double64Vector
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2, VectorMask<Double> m) {
return (Double64Vector) super.lanewiseTemplate(op, Double64Mask.class, v1, v2, (Double64Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Double64Vector extends DoubleVector {
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Double64Vector extends DoubleVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Double64Vector extends DoubleVector {
return super.compareTemplate(Double64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double64Mask compare(Comparison op, Vector<Double> v, VectorMask<Double> m) {
return super.compareTemplate(Double64Mask.class, op, v, (Double64Mask) m);
}
@Override
@ForceInline
public Double64Vector blend(Vector<Double> v, VectorMask<Double> m) {
@ -413,6 +440,7 @@ final class Double64Vector extends DoubleVector {
VectorMask<Double> m) {
return (Double64Vector)
super.rearrangeTemplate(Double64Shuffle.class,
Double64Mask.class,
(Double64Shuffle) shuffle,
(Double64Mask) m); // specialize
}
@ -578,16 +606,12 @@ final class Double64Vector extends DoubleVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
Double64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -613,9 +637,9 @@ final class Double64Vector extends DoubleVector {
public Double64Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -623,9 +647,9 @@ final class Double64Vector extends DoubleVector {
public Double64Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -633,9 +657,9 @@ final class Double64Vector extends DoubleVector {
Double64Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -643,22 +667,32 @@ final class Double64Vector extends DoubleVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double64Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Double64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Double64Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double64Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Double64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Double64Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double64Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Double64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Double64Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Double64Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -769,6 +803,20 @@ final class Double64Vector extends DoubleVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, VectorMask<Double> m) {
return super.fromArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
return super.fromArray0Template(Double64Mask.class, a, offset, indexMap, mapOffset, (Double64Mask) m);
}
@ForceInline
@ -778,6 +826,13 @@ final class Double64Vector extends DoubleVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -785,6 +840,13 @@ final class Double64Vector extends DoubleVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -792,6 +854,21 @@ final class Double64Vector extends DoubleVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, VectorMask<Double> m) {
super.intoArray0Template(Double64Mask.class, a, offset, (Double64Mask) m);
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
super.intoArray0Template(Double64Mask.class, a, offset, indexMap, mapOffset, (Double64Mask) m);
}
@ForceInline
@Override
final
@ -799,6 +876,21 @@ final class Double64Vector extends DoubleVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
View file
@ -236,8 +236,8 @@ final class DoubleMaxVector extends DoubleVector {
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
double rOp(double v, VectorMask<Double> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class DoubleMaxVector extends DoubleVector {
return (DoubleMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public DoubleMaxVector lanewise(Unary op, VectorMask<Double> m) {
return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialize
}
@Override
@ForceInline
public DoubleMaxVector lanewise(Binary op, Vector<Double> v) {
return (DoubleMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public DoubleMaxVector lanewise(Binary op, Vector<Double> v, VectorMask<Double> m) {
return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, v, (DoubleMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
DoubleMaxVector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (DoubleMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
DoubleMaxVector
lanewise(Ternary op, Vector<Double> v1, Vector<Double> v2, VectorMask<Double> m) {
return (DoubleMaxVector) super.lanewiseTemplate(op, DoubleMaxMask.class, v1, v2, (DoubleMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class DoubleMaxVector extends DoubleVector {
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class DoubleMaxVector extends DoubleVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class DoubleMaxVector extends DoubleVector {
return super.compareTemplate(DoubleMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final DoubleMaxMask compare(Comparison op, Vector<Double> v, VectorMask<Double> m) {
return super.compareTemplate(DoubleMaxMask.class, op, v, (DoubleMaxMask) m);
}
@Override
@ForceInline
public DoubleMaxVector blend(Vector<Double> v, VectorMask<Double> m) {
@ -413,6 +440,7 @@ final class DoubleMaxVector extends DoubleVector {
VectorMask<Double> m) {
return (DoubleMaxVector)
super.rearrangeTemplate(DoubleMaxShuffle.class,
DoubleMaxMask.class,
(DoubleMaxShuffle) shuffle,
(DoubleMaxMask) m); // specialize
}
@ -577,16 +605,12 @@ final class DoubleMaxVector extends DoubleVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
DoubleMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -612,9 +636,9 @@ final class DoubleMaxVector extends DoubleVector {
public DoubleMaxMask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
DoubleMaxMask m = (DoubleMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, DoubleMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, DoubleMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -622,9 +646,9 @@ final class DoubleMaxVector extends DoubleVector {
public DoubleMaxMask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
DoubleMaxMask m = (DoubleMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, DoubleMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, DoubleMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -632,9 +656,9 @@ final class DoubleMaxVector extends DoubleVector {
DoubleMaxMask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
DoubleMaxMask m = (DoubleMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, DoubleMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, DoubleMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -642,22 +666,32 @@ final class DoubleMaxVector extends DoubleVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((DoubleMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((DoubleMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((DoubleMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, DoubleMaxMask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -768,6 +802,20 @@ final class DoubleMaxVector extends DoubleVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, VectorMask<Double> m) {
return super.fromArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
return super.fromArray0Template(DoubleMaxMask.class, a, offset, indexMap, mapOffset, (DoubleMaxMask) m);
}
@ForceInline
@ -777,6 +825,13 @@ final class DoubleMaxVector extends DoubleVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -784,6 +839,13 @@ final class DoubleMaxVector extends DoubleVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -791,6 +853,21 @@ final class DoubleMaxVector extends DoubleVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, VectorMask<Double> m) {
super.intoArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m);
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Double> m) {
super.intoArray0Template(DoubleMaxMask.class, a, offset, indexMap, mapOffset, (DoubleMaxMask) m);
}
@ForceInline
@Override
final
@ -798,6 +875,21 @@ final class DoubleMaxVector extends DoubleVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================
View file
@ -236,8 +236,8 @@ final class Float128Vector extends FloatVector {
@ForceInline
final @Override
float rOp(float v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
float rOp(float v, VectorMask<Float> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Float128Vector extends FloatVector {
return (Float128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Float128Vector lanewise(Unary op, VectorMask<Float> m) {
return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, (Float128Mask) m); // specialize
}
@Override
@ForceInline
public Float128Vector lanewise(Binary op, Vector<Float> v) {
return (Float128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Float128Vector lanewise(Binary op, Vector<Float> v, VectorMask<Float> m) {
return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, v, (Float128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Float128Vector
lanewise(VectorOperators.Ternary op, Vector<Float> v1, Vector<Float> v2) {
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2) {
return (Float128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Float128Vector
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
return (Float128Vector) super.lanewiseTemplate(op, Float128Mask.class, v1, v2, (Float128Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Float128Vector extends FloatVector {
@ForceInline
public final float reduceLanes(VectorOperators.Associative op,
VectorMask<Float> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Float128Vector extends FloatVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Float> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Float128Vector extends FloatVector {
return super.compareTemplate(Float128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Float128Mask compare(Comparison op, Vector<Float> v, VectorMask<Float> m) {
return super.compareTemplate(Float128Mask.class, op, v, (Float128Mask) m);
}
@Override
@ForceInline
public Float128Vector blend(Vector<Float> v, VectorMask<Float> m) {
@ -413,6 +440,7 @@ final class Float128Vector extends FloatVector {
VectorMask<Float> m) {
return (Float128Vector)
super.rearrangeTemplate(Float128Shuffle.class,
Float128Mask.class,
(Float128Shuffle) shuffle,
(Float128Mask) m); // specialize
}
@ -584,16 +612,12 @@ final class Float128Vector extends FloatVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
this, species,
Float128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -619,9 +643,9 @@ final class Float128Vector extends FloatVector {
public Float128Mask and(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float128Mask m = (Float128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Float128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Float128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -629,9 +653,9 @@ final class Float128Vector extends FloatVector {
public Float128Mask or(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float128Mask m = (Float128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Float128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Float128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -639,9 +663,9 @@ final class Float128Vector extends FloatVector {
Float128Mask xor(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float128Mask m = (Float128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -649,22 +673,32 @@ final class Float128Vector extends FloatVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float128Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Float128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float128Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float128Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Float128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float128Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float128Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Float128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float128Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float128Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -775,6 +809,20 @@ final class Float128Vector extends FloatVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m) {
return super.fromArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
return super.fromArray0Template(Float128Mask.class, a, offset, indexMap, mapOffset, (Float128Mask) m);
}
@ForceInline
@ -784,6 +832,13 @@ final class Float128Vector extends FloatVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -791,6 +846,13 @@ final class Float128Vector extends FloatVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -798,6 +860,21 @@ final class Float128Vector extends FloatVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, VectorMask<Float> m) {
super.intoArray0Template(Float128Mask.class, a, offset, (Float128Mask) m);
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
super.intoArray0Template(Float128Mask.class, a, offset, indexMap, mapOffset, (Float128Mask) m);
}
@ForceInline
@Override
final
@ -805,6 +882,21 @@ final class Float128Vector extends FloatVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
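The masked fromArray0/fromByteArray0/fromByteBuffer0/intoArray0 overloads added above back the public masked loads and stores, so a tail loop no longer needs the old load-then-blend pattern. A sketch of the public API they serve (illustrative class name; the array length is chosen so the last iteration is partial):

import jdk.incubator.vector.*;

public class MaskedTailSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        float[] src = new float[10], dst = new float[10];
        for (int i = 0; i < src.length; i++) src[i] = i;
        for (int i = 0; i < src.length; i += S.length()) {
            VectorMask<Float> m = S.indexInRange(i, src.length); // partial mask on the tail
            FloatVector v = FloatVector.fromArray(S, src, i, m); // masked load
            v.mul(2f).intoArray(dst, i, m);                      // masked store
        }
        System.out.println(java.util.Arrays.toString(dst));
    }
}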

View file

@ -236,8 +236,8 @@ final class Float256Vector extends FloatVector {
@ForceInline
final @Override
float rOp(float v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
float rOp(float v, VectorMask<Float> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Float256Vector extends FloatVector {
return (Float256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Float256Vector lanewise(Unary op, VectorMask<Float> m) {
return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, (Float256Mask) m); // specialize
}
@Override
@ForceInline
public Float256Vector lanewise(Binary op, Vector<Float> v) {
return (Float256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Float256Vector lanewise(Binary op, Vector<Float> v, VectorMask<Float> m) {
return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, v, (Float256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Float256Vector
lanewise(VectorOperators.Ternary op, Vector<Float> v1, Vector<Float> v2) {
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2) {
return (Float256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Float256Vector
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
return (Float256Vector) super.lanewiseTemplate(op, Float256Mask.class, v1, v2, (Float256Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Float256Vector extends FloatVector {
@ForceInline
public final float reduceLanes(VectorOperators.Associative op,
VectorMask<Float> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Float256Vector extends FloatVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Float> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Float256Vector extends FloatVector {
return super.compareTemplate(Float256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Float256Mask compare(Comparison op, Vector<Float> v, VectorMask<Float> m) {
return super.compareTemplate(Float256Mask.class, op, v, (Float256Mask) m);
}
@Override
@ForceInline
public Float256Vector blend(Vector<Float> v, VectorMask<Float> m) {
@ -413,6 +440,7 @@ final class Float256Vector extends FloatVector {
VectorMask<Float> m) {
return (Float256Vector)
super.rearrangeTemplate(Float256Shuffle.class,
Float256Mask.class,
(Float256Shuffle) shuffle,
(Float256Mask) m); // specialize
}
@ -592,16 +620,12 @@ final class Float256Vector extends FloatVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Float256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
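The cast rewrite above drops the reinterpret-based same-size fast path in favor of a single VECTOR_OP_CAST call, with a boolean-array fallback lambda. When the lane counts match, the cast carries lane values across element types; a sketch (illustrative class name):

import jdk.incubator.vector.*;

public class MaskCastSketch {
    public static void main(String[] args) {
        VectorSpecies<Float> FS = FloatVector.SPECIES_256;   // 8 lanes
        VectorSpecies<Integer> IS = IntVector.SPECIES_256;   // 8 lanes
        VectorMask<Float> fm = VectorMask.fromValues(FS, true, false, true, false,
                                                         true, true, false, false);
        VectorMask<Integer> im = fm.cast(IS);                // same lanes, new type
        System.out.println(im.toLong() == fm.toLong());      // true
    }
}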
@ -627,9 +651,9 @@ final class Float256Vector extends FloatVector {
public Float256Mask and(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float256Mask m = (Float256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Float256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Float256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -637,9 +661,9 @@ final class Float256Vector extends FloatVector {
public Float256Mask or(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float256Mask m = (Float256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Float256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Float256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -647,9 +671,9 @@ final class Float256Vector extends FloatVector {
Float256Mask xor(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float256Mask m = (Float256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -657,22 +681,32 @@ final class Float256Vector extends FloatVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float256Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Float256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float256Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float256Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Float256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float256Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float256Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Float256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float256Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float256Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -783,6 +817,20 @@ final class Float256Vector extends FloatVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m) {
return super.fromArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
return super.fromArray0Template(Float256Mask.class, a, offset, indexMap, mapOffset, (Float256Mask) m);
}
@ForceInline
@ -792,6 +840,13 @@ final class Float256Vector extends FloatVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -799,6 +854,13 @@ final class Float256Vector extends FloatVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -806,6 +868,21 @@ final class Float256Vector extends FloatVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, VectorMask<Float> m) {
super.intoArray0Template(Float256Mask.class, a, offset, (Float256Mask) m);
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
super.intoArray0Template(Float256Mask.class, a, offset, indexMap, mapOffset, (Float256Mask) m);
}
@ForceInline
@Override
final
@ -813,6 +890,21 @@ final class Float256Vector extends FloatVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class Float512Vector extends FloatVector {
@ForceInline
final @Override
float rOp(float v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
float rOp(float v, VectorMask<Float> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Float512Vector extends FloatVector {
return (Float512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Float512Vector lanewise(Unary op, VectorMask<Float> m) {
return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, (Float512Mask) m); // specialize
}
@Override
@ForceInline
public Float512Vector lanewise(Binary op, Vector<Float> v) {
return (Float512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Float512Vector lanewise(Binary op, Vector<Float> v, VectorMask<Float> m) {
return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, v, (Float512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Float512Vector
lanewise(VectorOperators.Ternary op, Vector<Float> v1, Vector<Float> v2) {
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2) {
return (Float512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Float512Vector
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
return (Float512Vector) super.lanewiseTemplate(op, Float512Mask.class, v1, v2, (Float512Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Float512Vector extends FloatVector {
@ForceInline
public final float reduceLanes(VectorOperators.Associative op,
VectorMask<Float> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Float512Vector extends FloatVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Float> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Float512Vector extends FloatVector {
return super.compareTemplate(Float512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Float512Mask compare(Comparison op, Vector<Float> v, VectorMask<Float> m) {
return super.compareTemplate(Float512Mask.class, op, v, (Float512Mask) m);
}
@Override
@ForceInline
public Float512Vector blend(Vector<Float> v, VectorMask<Float> m) {
@ -413,6 +440,7 @@ final class Float512Vector extends FloatVector {
VectorMask<Float> m) {
return (Float512Vector)
super.rearrangeTemplate(Float512Shuffle.class,
Float512Mask.class,
(Float512Shuffle) shuffle,
(Float512Mask) m); // specialize
}
@ -608,16 +636,12 @@ final class Float512Vector extends FloatVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Float512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -643,9 +667,9 @@ final class Float512Vector extends FloatVector {
public Float512Mask and(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float512Mask m = (Float512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Float512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Float512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -653,9 +677,9 @@ final class Float512Vector extends FloatVector {
public Float512Mask or(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float512Mask m = (Float512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Float512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Float512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -663,9 +687,9 @@ final class Float512Vector extends FloatVector {
Float512Mask xor(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float512Mask m = (Float512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -673,22 +697,32 @@ final class Float512Vector extends FloatVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float512Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Float512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float512Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float512Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Float512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float512Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float512Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Float512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float512Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float512Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -799,6 +833,20 @@ final class Float512Vector extends FloatVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m) {
return super.fromArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
return super.fromArray0Template(Float512Mask.class, a, offset, indexMap, mapOffset, (Float512Mask) m);
}
@ForceInline
@ -808,6 +856,13 @@ final class Float512Vector extends FloatVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -815,6 +870,13 @@ final class Float512Vector extends FloatVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -822,6 +884,21 @@ final class Float512Vector extends FloatVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, VectorMask<Float> m) {
super.intoArray0Template(Float512Mask.class, a, offset, (Float512Mask) m);
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
super.intoArray0Template(Float512Mask.class, a, offset, indexMap, mapOffset, (Float512Mask) m);
}
@ForceInline
@Override
final
@ -829,6 +906,21 @@ final class Float512Vector extends FloatVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class Float64Vector extends FloatVector {
@ForceInline
final @Override
float rOp(float v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
float rOp(float v, VectorMask<Float> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class Float64Vector extends FloatVector {
return (Float64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Float64Vector lanewise(Unary op, VectorMask<Float> m) {
return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, (Float64Mask) m); // specialize
}
@Override
@ForceInline
public Float64Vector lanewise(Binary op, Vector<Float> v) {
return (Float64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Float64Vector lanewise(Binary op, Vector<Float> v, VectorMask<Float> m) {
return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, v, (Float64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Float64Vector
lanewise(VectorOperators.Ternary op, Vector<Float> v1, Vector<Float> v2) {
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2) {
return (Float64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Float64Vector
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
return (Float64Vector) super.lanewiseTemplate(op, Float64Mask.class, v1, v2, (Float64Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class Float64Vector extends FloatVector {
@ForceInline
public final float reduceLanes(VectorOperators.Associative op,
VectorMask<Float> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class Float64Vector extends FloatVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Float> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class Float64Vector extends FloatVector {
return super.compareTemplate(Float64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Float64Mask compare(Comparison op, Vector<Float> v, VectorMask<Float> m) {
return super.compareTemplate(Float64Mask.class, op, v, (Float64Mask) m);
}
@Override
@ForceInline
public Float64Vector blend(Vector<Float> v, VectorMask<Float> m) {
@ -413,6 +440,7 @@ final class Float64Vector extends FloatVector {
VectorMask<Float> m) {
return (Float64Vector)
super.rearrangeTemplate(Float64Shuffle.class,
Float64Mask.class,
(Float64Shuffle) shuffle,
(Float64Mask) m); // specialize
}
@ -580,16 +608,12 @@ final class Float64Vector extends FloatVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Float64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -615,9 +639,9 @@ final class Float64Vector extends FloatVector {
public Float64Mask and(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float64Mask m = (Float64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Float64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Float64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -625,9 +649,9 @@ final class Float64Vector extends FloatVector {
public Float64Mask or(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float64Mask m = (Float64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Float64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Float64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -635,9 +659,9 @@ final class Float64Vector extends FloatVector {
Float64Mask xor(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
Float64Mask m = (Float64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Float64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -645,22 +669,32 @@ final class Float64Vector extends FloatVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float64Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Float64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Float64Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float64Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Float64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Float64Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float64Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Float64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Float64Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Float64Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -771,6 +805,20 @@ final class Float64Vector extends FloatVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m) {
return super.fromArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
return super.fromArray0Template(Float64Mask.class, a, offset, indexMap, mapOffset, (Float64Mask) m);
}
@ForceInline
@ -780,6 +828,13 @@ final class Float64Vector extends FloatVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -787,6 +842,13 @@ final class Float64Vector extends FloatVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -794,6 +856,21 @@ final class Float64Vector extends FloatVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, VectorMask<Float> m) {
super.intoArray0Template(Float64Mask.class, a, offset, (Float64Mask) m);
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
super.intoArray0Template(Float64Mask.class, a, offset, indexMap, mapOffset, (Float64Mask) m);
}
@ForceInline
@Override
final
@ -801,6 +878,21 @@ final class Float64Vector extends FloatVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -236,8 +236,8 @@ final class FloatMaxVector extends FloatVector {
@ForceInline
final @Override
float rOp(float v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
float rOp(float v, VectorMask<Float> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,22 +273,42 @@ final class FloatMaxVector extends FloatVector {
return (FloatMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public FloatMaxVector lanewise(Unary op, VectorMask<Float> m) {
return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialize
}
@Override
@ForceInline
public FloatMaxVector lanewise(Binary op, Vector<Float> v) {
return (FloatMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public FloatMaxVector lanewise(Binary op, Vector<Float> v, VectorMask<Float> m) {
return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, v, (FloatMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
FloatMaxVector
lanewise(VectorOperators.Ternary op, Vector<Float> v1, Vector<Float> v2) {
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2) {
return (FloatMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
FloatMaxVector
lanewise(Ternary op, Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
return (FloatMaxVector) super.lanewiseTemplate(op, FloatMaxMask.class, v1, v2, (FloatMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@ -308,7 +328,7 @@ final class FloatMaxVector extends FloatVector {
@ForceInline
public final float reduceLanes(VectorOperators.Associative op,
VectorMask<Float> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized
}
@Override
@ -321,7 +341,7 @@ final class FloatMaxVector extends FloatVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Float> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized
}
@ForceInline
@ -357,6 +377,13 @@ final class FloatMaxVector extends FloatVector {
return super.compareTemplate(FloatMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final FloatMaxMask compare(Comparison op, Vector<Float> v, VectorMask<Float> m) {
return super.compareTemplate(FloatMaxMask.class, op, v, (FloatMaxMask) m);
}
@Override
@ForceInline
public FloatMaxVector blend(Vector<Float> v, VectorMask<Float> m) {
@ -413,6 +440,7 @@ final class FloatMaxVector extends FloatVector {
VectorMask<Float> m) {
return (FloatMaxVector)
super.rearrangeTemplate(FloatMaxShuffle.class,
FloatMaxMask.class,
(FloatMaxShuffle) shuffle,
(FloatMaxMask) m); // specialize
}
@ -577,16 +605,12 @@ final class FloatMaxVector extends FloatVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
FloatMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -612,9 +636,9 @@ final class FloatMaxVector extends FloatVector {
public FloatMaxMask and(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
FloatMaxMask m = (FloatMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, FloatMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, FloatMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -622,9 +646,9 @@ final class FloatMaxVector extends FloatVector {
public FloatMaxMask or(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
FloatMaxMask m = (FloatMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, FloatMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, FloatMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -632,9 +656,9 @@ final class FloatMaxVector extends FloatVector {
FloatMaxMask xor(VectorMask<Float> mask) {
Objects.requireNonNull(mask);
FloatMaxMask m = (FloatMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, FloatMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, FloatMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -642,22 +666,32 @@ final class FloatMaxVector extends FloatVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((FloatMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((FloatMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((FloatMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, FloatMaxMask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -768,6 +802,20 @@ final class FloatMaxVector extends FloatVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m) {
return super.fromArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
return super.fromArray0Template(FloatMaxMask.class, a, offset, indexMap, mapOffset, (FloatMaxMask) m);
}
@ForceInline
@ -777,6 +825,13 @@ final class FloatMaxVector extends FloatVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -784,6 +839,13 @@ final class FloatMaxVector extends FloatVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -791,6 +853,21 @@ final class FloatMaxVector extends FloatVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, VectorMask<Float> m) {
super.intoArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m);
}
@ForceInline
@Override
final
void intoArray0(float[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Float> m) {
super.intoArray0Template(FloatMaxMask.class, a, offset, indexMap, mapOffset, (FloatMaxMask) m);
}
@ForceInline
@Override
final
@ -798,6 +875,21 @@ final class FloatMaxVector extends FloatVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@ -29,7 +29,6 @@ import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.UnaryOperator;
@ -173,6 +172,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
final
FloatVector uOpTemplate(VectorMask<Float> m,
FUnOp f) {
if (m == null) {
return uOpTemplate(f);
}
float[] vec = vec();
float[] res = new float[length()];
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
@ -216,6 +218,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
FloatVector bOpTemplate(Vector<Float> o,
VectorMask<Float> m,
FBinOp f) {
if (m == null) {
return bOpTemplate(o, f);
}
float[] res = new float[length()];
float[] vec1 = this.vec();
float[] vec2 = ((FloatVector)o).vec();
@ -265,6 +270,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
Vector<Float> o2,
VectorMask<Float> m,
FTriOp f) {
if (m == null) {
return tOpTemplate(o1, o2, f);
}
float[] res = new float[length()];
float[] vec1 = this.vec();
float[] vec2 = ((FloatVector)o1).vec();
@ -280,7 +288,22 @@ public abstract class FloatVector extends AbstractVector<Float> {
/*package-private*/
abstract
float rOp(float v, FBinOp f);
float rOp(float v, VectorMask<Float> m, FBinOp f);
@ForceInline
final
float rOpTemplate(float v, VectorMask<Float> m, FBinOp f) {
if (m == null) {
return rOpTemplate(v, f);
}
float[] vec = vec();
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
for (int i = 0; i < vec.length; i++) {
v = mbits[i] ? f.apply(i, v, vec[i]) : v;
}
return v;
}
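The null-mask early returns added to uOpTemplate, bOpTemplate, tOpTemplate and the new rOpTemplate let one template serve both the masked and the unmasked intrinsic paths. As a simplified scalar model of the masked fold (a hypothetical standalone helper, not JDK code):

final class MaskedFoldModel {
    // Lanes with an unset mask bit leave the accumulator untouched,
    // mirroring the rOpTemplate loop above.
    static float maskedFold(float id, float[] vec, boolean[] mbits,
                            java.util.function.BinaryOperator<Float> f) {
        float v = id;
        for (int i = 0; i < vec.length; i++) {
            v = mbits[i] ? f.apply(v, vec[i]) : v;
        }
        return v;
    }
}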
@ForceInline
final
float rOpTemplate(float v, FBinOp f) {
@ -540,61 +563,80 @@ public abstract class FloatVector extends AbstractVector<Float> {
}
int opc = opCode(op);
return VectorSupport.unaryOp(
opc, getClass(), float.class, length(),
this,
UN_IMPL.find(op, opc, (opc_) -> {
switch (opc_) {
case VECTOR_OP_NEG: return v0 ->
v0.uOp((i, a) -> (float) -a);
case VECTOR_OP_ABS: return v0 ->
v0.uOp((i, a) -> (float) Math.abs(a));
case VECTOR_OP_SIN: return v0 ->
v0.uOp((i, a) -> (float) Math.sin(a));
case VECTOR_OP_COS: return v0 ->
v0.uOp((i, a) -> (float) Math.cos(a));
case VECTOR_OP_TAN: return v0 ->
v0.uOp((i, a) -> (float) Math.tan(a));
case VECTOR_OP_ASIN: return v0 ->
v0.uOp((i, a) -> (float) Math.asin(a));
case VECTOR_OP_ACOS: return v0 ->
v0.uOp((i, a) -> (float) Math.acos(a));
case VECTOR_OP_ATAN: return v0 ->
v0.uOp((i, a) -> (float) Math.atan(a));
case VECTOR_OP_EXP: return v0 ->
v0.uOp((i, a) -> (float) Math.exp(a));
case VECTOR_OP_LOG: return v0 ->
v0.uOp((i, a) -> (float) Math.log(a));
case VECTOR_OP_LOG10: return v0 ->
v0.uOp((i, a) -> (float) Math.log10(a));
case VECTOR_OP_SQRT: return v0 ->
v0.uOp((i, a) -> (float) Math.sqrt(a));
case VECTOR_OP_CBRT: return v0 ->
v0.uOp((i, a) -> (float) Math.cbrt(a));
case VECTOR_OP_SINH: return v0 ->
v0.uOp((i, a) -> (float) Math.sinh(a));
case VECTOR_OP_COSH: return v0 ->
v0.uOp((i, a) -> (float) Math.cosh(a));
case VECTOR_OP_TANH: return v0 ->
v0.uOp((i, a) -> (float) Math.tanh(a));
case VECTOR_OP_EXPM1: return v0 ->
v0.uOp((i, a) -> (float) Math.expm1(a));
case VECTOR_OP_LOG1P: return v0 ->
v0.uOp((i, a) -> (float) Math.log1p(a));
default: return null;
}}));
opc, getClass(), null, float.class, length(),
this, null,
UN_IMPL.find(op, opc, FloatVector::unaryOperations));
}
private static final
ImplCache<Unary,UnaryOperator<FloatVector>> UN_IMPL
= new ImplCache<>(Unary.class, FloatVector.class);
/**
* {@inheritDoc} <!--workaround-->
*/
@ForceInline
public final
@Override
public abstract
FloatVector lanewise(VectorOperators.Unary op,
VectorMask<Float> m);
@ForceInline
final
FloatVector lanewiseTemplate(VectorOperators.Unary op,
Class<? extends VectorMask<Float>> maskClass,
VectorMask<Float> m) {
return blend(lanewise(op), m);
m.check(maskClass, this);
if (opKind(op, VO_SPECIAL)) {
if (op == ZOMO) {
return blend(broadcast(-1), compare(NE, 0, m));
}
}
int opc = opCode(op);
return VectorSupport.unaryOp(
opc, getClass(), maskClass, float.class, length(),
this, m,
UN_IMPL.find(op, opc, FloatVector::unaryOperations));
}
private static final
ImplCache<Unary, UnaryOperation<FloatVector, VectorMask<Float>>>
UN_IMPL = new ImplCache<>(Unary.class, FloatVector.class);
private static UnaryOperation<FloatVector, VectorMask<Float>> unaryOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_NEG: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.abs(a));
case VECTOR_OP_SIN: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.sin(a));
case VECTOR_OP_COS: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.cos(a));
case VECTOR_OP_TAN: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.tan(a));
case VECTOR_OP_ASIN: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.asin(a));
case VECTOR_OP_ACOS: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.acos(a));
case VECTOR_OP_ATAN: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.atan(a));
case VECTOR_OP_EXP: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.exp(a));
case VECTOR_OP_LOG: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.log(a));
case VECTOR_OP_LOG10: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.log10(a));
case VECTOR_OP_SQRT: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.sqrt(a));
case VECTOR_OP_CBRT: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.cbrt(a));
case VECTOR_OP_SINH: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.sinh(a));
case VECTOR_OP_COSH: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.cosh(a));
case VECTOR_OP_TANH: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.tanh(a));
case VECTOR_OP_EXPM1: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.expm1(a));
case VECTOR_OP_LOG1P: return (v0, m) ->
v0.uOp(m, (i, a) -> (float) Math.log1p(a));
default: return null;
}
}
// Binary lanewise support
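With the masked lanewiseTemplate above, a unary op under a mask reaches the intrinsic directly instead of computing everywhere and blending; unset lanes keep their original value. A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedUnarySketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector v = FloatVector.fromArray(S, new float[]{1f, -2f, 3f, -4f}, 0);
        VectorMask<Float> neg = v.compare(VectorOperators.LT, 0f);
        FloatVector abs = v.lanewise(VectorOperators.NEG, neg); // negate only negative lanes
        System.out.println(abs); // [1.0, 2.0, 3.0, 4.0]
    }
}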
@ -614,6 +656,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
Vector<Float> v) {
FloatVector that = (FloatVector) v;
that.check(this);
if (opKind(op, VO_SPECIAL )) {
if (op == FIRST_NONZERO) {
// FIXME: Support this in the JIT.
@ -627,48 +670,75 @@ public abstract class FloatVector extends AbstractVector<Float> {
.viewAsFloatingLanes();
}
}
int opc = opCode(op);
return VectorSupport.binaryOp(
opc, getClass(), float.class, length(),
this, that,
BIN_IMPL.find(op, opc, (opc_) -> {
switch (opc_) {
case VECTOR_OP_ADD: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)(a + b));
case VECTOR_OP_SUB: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)(a - b));
case VECTOR_OP_MUL: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)(a * b));
case VECTOR_OP_DIV: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)(a / b));
case VECTOR_OP_MAX: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b));
case VECTOR_OP_MIN: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b));
case VECTOR_OP_ATAN2: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b));
case VECTOR_OP_POW: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b));
case VECTOR_OP_HYPOT: return (v0, v1) ->
v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b));
default: return null;
}}));
opc, getClass(), null, float.class, length(),
this, that, null,
BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
}
private static final
ImplCache<Binary,BinaryOperator<FloatVector>> BIN_IMPL
= new ImplCache<>(Binary.class, FloatVector.class);
/**
* {@inheritDoc} <!--workaround-->
* @see #lanewise(VectorOperators.Binary,float,VectorMask)
*/
@ForceInline
public final
@Override
public abstract
FloatVector lanewise(VectorOperators.Binary op,
Vector<Float> v,
VectorMask<Float> m) {
VectorMask<Float> m);
@ForceInline
final
FloatVector lanewiseTemplate(VectorOperators.Binary op,
Class<? extends VectorMask<Float>> maskClass,
Vector<Float> v, VectorMask<Float> m) {
FloatVector that = (FloatVector) v;
that.check(this);
m.check(maskClass, this);
if (opKind(op, VO_SPECIAL )) {
if (op == FIRST_NONZERO) {
return blend(lanewise(op, v), m);
}
}
int opc = opCode(op);
return VectorSupport.binaryOp(
opc, getClass(), maskClass, float.class, length(),
this, that, m,
BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
}
private static final
ImplCache<Binary, BinaryOperation<FloatVector, VectorMask<Float>>>
BIN_IMPL = new ImplCache<>(Binary.class, FloatVector.class);
private static BinaryOperation<FloatVector, VectorMask<Float>> binaryOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_ADD: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)(a + b));
case VECTOR_OP_SUB: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)(a - b));
case VECTOR_OP_MUL: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)(a * b));
case VECTOR_OP_DIV: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)(a / b));
case VECTOR_OP_MAX: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)Math.max(a, b));
case VECTOR_OP_MIN: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float)Math.min(a, b));
case VECTOR_OP_OR: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float) Math.atan2(a, b));
case VECTOR_OP_POW: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float) Math.pow(a, b));
case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> (float) Math.hypot(a, b));
default: return null;
}
}
// FIXME: Maybe all of the public final methods in this file (the
// simple ones that just call lanewise) should be pushed down to
// the X-VectorBits template. They can't optimize properly at
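The binary template likewise hands the mask straight to the intrinsic. A common use is guarding a division so masked-off lanes are never computed; for floats a zero divisor would only yield Infinity, but the same masked idiom matters for the integral types. A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedDivSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector num = FloatVector.broadcast(S, 10f);
        FloatVector den = FloatVector.fromArray(S, new float[]{2f, 0f, 5f, 0f}, 0);
        VectorMask<Float> nz = den.compare(VectorOperators.NE, 0f);
        FloatVector q = num.lanewise(VectorOperators.DIV, den, nz); // unset lanes keep num
        System.out.println(q); // [5.0, 10.0, 2.0, 10.0]
    }
}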
@ -725,7 +795,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
FloatVector lanewise(VectorOperators.Binary op,
float e,
VectorMask<Float> m) {
return blend(lanewise(op, e), m);
return lanewise(op, broadcast(e), m);
}
/**
@ -743,8 +813,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
FloatVector lanewise(VectorOperators.Binary op,
long e) {
float e1 = (float) e;
if ((long)e1 != e
) {
if ((long)e1 != e) {
vspecies().checkValue(e); // for exception
}
return lanewise(op, e1);
@ -764,7 +833,11 @@ public abstract class FloatVector extends AbstractVector<Float> {
public final
FloatVector lanewise(VectorOperators.Binary op,
long e, VectorMask<Float> m) {
return blend(lanewise(op, e), m);
float e1 = (float) e;
if ((long)e1 != e) {
vspecies().checkValue(e); // for exception
}
return lanewise(op, e1, m);
}
@ -806,18 +879,10 @@ public abstract class FloatVector extends AbstractVector<Float> {
tother.check(this);
int opc = opCode(op);
return VectorSupport.ternaryOp(
opc, getClass(), float.class, length(),
this, that, tother,
TERN_IMPL.find(op, opc, (opc_) -> {
switch (opc_) {
case VECTOR_OP_FMA: return (v0, v1_, v2_) ->
v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c));
default: return null;
}}));
opc, getClass(), null, float.class, length(),
this, that, tother, null,
TERN_IMPL.find(op, opc, FloatVector::ternaryOperations));
}
private static final
ImplCache<Ternary,TernaryOperation<FloatVector>> TERN_IMPL
= new ImplCache<>(Ternary.class, FloatVector.class);
/**
* {@inheritDoc} <!--workaround-->
@ -825,13 +890,45 @@ public abstract class FloatVector extends AbstractVector<Float> {
* @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
* @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
*/
@ForceInline
public final
@Override
public abstract
FloatVector lanewise(VectorOperators.Ternary op,
Vector<Float> v1,
Vector<Float> v2,
VectorMask<Float> m);
@ForceInline
final
FloatVector lanewiseTemplate(VectorOperators.Ternary op,
Class<? extends VectorMask<Float>> maskClass,
Vector<Float> v1,
Vector<Float> v2,
VectorMask<Float> m) {
return blend(lanewise(op, v1, v2), m);
FloatVector that = (FloatVector) v1;
FloatVector tother = (FloatVector) v2;
// It's a word: https://www.dictionary.com/browse/tother
// See also Chapter 11 of Dickens, Our Mutual Friend:
// "Totherest Governor," replied Mr Riderhood...
that.check(this);
tother.check(this);
m.check(maskClass, this);
int opc = opCode(op);
return VectorSupport.ternaryOp(
opc, getClass(), maskClass, float.class, length(),
this, that, tother, m,
TERN_IMPL.find(op, opc, FloatVector::ternaryOperations));
}
private static final
ImplCache<Ternary, TernaryOperation<FloatVector, VectorMask<Float>>>
TERN_IMPL = new ImplCache<>(Ternary.class, FloatVector.class);
private static TernaryOperation<FloatVector, VectorMask<Float>> ternaryOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
default: return null;
}
}
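The masked ternary template carries the mask into ternaryOp, so FMA under a mask is a single intrinsic call; unset lanes keep this vector's value. A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedFmaSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector a = FloatVector.broadcast(S, 2f);
        FloatVector b = FloatVector.fromArray(S, new float[]{1f, 2f, 3f, 4f}, 0);
        FloatVector c = FloatVector.broadcast(S, 100f);
        VectorMask<Float> m = VectorMask.fromValues(S, true, false, true, false);
        System.out.println(a.lanewise(VectorOperators.FMA, b, c, m)); // [102.0, 2.0, 106.0, 2.0]
    }
}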
/**
@ -888,7 +985,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
float e1,
float e2,
VectorMask<Float> m) {
return blend(lanewise(op, e1, e2), m);
return lanewise(op, broadcast(e1), broadcast(e2), m);
}
/**
@ -946,7 +1043,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
Vector<Float> v1,
float e2,
VectorMask<Float> m) {
return blend(lanewise(op, v1, e2), m);
return lanewise(op, v1, broadcast(e2), m);
}
/**
@ -1003,7 +1100,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
float e1,
Vector<Float> v2,
VectorMask<Float> m) {
return blend(lanewise(op, e1, v2), m);
return lanewise(op, broadcast(e1), v2, m);
}
// (Thus endeth the Great and Mighty Ternary Ogdoad.)
@ -1659,15 +1756,13 @@ public abstract class FloatVector extends AbstractVector<Float> {
final
<M extends VectorMask<Float>>
M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
Objects.requireNonNull(v);
FloatSpecies vsp = vspecies();
FloatVector that = (FloatVector) v;
that.check(this);
int opc = opCode(op);
return VectorSupport.compare(
opc, getClass(), maskType, float.class, length(),
this, that,
(cond, v0, v1) -> {
this, that, null,
(cond, v0, v1, m1) -> {
AbstractMask<Float> m
= v0.bTest(cond, v1, (cond_, i, a, b)
-> compareWithOp(cond, a, b));
@ -1677,6 +1772,28 @@ public abstract class FloatVector extends AbstractVector<Float> {
});
}
/*package-private*/
@ForceInline
final
<M extends VectorMask<Float>>
M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v, M m) {
FloatVector that = (FloatVector) v;
that.check(this);
m.check(maskType, this);
int opc = opCode(op);
return VectorSupport.compare(
opc, getClass(), maskType, float.class, length(),
this, that, m,
(cond, v0, v1, m1) -> {
AbstractMask<Float> cmpM
= v0.bTest(cond, v1, (cond_, i, a, b)
-> compareWithOp(cond, a, b));
@SuppressWarnings("unchecked")
M m2 = (M) cmpM.and(m1);
return m2;
});
}
@ForceInline
private static boolean compareWithOp(int cond, float a, float b) {
return switch (cond) {
@ -1690,18 +1807,6 @@ public abstract class FloatVector extends AbstractVector<Float> {
};
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
VectorMask<Float> compare(VectorOperators.Comparison op,
Vector<Float> v,
VectorMask<Float> m) {
return compare(op, v).and(m);
}
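The masked compareTemplate above intersects the comparison with m inside one intrinsic call, replacing the compare-then-and default that is removed here. A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedCompareSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector v = FloatVector.fromArray(S, new float[]{1f, 5f, 3f, 9f}, 0);
        VectorMask<Float> even = VectorMask.fromValues(S, true, false, true, false);
        VectorMask<Float> m = v.compare(VectorOperators.GT, 2f, even); // GT ANDed with even
        System.out.println(m.toLong()); // 4, i.e. only lane 2 is set
    }
}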
/**
* Tests this vector by comparing it with an input scalar,
* according to the given comparison operation.
@ -1760,7 +1865,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
public final VectorMask<Float> compare(VectorOperators.Comparison op,
float e,
VectorMask<Float> m) {
return compare(op, e).and(m);
return compare(op, broadcast(e), m);
}
/**
@ -2011,9 +2116,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
shuffle.checkIndexes();
return VectorSupport.rearrangeOp(
getClass(), shuffletype, float.class, length(),
this, shuffle,
(v1, s_) -> v1.uOp((i, a) -> {
getClass(), shuffletype, null, float.class, length(),
this, shuffle, null,
(v1, s_, m_) -> v1.uOp((i, a) -> {
int ei = s_.laneSource(i);
return v1.lane(ei);
}));
@ -2030,24 +2135,25 @@ public abstract class FloatVector extends AbstractVector<Float> {
/*package-private*/
@ForceInline
final
<S extends VectorShuffle<Float>>
<S extends VectorShuffle<Float>, M extends VectorMask<Float>>
FloatVector rearrangeTemplate(Class<S> shuffletype,
Class<M> masktype,
S shuffle,
VectorMask<Float> m) {
FloatVector unmasked =
VectorSupport.rearrangeOp(
getClass(), shuffletype, float.class, length(),
this, shuffle,
(v1, s_) -> v1.uOp((i, a) -> {
int ei = s_.laneSource(i);
return ei < 0 ? 0 : v1.lane(ei);
}));
M m) {
m.check(masktype, this);
VectorMask<Float> valid = shuffle.laneIsValid();
if (m.andNot(valid).anyTrue()) {
shuffle.checkIndexes();
throw new AssertionError();
}
return broadcast((float)0).blend(unmasked, m);
return VectorSupport.rearrangeOp(
getClass(), shuffletype, masktype, float.class, length(),
this, shuffle, m,
(v1, s_, m_) -> v1.uOp((i, a) -> {
int ei = s_.laneSource(i);
return ei < 0 || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
}));
}
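The masked rearrange now validates indexes only for lanes the mask selects (an out-of-range source under an unset lane is tolerated) and zeroes unset lanes, all within one rearrangeOp call. A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedRearrangeSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector v = FloatVector.fromArray(S, new float[]{10f, 20f, 30f, 40f}, 0);
        VectorShuffle<Float> rev = VectorShuffle.fromValues(S, 3, 2, 1, 0);
        VectorMask<Float> m = VectorMask.fromValues(S, true, true, false, false);
        System.out.println(v.rearrange(rev, m)); // [40.0, 30.0, 0.0, 0.0]
    }
}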
/**
@ -2070,17 +2176,17 @@ public abstract class FloatVector extends AbstractVector<Float> {
S ws = (S) shuffle.wrapIndexes();
FloatVector r0 =
VectorSupport.rearrangeOp(
getClass(), shuffletype, float.class, length(),
this, ws,
(v0, s_) -> v0.uOp((i, a) -> {
getClass(), shuffletype, null, float.class, length(),
this, ws, null,
(v0, s_, m_) -> v0.uOp((i, a) -> {
int ei = s_.laneSource(i);
return v0.lane(ei);
}));
FloatVector r1 =
VectorSupport.rearrangeOp(
getClass(), shuffletype, float.class, length(),
v, ws,
(v1, s_) -> v1.uOp((i, a) -> {
getClass(), shuffletype, null, float.class, length(),
v, ws, null,
(v1, s_, m_) -> v1.uOp((i, a) -> {
int ei = s_.laneSource(i);
return v1.lane(ei);
}));
@ -2329,10 +2435,19 @@ public abstract class FloatVector extends AbstractVector<Float> {
@ForceInline
final
float reduceLanesTemplate(VectorOperators.Associative op,
Class<? extends VectorMask<Float>> maskClass,
VectorMask<Float> m) {
m.check(maskClass, this);
if (op == FIRST_NONZERO) {
FloatVector v = reduceIdentityVector(op).blend(this, m);
return v.reduceLanesTemplate(op);
}
int opc = opCode(op);
return fromBits(VectorSupport.reductionCoerced(
opc, getClass(), maskClass, float.class, length(),
this, m,
REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));
}
/*package-private*/
@ForceInline
@ -2346,24 +2461,28 @@ public abstract class FloatVector extends AbstractVector<Float> {
}
int opc = opCode(op);
return fromBits(VectorSupport.reductionCoerced(
opc, getClass(), float.class, length(),
this,
REDUCE_IMPL.find(op, opc, (opc_) -> {
switch (opc_) {
case VECTOR_OP_ADD: return v ->
toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b)));
case VECTOR_OP_MUL: return v ->
toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b)));
case VECTOR_OP_MIN: return v ->
toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b)));
case VECTOR_OP_MAX: return v ->
toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b)));
default: return null;
}})));
opc, getClass(), null, float.class, length(),
this, null,
REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));
}
private static final
ImplCache<Associative,Function<FloatVector,Long>> REDUCE_IMPL
= new ImplCache<>(Associative.class, FloatVector.class);
ImplCache<Associative, ReductionOperation<FloatVector, VectorMask<Float>>>
REDUCE_IMPL = new ImplCache<>(Associative.class, FloatVector.class);
private static ReductionOperation<FloatVector, VectorMask<Float>> reductionOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_ADD: return (v, m) ->
toBits(v.rOp((float)0, m, (i, a, b) -> (float)(a + b)));
case VECTOR_OP_MUL: return (v, m) ->
toBits(v.rOp((float)1, m, (i, a, b) -> (float)(a * b)));
case VECTOR_OP_MIN: return (v, m) ->
toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (float) Math.min(a, b)));
case VECTOR_OP_MAX: return (v, m) ->
toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (float) Math.max(a, b)));
default: return null;
}
}
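Masked reduceLanes folds only the selected lanes; the lambdas above seed unset lanes with the op's identity (0 for ADD, 1 for MUL, the infinities for MIN and MAX). A sketch (illustrative class name and data):

import jdk.incubator.vector.*;

public class MaskedReduceSketch {
    static final VectorSpecies<Float> S = FloatVector.SPECIES_128;

    public static void main(String[] args) {
        FloatVector v = FloatVector.fromArray(S, new float[]{1f, 2f, 3f, 4f}, 0);
        VectorMask<Float> odd = VectorMask.fromValues(S, false, true, false, true);
        System.out.println(v.reduceLanes(VectorOperators.ADD, odd)); // 6.0 (2 + 4)
        System.out.println(v.reduceLanes(VectorOperators.MAX, odd)); // 4.0
    }
}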
private
@ForceInline
@ -2573,9 +2692,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
VectorMask<Float> m) {
FloatSpecies vsp = (FloatSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
FloatVector zero = vsp.zero();
FloatVector v = zero.fromByteArray0(a, offset);
return zero.blend(v.maybeSwap(bo), m);
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
@ -2637,8 +2754,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
VectorMask<Float> m) {
FloatSpecies vsp = (FloatSpecies) species;
if (offset >= 0 && offset <= (a.length - species.length())) {
FloatVector zero = vsp.zero();
return zero.blend(zero.fromArray0(a, offset), m);
return vsp.dummyVector().fromArray0(a, offset, m);
}
// FIXME: optimize
@ -2696,11 +2812,11 @@ public abstract class FloatVector extends AbstractVector<Float> {
vix = VectorIntrinsics.checkIndex(vix, a.length);
return VectorSupport.loadWithMap(
vectorType, float.class, vsp.laneCount(),
IntVector.species(vsp.indexShape()).vectorType(),
a, ARRAY_BASE, vix,
vectorType, null, float.class, vsp.laneCount(),
isp.vectorType(),
a, ARRAY_BASE, vix, null,
a, offset, indexMap, mapOffset, vsp,
(float[] c, int idx, int[] iMap, int idy, FloatSpecies s) ->
(c, idx, iMap, idy, s, vm) ->
s.vOp(n -> c[idx + iMap[idy+n]]));
}
@ -2750,9 +2866,8 @@ public abstract class FloatVector extends AbstractVector<Float> {
return fromArray(species, a, offset, indexMap, mapOffset);
}
else {
// FIXME: Cannot vectorize yet, if there's a mask.
FloatSpecies vsp = (FloatSpecies) species;
return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m);
}
}
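With the masked fromArray0 in place, a masked gather now reaches the loadWithMap intrinsic rather than the scalar vOp fallback that the removed FIXME referred to. Sketch (array contents hypothetical):

    float[] data = new float[100];                 // assume it holds useful values
    int[] indexMap = {5, 17, 42, 99};
    VectorMask<Float> m = VectorMask.fromLong(FloatVector.SPECIES_128, 0b0111);
    FloatVector g = FloatVector.fromArray(FloatVector.SPECIES_128, data, 0, indexMap, 0, m);
    // g.lane(i) == data[indexMap[i]] for set lanes; lane 3 is unset and reads as 0f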
@ -2846,9 +2961,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
VectorMask<Float> m) {
FloatSpecies vsp = (FloatSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
FloatVector zero = vsp.zero();
FloatVector v = zero.fromByteBuffer0(bb, offset);
return zero.blend(v.maybeSwap(bo), m);
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
}
// FIXME: optimize
@ -2920,10 +3033,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
if (m.allTrue()) {
intoArray(a, offset);
} else {
// FIXME: optimize
FloatSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
intoArray0(a, offset, m);
}
}
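Masked contiguous stores likewise route through intoArray0 and its storeMasked intrinsic; the typical use is a loop tail where the mask suppresses out-of-range lanes:

    float[] out = new float[3];                    // shorter than one 4-lane vector
    FloatVector v = FloatVector.broadcast(FloatVector.SPECIES_128, 7f);
    VectorMask<Float> tail = FloatVector.SPECIES_128.indexInRange(0, out.length);
    v.intoArray(out, 0, tail);                     // lanes 0..2 stored, lane 3 suppressed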
@ -2967,12 +3079,12 @@ public abstract class FloatVector extends AbstractVector<Float> {
vix = VectorIntrinsics.checkIndex(vix, a.length);
VectorSupport.storeWithMap(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(),
isp.vectorType(),
a, arrayAddress(a, 0), vix,
this,
this, null,
a, offset, indexMap, mapOffset,
(arr, off, v, map, mo)
(arr, off, v, map, mo, vm)
-> v.stOp(arr, off,
(arr_, off_, i, e) -> {
int j = map[mo + i];
@ -3019,12 +3131,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
intoArray(a, offset, indexMap, mapOffset);
}
else {
// FIXME: Cannot vectorize yet, if there's a mask.
stOp(a, offset, m,
(arr, off, i, e) -> {
int j = indexMap[mapOffset + i];
arr[off + j] = e;
});
intoArray0(a, offset, indexMap, mapOffset, m);
}
}
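The same applies to masked scatters: intoArray with an index map now lands in storeWithMap instead of the scalar stOp loop. Sketch (indices and values illustrative):

    float[] dest = new float[100];
    int[] indexMap = {3, 30, 60, 90};
    FloatVector v = FloatVector.fromArray(FloatVector.SPECIES_128,
            new float[] {1f, 2f, 3f, 4f}, 0);
    VectorMask<Float> m = VectorMask.fromLong(FloatVector.SPECIES_128, 0b1010);
    v.intoArray(dest, 0, indexMap, 0, m);          // writes dest[30] = 2f and dest[90] = 4f only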
@ -3054,12 +3161,9 @@ public abstract class FloatVector extends AbstractVector<Float> {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
// FIXME: optimize
FloatSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
ByteBuffer wb = wrapper(a, bo);
this.stOp(wb, offset, m,
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
maybeSwap(bo).intoByteArray0(a, offset, m);
}
}
@ -3071,7 +3175,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (bb.isReadOnly()) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
@ -3090,15 +3194,12 @@ public abstract class FloatVector extends AbstractVector<Float> {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
} else {
// FIXME: optimize
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
}
FloatSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
this.stOp(wb, offset, m,
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
}
}
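For byte buffers the masked store now swaps to the requested byte order first and then defers to intoByteBuffer0, which bottoms out in ScopedMemoryAccess. Sketch (buffer size and byte order illustrative):

    // import java.nio.ByteBuffer; import java.nio.ByteOrder;
    ByteBuffer bb = ByteBuffer.allocateDirect(16);
    FloatVector v = FloatVector.broadcast(FloatVector.SPECIES_128, 1.5f);
    VectorMask<Float> m = VectorMask.fromLong(FloatVector.SPECIES_128, 0b0011);
    v.intoByteBuffer(bb, 0, ByteOrder.LITTLE_ENDIAN, m); // writes bytes 0..7; lanes 2..3 untouched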
@ -3136,6 +3237,57 @@ public abstract class FloatVector extends AbstractVector<Float> {
(arr_, off_, i) -> arr_[off_ + i]));
}
/*package-private*/
abstract
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
m.check(species());
FloatSpecies vsp = vspecies();
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
/*package-private*/
abstract
FloatVector fromArray0(float[] a, int offset,
int[] indexMap, int mapOffset,
VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset,
int[] indexMap, int mapOffset, M m) {
FloatSpecies vsp = vspecies();
IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
Objects.requireNonNull(a);
Objects.requireNonNull(indexMap);
m.check(vsp);
Class<? extends FloatVector> vectorType = vsp.vectorType();
// Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
IntVector vix = IntVector
.fromArray(isp, indexMap, mapOffset)
.add(offset);
// FIXME: Check index under mask controlling.
vix = VectorIntrinsics.checkIndex(vix, a.length);
return VectorSupport.loadWithMap(
vectorType, maskClass, float.class, vsp.laneCount(),
isp.vectorType(),
a, ARRAY_BASE, vix, m,
a, offset, indexMap, mapOffset, vsp,
(c, idx, iMap, idy, s, vm) ->
s.vOp(vm, n -> c[idx + iMap[idy+n]]));
}
@Override
@ -3156,6 +3308,25 @@ public abstract class FloatVector extends AbstractVector<Float> {
});
}
abstract
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
});
}
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
@ -3172,6 +3343,24 @@ public abstract class FloatVector extends AbstractVector<Float> {
});
}
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
});
}
// Unchecked storing operations in native byte order.
// Caller is responsible for applying index checks, masking, and
// byte swapping.
@ -3191,6 +3380,58 @@ public abstract class FloatVector extends AbstractVector<Float> {
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
abstract
void intoArray0(float[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
m.check(species());
FloatSpecies vsp = vspecies();
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
abstract
void intoArray0(float[] a, int offset,
int[] indexMap, int mapOffset,
VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoArray0Template(Class<M> maskClass, float[] a, int offset,
int[] indexMap, int mapOffset, M m) {
m.check(species());
FloatSpecies vsp = vspecies();
IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
// Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
IntVector vix = IntVector
.fromArray(isp, indexMap, mapOffset)
.add(offset);
// FIXME: Check index under mask controlling.
vix = VectorIntrinsics.checkIndex(vix, a.length);
VectorSupport.storeWithMap(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
isp.vectorType(),
a, arrayAddress(a, 0), vix,
this, m,
a, offset, indexMap, mapOffset,
(arr, off, v, map, mo, vm)
-> v.stOp(arr, off, vm,
(arr_, off_, i, e) -> {
int j = map[mo + i];
arr[off + j] = e;
}));
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
@ -3208,6 +3449,25 @@ public abstract class FloatVector extends AbstractVector<Float> {
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
@ -3222,6 +3482,25 @@ public abstract class FloatVector extends AbstractVector<Float> {
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
});
}
// End of low-level memory operations.
private static
@ -3539,7 +3818,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
/*package-private*/
@ForceInline
<M> FloatVector ldOp(M memory, int offset,
AbstractMask<Float> m,
VectorMask<Float> m,
FLdOp<M> f) {
return dummyVector().ldOp(memory, offset, m, f);
}
@ -236,8 +236,8 @@ final class Int128Vector extends IntVector {
@ForceInline
final @Override
int rOp(int v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
int rOp(int v, VectorMask<Integer> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Int128Vector extends IntVector {
return (Int128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Int128Vector lanewise(Unary op, VectorMask<Integer> m) {
return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, (Int128Mask) m); // specialize
}
@Override
@ForceInline
public Int128Vector lanewise(Binary op, Vector<Integer> v) {
return (Int128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Int128Vector lanewise(Binary op, Vector<Integer> v, VectorMask<Integer> m) {
return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, v, (Int128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Int128Vector
@ -286,15 +298,30 @@ final class Int128Vector extends IntVector {
return (Int128Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Int128Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m) {
return (Int128Vector) super.lanewiseShiftTemplate(op, Int128Mask.class, e, (Int128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Int128Vector
lanewise(VectorOperators.Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
return (Int128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Int128Vector
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2, VectorMask<Integer> m) {
return (Int128Vector) super.lanewiseTemplate(op, Int128Mask.class, v1, v2, (Int128Mask) m); // specialize
}
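The new masked ternary overload keeps three-operand expressions such as BITWISE_BLEND vectorized under a mask, with unset lanes retaining the original lane value. Sketch (operand values illustrative):

    IntVector a   = IntVector.broadcast(IntVector.SPECIES_128, 0x0F0F0F0F);
    IntVector b   = IntVector.broadcast(IntVector.SPECIES_128, 0xFFFFFFFF);
    IntVector sel = IntVector.broadcast(IntVector.SPECIES_128, 0x00FF00FF);
    VectorMask<Integer> m = VectorMask.fromLong(IntVector.SPECIES_128, 0b0101);
    IntVector r = a.lanewise(VectorOperators.BITWISE_BLEND, b, sel, m);
    // set lanes compute (a & ~sel) | (b & sel); unset lanes keep a's lane value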
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Int128Vector extends IntVector {
@ForceInline
public final int reduceLanes(VectorOperators.Associative op,
VectorMask<Integer> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Int128Vector extends IntVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Integer> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Int128Vector extends IntVector {
return super.compareTemplate(Int128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Int128Mask compare(Comparison op, Vector<Integer> v, VectorMask<Integer> m) {
return super.compareTemplate(Int128Mask.class, op, v, (Int128Mask) m);
}
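Masked compare folds the mask into the comparison itself rather than computing a full mask and and-ing it afterwards. Sketch using the scalar-broadcast overload (values illustrative):

    IntVector v = IntVector.fromArray(IntVector.SPECIES_128, new int[] {5, -1, 9, 0}, 0);
    VectorMask<Integer> valid = VectorMask.fromLong(IntVector.SPECIES_128, 0b0111);
    VectorMask<Integer> pos = v.compare(VectorOperators.GT, 0, valid);
    // equivalent to v.compare(GT, 0).and(valid), computed in one operation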
@Override
@ForceInline
public Int128Vector blend(Vector<Integer> v, VectorMask<Integer> m) {
@ -419,6 +453,7 @@ final class Int128Vector extends IntVector {
VectorMask<Integer> m) {
return (Int128Vector)
super.rearrangeTemplate(Int128Shuffle.class,
Int128Mask.class,
(Int128Shuffle) shuffle,
(Int128Mask) m); // specialize
}
@ -588,16 +623,12 @@ final class Int128Vector extends IntVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Int128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
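The cast path above replaces the old size-gated VECTOR_OP_REINTERPRET with VECTOR_OP_CAST plus a generic mask factory, so same-lane-count casts across element types share one intrinsic. At the API level:

    VectorMask<Integer> im = VectorMask.fromLong(IntVector.SPECIES_128, 0b1010);
    VectorMask<Float>   fm = im.cast(FloatVector.SPECIES_128); // same lane count, lane bits preserved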
@Override
@ -623,9 +654,9 @@ final class Int128Vector extends IntVector {
public Int128Mask and(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int128Mask m = (Int128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Int128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Int128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -633,9 +664,9 @@ final class Int128Vector extends IntVector {
public Int128Mask or(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int128Mask m = (Int128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Int128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Int128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -643,9 +674,9 @@ final class Int128Vector extends IntVector {
Int128Mask xor(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int128Mask m = (Int128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int128Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int128Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
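The extra null arguments thread the (absent) predicate mask through binaryOp; the public logical operations themselves are unchanged. For reference:

    VectorMask<Integer> a = VectorMask.fromLong(IntVector.SPECIES_128, 0b1100);
    VectorMask<Integer> b = VectorMask.fromLong(IntVector.SPECIES_128, 0b1010);
    VectorMask<Integer> both   = a.and(b);    // 0b1000
    VectorMask<Integer> either = a.or(b);     // 0b1110
    VectorMask<Integer> onlyA  = a.andNot(b); // 0b0100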
// Mask Query operations
@ -653,22 +684,32 @@ final class Int128Vector extends IntVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int128Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Int128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int128Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int128Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Int128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int128Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int128Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Int128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int128Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int128Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
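trueCount, firstTrue and lastTrue now return through the widened long-valued maskReductionCoerced (hence the explicit (int) casts), and toLong gains an intrinsic of its own. Behavior sketch:

    VectorMask<Integer> m = VectorMask.fromLong(IntVector.SPECIES_128, 0b0110);
    int n     = m.trueCount(); // 2
    int first = m.firstTrue(); // 1 (lane index of the first set lane)
    int last  = m.lastTrue();  // 2 (lane index of the last set lane)
    long bits = m.toLong();    // 0b0110L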
// Reductions
@ -779,6 +820,20 @@ final class Int128Vector extends IntVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m) {
return super.fromArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
return super.fromArray0Template(Int128Mask.class, a, offset, indexMap, mapOffset, (Int128Mask) m);
}
@ForceInline
@ -788,6 +843,13 @@ final class Int128Vector extends IntVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -795,6 +857,13 @@ final class Int128Vector extends IntVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -802,6 +871,21 @@ final class Int128Vector extends IntVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, VectorMask<Integer> m) {
super.intoArray0Template(Int128Mask.class, a, offset, (Int128Mask) m);
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
super.intoArray0Template(Int128Mask.class, a, offset, indexMap, mapOffset, (Int128Mask) m);
}
@ForceInline
@Override
final
@ -809,6 +893,21 @@ final class Int128Vector extends IntVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
@ -236,8 +236,8 @@ final class Int256Vector extends IntVector {
@ForceInline
final @Override
int rOp(int v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
int rOp(int v, VectorMask<Integer> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Int256Vector extends IntVector {
return (Int256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Int256Vector lanewise(Unary op, VectorMask<Integer> m) {
return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, (Int256Mask) m); // specialize
}
@Override
@ForceInline
public Int256Vector lanewise(Binary op, Vector<Integer> v) {
return (Int256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Int256Vector lanewise(Binary op, Vector<Integer> v, VectorMask<Integer> m) {
return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, v, (Int256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Int256Vector
@ -286,15 +298,30 @@ final class Int256Vector extends IntVector {
return (Int256Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Int256Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m) {
return (Int256Vector) super.lanewiseShiftTemplate(op, Int256Mask.class, e, (Int256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Int256Vector
lanewise(VectorOperators.Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
return (Int256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Int256Vector
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2, VectorMask<Integer> m) {
return (Int256Vector) super.lanewiseTemplate(op, Int256Mask.class, v1, v2, (Int256Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Int256Vector extends IntVector {
@ForceInline
public final int reduceLanes(VectorOperators.Associative op,
VectorMask<Integer> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Int256Vector extends IntVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Integer> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Int256Vector extends IntVector {
return super.compareTemplate(Int256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Int256Mask compare(Comparison op, Vector<Integer> v, VectorMask<Integer> m) {
return super.compareTemplate(Int256Mask.class, op, v, (Int256Mask) m);
}
@Override
@ForceInline
public Int256Vector blend(Vector<Integer> v, VectorMask<Integer> m) {
@ -419,6 +453,7 @@ final class Int256Vector extends IntVector {
VectorMask<Integer> m) {
return (Int256Vector)
super.rearrangeTemplate(Int256Shuffle.class,
Int256Mask.class,
(Int256Shuffle) shuffle,
(Int256Mask) m); // specialize
}
@ -596,16 +631,12 @@ final class Int256Vector extends IntVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Int256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -631,9 +662,9 @@ final class Int256Vector extends IntVector {
public Int256Mask and(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int256Mask m = (Int256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Int256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Int256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -641,9 +672,9 @@ final class Int256Vector extends IntVector {
public Int256Mask or(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int256Mask m = (Int256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Int256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Int256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -651,9 +682,9 @@ final class Int256Vector extends IntVector {
Int256Mask xor(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int256Mask m = (Int256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int256Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int256Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -661,22 +692,32 @@ final class Int256Vector extends IntVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int256Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Int256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int256Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int256Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Int256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int256Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int256Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Int256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int256Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int256Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -787,6 +828,20 @@ final class Int256Vector extends IntVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m) {
return super.fromArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
return super.fromArray0Template(Int256Mask.class, a, offset, indexMap, mapOffset, (Int256Mask) m);
}
@ForceInline
@ -796,6 +851,13 @@ final class Int256Vector extends IntVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -803,6 +865,13 @@ final class Int256Vector extends IntVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -810,6 +879,21 @@ final class Int256Vector extends IntVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, VectorMask<Integer> m) {
super.intoArray0Template(Int256Mask.class, a, offset, (Int256Mask) m);
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
super.intoArray0Template(Int256Mask.class, a, offset, indexMap, mapOffset, (Int256Mask) m);
}
@ForceInline
@Override
final
@ -817,6 +901,21 @@ final class Int256Vector extends IntVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
@ -236,8 +236,8 @@ final class Int512Vector extends IntVector {
@ForceInline
final @Override
int rOp(int v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
int rOp(int v, VectorMask<Integer> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Int512Vector extends IntVector {
return (Int512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Int512Vector lanewise(Unary op, VectorMask<Integer> m) {
return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, (Int512Mask) m); // specialize
}
@Override
@ForceInline
public Int512Vector lanewise(Binary op, Vector<Integer> v) {
return (Int512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Int512Vector lanewise(Binary op, Vector<Integer> v, VectorMask<Integer> m) {
return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, v, (Int512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Int512Vector
@ -286,15 +298,30 @@ final class Int512Vector extends IntVector {
return (Int512Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Int512Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m) {
return (Int512Vector) super.lanewiseShiftTemplate(op, Int512Mask.class, e, (Int512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Int512Vector
lanewise(VectorOperators.Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
return (Int512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Int512Vector
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2, VectorMask<Integer> m) {
return (Int512Vector) super.lanewiseTemplate(op, Int512Mask.class, v1, v2, (Int512Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Int512Vector extends IntVector {
@ForceInline
public final int reduceLanes(VectorOperators.Associative op,
VectorMask<Integer> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Int512Vector extends IntVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Integer> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Int512Vector extends IntVector {
return super.compareTemplate(Int512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Int512Mask compare(Comparison op, Vector<Integer> v, VectorMask<Integer> m) {
return super.compareTemplate(Int512Mask.class, op, v, (Int512Mask) m);
}
@Override
@ForceInline
public Int512Vector blend(Vector<Integer> v, VectorMask<Integer> m) {
@ -419,6 +453,7 @@ final class Int512Vector extends IntVector {
VectorMask<Integer> m) {
return (Int512Vector)
super.rearrangeTemplate(Int512Shuffle.class,
Int512Mask.class,
(Int512Shuffle) shuffle,
(Int512Mask) m); // specialize
}
@ -612,16 +647,12 @@ final class Int512Vector extends IntVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Int512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -647,9 +678,9 @@ final class Int512Vector extends IntVector {
public Int512Mask and(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int512Mask m = (Int512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Int512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Int512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -657,9 +688,9 @@ final class Int512Vector extends IntVector {
public Int512Mask or(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int512Mask m = (Int512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Int512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Int512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -667,9 +698,9 @@ final class Int512Vector extends IntVector {
Int512Mask xor(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int512Mask m = (Int512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int512Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int512Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -677,22 +708,32 @@ final class Int512Vector extends IntVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int512Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Int512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int512Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int512Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Int512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int512Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int512Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Int512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int512Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int512Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -803,6 +844,20 @@ final class Int512Vector extends IntVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m) {
return super.fromArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
return super.fromArray0Template(Int512Mask.class, a, offset, indexMap, mapOffset, (Int512Mask) m);
}
@ForceInline
@ -812,6 +867,13 @@ final class Int512Vector extends IntVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -819,6 +881,13 @@ final class Int512Vector extends IntVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -826,6 +895,21 @@ final class Int512Vector extends IntVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, VectorMask<Integer> m) {
super.intoArray0Template(Int512Mask.class, a, offset, (Int512Mask) m);
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
super.intoArray0Template(Int512Mask.class, a, offset, indexMap, mapOffset, (Int512Mask) m);
}
@ForceInline
@Override
final
@ -833,6 +917,21 @@ final class Int512Vector extends IntVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
@ -236,8 +236,8 @@ final class Int64Vector extends IntVector {
@ForceInline
final @Override
int rOp(int v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
int rOp(int v, VectorMask<Integer> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Int64Vector extends IntVector {
return (Int64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Int64Vector lanewise(Unary op, VectorMask<Integer> m) {
return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, (Int64Mask) m); // specialize
}
@Override
@ForceInline
public Int64Vector lanewise(Binary op, Vector<Integer> v) {
return (Int64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Int64Vector lanewise(Binary op, Vector<Integer> v, VectorMask<Integer> m) {
return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, v, (Int64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Int64Vector
@ -286,15 +298,30 @@ final class Int64Vector extends IntVector {
return (Int64Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Int64Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m) {
return (Int64Vector) super.lanewiseShiftTemplate(op, Int64Mask.class, e, (Int64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Int64Vector
lanewise(VectorOperators.Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
return (Int64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Int64Vector
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2, VectorMask<Integer> m) {
return (Int64Vector) super.lanewiseTemplate(op, Int64Mask.class, v1, v2, (Int64Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Int64Vector extends IntVector {
@ForceInline
public final int reduceLanes(VectorOperators.Associative op,
VectorMask<Integer> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Int64Vector extends IntVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Integer> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Int64Vector extends IntVector {
return super.compareTemplate(Int64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Int64Mask compare(Comparison op, Vector<Integer> v, VectorMask<Integer> m) {
return super.compareTemplate(Int64Mask.class, op, v, (Int64Mask) m);
}
@Override
@ForceInline
public Int64Vector blend(Vector<Integer> v, VectorMask<Integer> m) {
@ -419,6 +453,7 @@ final class Int64Vector extends IntVector {
VectorMask<Integer> m) {
return (Int64Vector)
super.rearrangeTemplate(Int64Shuffle.class,
Int64Mask.class,
(Int64Shuffle) shuffle,
(Int64Mask) m); // specialize
}
@ -584,16 +619,12 @@ final class Int64Vector extends IntVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Int64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -619,9 +650,9 @@ final class Int64Vector extends IntVector {
public Int64Mask and(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int64Mask m = (Int64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Int64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Int64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -629,9 +660,9 @@ final class Int64Vector extends IntVector {
public Int64Mask or(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int64Mask m = (Int64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Int64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Int64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -639,9 +670,9 @@ final class Int64Vector extends IntVector {
Int64Mask xor(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
Int64Mask m = (Int64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int64Mask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Int64Mask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -649,22 +680,32 @@ final class Int64Vector extends IntVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int64Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((Int64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Int64Mask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int64Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((Int64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Int64Mask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int64Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((Int64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Int64Mask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Int64Mask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -775,6 +816,20 @@ final class Int64Vector extends IntVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m) {
return super.fromArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
return super.fromArray0Template(Int64Mask.class, a, offset, indexMap, mapOffset, (Int64Mask) m);
}
@ForceInline
@ -784,6 +839,13 @@ final class Int64Vector extends IntVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -791,6 +853,13 @@ final class Int64Vector extends IntVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -798,6 +867,21 @@ final class Int64Vector extends IntVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, VectorMask<Integer> m) {
super.intoArray0Template(Int64Mask.class, a, offset, (Int64Mask) m);
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
super.intoArray0Template(Int64Mask.class, a, offset, indexMap, mapOffset, (Int64Mask) m);
}
@ForceInline
@Override
final
@ -805,6 +889,21 @@ final class Int64Vector extends IntVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
@ -236,8 +236,8 @@ final class IntMaxVector extends IntVector {
@ForceInline
final @Override
int rOp(int v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
int rOp(int v, VectorMask<Integer> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class IntMaxVector extends IntVector {
return (IntMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public IntMaxVector lanewise(Unary op, VectorMask<Integer> m) {
return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialize
}
@Override
@ForceInline
public IntMaxVector lanewise(Binary op, Vector<Integer> v) {
return (IntMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public IntMaxVector lanewise(Binary op, Vector<Integer> v, VectorMask<Integer> m) {
return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, v, (IntMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline IntMaxVector
@ -286,15 +298,30 @@ final class IntMaxVector extends IntVector {
return (IntMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline IntMaxVector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m) {
return (IntMaxVector) super.lanewiseShiftTemplate(op, IntMaxMask.class, e, (IntMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
IntMaxVector
lanewise(VectorOperators.Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2) {
return (IntMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
IntMaxVector
lanewise(Ternary op, Vector<Integer> v1, Vector<Integer> v2, VectorMask<Integer> m) {
return (IntMaxVector) super.lanewiseTemplate(op, IntMaxMask.class, v1, v2, (IntMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class IntMaxVector extends IntVector {
@ForceInline
public final int reduceLanes(VectorOperators.Associative op,
VectorMask<Integer> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class IntMaxVector extends IntVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Integer> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class IntMaxVector extends IntVector {
return super.compareTemplate(IntMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final IntMaxMask compare(Comparison op, Vector<Integer> v, VectorMask<Integer> m) {
return super.compareTemplate(IntMaxMask.class, op, v, (IntMaxMask) m);
}
@Override
@ForceInline
public IntMaxVector blend(Vector<Integer> v, VectorMask<Integer> m) {
@ -419,6 +453,7 @@ final class IntMaxVector extends IntVector {
VectorMask<Integer> m) {
return (IntMaxVector)
super.rearrangeTemplate(IntMaxShuffle.class,
IntMaxMask.class,
(IntMaxShuffle) shuffle,
(IntMaxMask) m); // specialize
}
@ -582,16 +617,12 @@ final class IntMaxVector extends IntVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
IntMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -617,9 +648,9 @@ final class IntMaxVector extends IntVector {
public IntMaxMask and(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
IntMaxMask m = (IntMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, IntMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, IntMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -627,9 +658,9 @@ final class IntMaxVector extends IntVector {
public IntMaxMask or(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
IntMaxMask m = (IntMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, IntMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, IntMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -637,9 +668,9 @@ final class IntMaxVector extends IntVector {
IntMaxMask xor(VectorMask<Integer> mask) {
Objects.requireNonNull(mask);
IntMaxMask m = (IntMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, IntMaxMask.class, int.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, IntMaxMask.class, null, int.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -647,22 +678,32 @@ final class IntMaxVector extends IntVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(((IntMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(((IntMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(((IntMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, IntMaxMask.class, int.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -784,6 +825,20 @@ final class IntMaxVector extends IntVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m) {
return super.fromArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
return super.fromArray0Template(IntMaxMask.class, a, offset, indexMap, mapOffset, (IntMaxMask) m);
}
@ForceInline
@ -793,6 +848,13 @@ final class IntMaxVector extends IntVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -800,6 +862,13 @@ final class IntMaxVector extends IntVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -807,6 +876,21 @@ final class IntMaxVector extends IntVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, VectorMask<Integer> m) {
super.intoArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m);
}
@ForceInline
@Override
final
void intoArray0(int[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Integer> m) {
super.intoArray0Template(IntMaxMask.class, a, offset, indexMap, mapOffset, (IntMaxMask) m);
}
@ForceInline
@Override
final
@ -814,6 +898,21 @@ final class IntMaxVector extends IntVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================
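
A minimal usage sketch of what the masked fromArray0/intoArray0 entry points above serve (the class, method name, and loop here are illustrative, not from this commit): the public masked load/store calls bottom out in these specializations, which lets a loop cover the ragged tail of an array without a scalar cleanup pass.

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class MaskedTail {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    // Adds 1 to every element; the mask keeps the final partial vector in bounds.
    static void addOne(int[] a) {
        for (int i = 0; i < a.length; i += SPECIES.length()) {
            // Lanes past a.length are false, so the masked load/store never
            // touch out-of-bounds slots.
            VectorMask<Integer> m = SPECIES.indexInRange(i, a.length);
            IntVector v = IntVector.fromArray(SPECIES, a, i, m);
            v.add(1).intoArray(a, i, m);
        }
    }

    public static void main(String[] args) {
        int[] a = {1, 2, 3, 4, 5};
        addOne(a);
        System.out.println(java.util.Arrays.toString(a)); // [2, 3, 4, 5, 6]
    }
}

Compile and run with --add-modules jdk.incubator.vector, as for any incubator module.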

View file

@ -231,8 +231,8 @@ final class Long128Vector extends LongVector {
@ForceInline
final @Override
long rOp(long v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
long rOp(long v, VectorMask<Long> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -268,12 +268,24 @@ final class Long128Vector extends LongVector {
return (Long128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Long128Vector lanewise(Unary op, VectorMask<Long> m) {
return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, (Long128Mask) m); // specialize
}
@Override
@ForceInline
public Long128Vector lanewise(Binary op, Vector<Long> v) {
return (Long128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Long128Vector lanewise(Binary op, Vector<Long> v, VectorMask<Long> m) {
return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, v, (Long128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Long128Vector
@ -281,15 +293,30 @@ final class Long128Vector extends LongVector {
return (Long128Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Long128Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Long> m) {
return (Long128Vector) super.lanewiseShiftTemplate(op, Long128Mask.class, e, (Long128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Long128Vector
lanewise(VectorOperators.Ternary op, Vector<Long> v1, Vector<Long> v2) {
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2) {
return (Long128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Long128Vector
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2, VectorMask<Long> m) {
return (Long128Vector) super.lanewiseTemplate(op, Long128Mask.class, v1, v2, (Long128Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -309,7 +336,7 @@ final class Long128Vector extends LongVector {
@ForceInline
public final long reduceLanes(VectorOperators.Associative op,
VectorMask<Long> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized
}
@Override
@ -322,7 +349,7 @@ final class Long128Vector extends LongVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Long> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized
}
@ForceInline
@ -353,6 +380,13 @@ final class Long128Vector extends LongVector {
}
@Override
@ForceInline
public final Long128Mask compare(Comparison op, Vector<Long> v, VectorMask<Long> m) {
return super.compareTemplate(Long128Mask.class, op, v, (Long128Mask) m);
}
@Override
@ForceInline
public Long128Vector blend(Vector<Long> v, VectorMask<Long> m) {
@ -409,6 +443,7 @@ final class Long128Vector extends LongVector {
VectorMask<Long> m) {
return (Long128Vector)
super.rearrangeTemplate(Long128Shuffle.class,
Long128Mask.class,
(Long128Shuffle) shuffle,
(Long128Mask) m); // specialize
}
@ -574,16 +609,12 @@ final class Long128Vector extends LongVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Long128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -609,9 +640,9 @@ final class Long128Vector extends LongVector {
public Long128Mask and(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long128Mask m = (Long128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Long128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Long128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -619,9 +650,9 @@ final class Long128Vector extends LongVector {
public Long128Mask or(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long128Mask m = (Long128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Long128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Long128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -629,9 +660,9 @@ final class Long128Vector extends LongVector {
Long128Mask xor(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long128Mask m = (Long128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long128Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -639,22 +670,32 @@ final class Long128Vector extends LongVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long128Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Long128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long128Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long128Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Long128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long128Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long128Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Long128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long128Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long128Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -765,6 +806,20 @@ final class Long128Vector extends LongVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, VectorMask<Long> m) {
return super.fromArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
return super.fromArray0Template(Long128Mask.class, a, offset, indexMap, mapOffset, (Long128Mask) m);
}
@ForceInline
@ -774,6 +829,13 @@ final class Long128Vector extends LongVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -781,6 +843,13 @@ final class Long128Vector extends LongVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -788,6 +857,21 @@ final class Long128Vector extends LongVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, VectorMask<Long> m) {
super.intoArray0Template(Long128Mask.class, a, offset, (Long128Mask) m);
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
super.intoArray0Template(Long128Mask.class, a, offset, indexMap, mapOffset, (Long128Mask) m);
}
@ForceInline
@Override
final
@ -795,6 +879,21 @@ final class Long128Vector extends LongVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
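
A sketch of what the masked lanewise and reduction templates above enable at the call site (species and values chosen arbitrarily): with the mask class and mask threaded through to the intrinsic, both calls below can compile to predicated instructions rather than a blend followed by an unmasked op.

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedLanewise {
    public static void main(String[] args) {
        var sp = LongVector.SPECIES_128;                    // two long lanes
        LongVector x = LongVector.broadcast(sp, 2L);
        LongVector y = LongVector.broadcast(sp, 3L);
        VectorMask<Long> m = VectorMask.fromLong(sp, 0b01); // lane 0 only

        LongVector sum = x.lanewise(VectorOperators.ADD, y, m); // [5, 2]: unset lanes keep x
        long total = sum.reduceLanes(VectorOperators.ADD, m);   // 5: unset lanes add the identity 0
        System.out.println(sum + " " + total);
    }
}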

View file

@ -231,8 +231,8 @@ final class Long256Vector extends LongVector {
@ForceInline
final @Override
long rOp(long v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
long rOp(long v, VectorMask<Long> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -268,12 +268,24 @@ final class Long256Vector extends LongVector {
return (Long256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Long256Vector lanewise(Unary op, VectorMask<Long> m) {
return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, (Long256Mask) m); // specialize
}
@Override
@ForceInline
public Long256Vector lanewise(Binary op, Vector<Long> v) {
return (Long256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Long256Vector lanewise(Binary op, Vector<Long> v, VectorMask<Long> m) {
return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, v, (Long256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Long256Vector
@ -281,15 +293,30 @@ final class Long256Vector extends LongVector {
return (Long256Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Long256Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Long> m) {
return (Long256Vector) super.lanewiseShiftTemplate(op, Long256Mask.class, e, (Long256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Long256Vector
lanewise(VectorOperators.Ternary op, Vector<Long> v1, Vector<Long> v2) {
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2) {
return (Long256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Long256Vector
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2, VectorMask<Long> m) {
return (Long256Vector) super.lanewiseTemplate(op, Long256Mask.class, v1, v2, (Long256Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -309,7 +336,7 @@ final class Long256Vector extends LongVector {
@ForceInline
public final long reduceLanes(VectorOperators.Associative op,
VectorMask<Long> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized
}
@Override
@ -322,7 +349,7 @@ final class Long256Vector extends LongVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Long> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized
}
@ForceInline
@ -353,6 +380,13 @@ final class Long256Vector extends LongVector {
}
@Override
@ForceInline
public final Long256Mask compare(Comparison op, Vector<Long> v, VectorMask<Long> m) {
return super.compareTemplate(Long256Mask.class, op, v, (Long256Mask) m);
}
@Override
@ForceInline
public Long256Vector blend(Vector<Long> v, VectorMask<Long> m) {
@ -409,6 +443,7 @@ final class Long256Vector extends LongVector {
VectorMask<Long> m) {
return (Long256Vector)
super.rearrangeTemplate(Long256Shuffle.class,
Long256Mask.class,
(Long256Shuffle) shuffle,
(Long256Mask) m); // specialize
}
@ -578,16 +613,12 @@ final class Long256Vector extends LongVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Long256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -613,9 +644,9 @@ final class Long256Vector extends LongVector {
public Long256Mask and(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long256Mask m = (Long256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Long256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Long256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -623,9 +654,9 @@ final class Long256Vector extends LongVector {
public Long256Mask or(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long256Mask m = (Long256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Long256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Long256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -633,9 +664,9 @@ final class Long256Vector extends LongVector {
Long256Mask xor(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long256Mask m = (Long256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long256Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -643,22 +674,32 @@ final class Long256Vector extends LongVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long256Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Long256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long256Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long256Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Long256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long256Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long256Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Long256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long256Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long256Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -769,6 +810,20 @@ final class Long256Vector extends LongVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, VectorMask<Long> m) {
return super.fromArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
return super.fromArray0Template(Long256Mask.class, a, offset, indexMap, mapOffset, (Long256Mask) m);
}
@ForceInline
@ -778,6 +833,13 @@ final class Long256Vector extends LongVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -785,6 +847,13 @@ final class Long256Vector extends LongVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
@ -792,6 +861,21 @@ final class Long256Vector extends LongVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, VectorMask<Long> m) {
super.intoArray0Template(Long256Mask.class, a, offset, (Long256Mask) m);
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
super.intoArray0Template(Long256Mask.class, a, offset, indexMap, mapOffset, (Long256Mask) m);
}
@ForceInline
@Override
final
@ -799,6 +883,21 @@ final class Long256Vector extends LongVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
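
The mask queries above now route through a single long-returning maskReductionCoerced intrinsic, with the int-valued results narrowed by a cast at the call site; toLong additionally refuses species wider than Long.SIZE lanes because it packs lane i into bit i. A small sketch of the observable behavior (values arbitrary):

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskQueries {
    public static void main(String[] args) {
        var sp = LongVector.SPECIES_256;                      // four long lanes
        VectorMask<Long> m = VectorMask.fromLong(sp, 0b1010); // lanes 1 and 3 set
        System.out.println(m.trueCount()); // 2
        System.out.println(m.firstTrue()); // 1
        System.out.println(m.lastTrue());  // 3
        System.out.println(m.toLong());    // 10 == 0b1010
    }
}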

View file

@ -231,8 +231,8 @@ final class Long512Vector extends LongVector {
@ForceInline
final @Override
long rOp(long v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
long rOp(long v, VectorMask<Long> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -268,12 +268,24 @@ final class Long512Vector extends LongVector {
return (Long512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Long512Vector lanewise(Unary op, VectorMask<Long> m) {
return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, (Long512Mask) m); // specialize
}
@Override
@ForceInline
public Long512Vector lanewise(Binary op, Vector<Long> v) {
return (Long512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Long512Vector lanewise(Binary op, Vector<Long> v, VectorMask<Long> m) {
return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, v, (Long512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Long512Vector
@ -281,15 +293,30 @@ final class Long512Vector extends LongVector {
return (Long512Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Long512Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Long> m) {
return (Long512Vector) super.lanewiseShiftTemplate(op, Long512Mask.class, e, (Long512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Long512Vector
lanewise(VectorOperators.Ternary op, Vector<Long> v1, Vector<Long> v2) {
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2) {
return (Long512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Long512Vector
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2, VectorMask<Long> m) {
return (Long512Vector) super.lanewiseTemplate(op, Long512Mask.class, v1, v2, (Long512Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -309,7 +336,7 @@ final class Long512Vector extends LongVector {
@ForceInline
public final long reduceLanes(VectorOperators.Associative op,
VectorMask<Long> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized
}
@Override
@ -322,7 +349,7 @@ final class Long512Vector extends LongVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Long> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized
}
@ForceInline
@ -353,6 +380,13 @@ final class Long512Vector extends LongVector {
}
@Override
@ForceInline
public final Long512Mask compare(Comparison op, Vector<Long> v, VectorMask<Long> m) {
return super.compareTemplate(Long512Mask.class, op, v, (Long512Mask) m);
}
@Override
@ForceInline
public Long512Vector blend(Vector<Long> v, VectorMask<Long> m) {
@ -409,6 +443,7 @@ final class Long512Vector extends LongVector {
VectorMask<Long> m) {
return (Long512Vector)
super.rearrangeTemplate(Long512Shuffle.class,
Long512Mask.class,
(Long512Shuffle) shuffle,
(Long512Mask) m); // specialize
}
@ -586,16 +621,12 @@ final class Long512Vector extends LongVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Long512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -621,9 +652,9 @@ final class Long512Vector extends LongVector {
public Long512Mask and(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long512Mask m = (Long512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Long512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Long512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -631,9 +662,9 @@ final class Long512Vector extends LongVector {
public Long512Mask or(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long512Mask m = (Long512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Long512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Long512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -641,9 +672,9 @@ final class Long512Vector extends LongVector {
Long512Mask xor(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long512Mask m = (Long512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long512Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -651,22 +682,32 @@ final class Long512Vector extends LongVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long512Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Long512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long512Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long512Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Long512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long512Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long512Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Long512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long512Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long512Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -777,6 +818,20 @@ final class Long512Vector extends LongVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, VectorMask<Long> m) {
return super.fromArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
return super.fromArray0Template(Long512Mask.class, a, offset, indexMap, mapOffset, (Long512Mask) m);
}
@ForceInline
@ -786,6 +841,13 @@ final class Long512Vector extends LongVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -793,6 +855,13 @@ final class Long512Vector extends LongVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
@ -800,6 +869,21 @@ final class Long512Vector extends LongVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, VectorMask<Long> m) {
super.intoArray0Template(Long512Mask.class, a, offset, (Long512Mask) m);
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
super.intoArray0Template(Long512Mask.class, a, offset, indexMap, mapOffset, (Long512Mask) m);
}
@ForceInline
@Override
final
@ -807,6 +891,21 @@ final class Long512Vector extends LongVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
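
The rewritten cast path above swaps VECTOR_OP_REINTERPRET for VECTOR_OP_CAST and keeps only the lane-count check, with a fallback that rebuilds the mask from its boolean lanes. The user-visible contract is unchanged: a mask converts between species of equal lane count, and lane i keeps its truth value. A sketch (species chosen arbitrarily):

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskCast {
    public static void main(String[] args) {
        VectorMask<Long> lm = VectorMask.fromLong(LongVector.SPECIES_512, 0b1100); // 8 lanes
        VectorMask<Double> dm = lm.cast(DoubleVector.SPECIES_512);                 // also 8 lanes
        System.out.println(lm.toLong() == dm.toLong()); // true: lane values carry over
    }
}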

View file

@ -231,8 +231,8 @@ final class Long64Vector extends LongVector {
@ForceInline
final @Override
long rOp(long v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
long rOp(long v, VectorMask<Long> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -268,12 +268,24 @@ final class Long64Vector extends LongVector {
return (Long64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Long64Vector lanewise(Unary op, VectorMask<Long> m) {
return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, (Long64Mask) m); // specialize
}
@Override
@ForceInline
public Long64Vector lanewise(Binary op, Vector<Long> v) {
return (Long64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Long64Vector lanewise(Binary op, Vector<Long> v, VectorMask<Long> m) {
return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, v, (Long64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Long64Vector
@ -281,15 +293,30 @@ final class Long64Vector extends LongVector {
return (Long64Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Long64Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Long> m) {
return (Long64Vector) super.lanewiseShiftTemplate(op, Long64Mask.class, e, (Long64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Long64Vector
lanewise(VectorOperators.Ternary op, Vector<Long> v1, Vector<Long> v2) {
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2) {
return (Long64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Long64Vector
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2, VectorMask<Long> m) {
return (Long64Vector) super.lanewiseTemplate(op, Long64Mask.class, v1, v2, (Long64Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -309,7 +336,7 @@ final class Long64Vector extends LongVector {
@ForceInline
public final long reduceLanes(VectorOperators.Associative op,
VectorMask<Long> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized
}
@Override
@ -322,7 +349,7 @@ final class Long64Vector extends LongVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Long> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized
}
@ForceInline
@ -353,6 +380,13 @@ final class Long64Vector extends LongVector {
}
@Override
@ForceInline
public final Long64Mask compare(Comparison op, Vector<Long> v, VectorMask<Long> m) {
return super.compareTemplate(Long64Mask.class, op, v, (Long64Mask) m);
}
@Override
@ForceInline
public Long64Vector blend(Vector<Long> v, VectorMask<Long> m) {
@ -409,6 +443,7 @@ final class Long64Vector extends LongVector {
VectorMask<Long> m) {
return (Long64Vector)
super.rearrangeTemplate(Long64Shuffle.class,
Long64Mask.class,
(Long64Shuffle) shuffle,
(Long64Mask) m); // specialize
}
@ -572,16 +607,12 @@ final class Long64Vector extends LongVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Long64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -607,9 +638,9 @@ final class Long64Vector extends LongVector {
public Long64Mask and(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long64Mask m = (Long64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Long64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Long64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -617,9 +648,9 @@ final class Long64Vector extends LongVector {
public Long64Mask or(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long64Mask m = (Long64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Long64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Long64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -627,9 +658,9 @@ final class Long64Vector extends LongVector {
Long64Mask xor(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
Long64Mask m = (Long64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Long64Mask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -637,22 +668,32 @@ final class Long64Vector extends LongVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long64Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((Long64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Long64Mask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long64Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((Long64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Long64Mask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long64Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((Long64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Long64Mask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Long64Mask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -763,6 +804,20 @@ final class Long64Vector extends LongVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, VectorMask<Long> m) {
return super.fromArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
return super.fromArray0Template(Long64Mask.class, a, offset, indexMap, mapOffset, (Long64Mask) m);
}
@ForceInline
@ -772,6 +827,13 @@ final class Long64Vector extends LongVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -779,6 +841,13 @@ final class Long64Vector extends LongVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
@ -786,6 +855,21 @@ final class Long64Vector extends LongVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, VectorMask<Long> m) {
super.intoArray0Template(Long64Mask.class, a, offset, (Long64Mask) m);
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
super.intoArray0Template(Long64Mask.class, a, offset, indexMap, mapOffset, (Long64Mask) m);
}
@ForceInline
@Override
final
@ -793,6 +877,21 @@ final class Long64Vector extends LongVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
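
The compare overload above accepts the governing mask directly; by the API specification it is equivalent to compare(op, v).and(m), but routing the mask class into compareTemplate lets the whole thing become one predicated compare. A sketch (species and values arbitrary):

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class MaskedCompare {
    public static void main(String[] args) {
        var sp = LongVector.SPECIES_256;
        LongVector v = LongVector.fromArray(sp, new long[]{5, 1, 7, 0}, 0);
        LongVector w = LongVector.broadcast(sp, 3L);
        VectorMask<Long> m = VectorMask.fromLong(sp, 0b0111);   // ignore lane 3
        VectorMask<Long> gt = v.compare(VectorOperators.GT, w, m);
        System.out.println(gt.toLong()); // 5 == 0b0101: lanes 0 and 2 exceed 3 within the mask
    }
}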

View file

@ -231,8 +231,8 @@ final class LongMaxVector extends LongVector {
@ForceInline
final @Override
long rOp(long v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
long rOp(long v, VectorMask<Long> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -268,12 +268,24 @@ final class LongMaxVector extends LongVector {
return (LongMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public LongMaxVector lanewise(Unary op, VectorMask<Long> m) {
return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialize
}
@Override
@ForceInline
public LongMaxVector lanewise(Binary op, Vector<Long> v) {
return (LongMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public LongMaxVector lanewise(Binary op, Vector<Long> v, VectorMask<Long> m) {
return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, v, (LongMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline LongMaxVector
@ -281,15 +293,30 @@ final class LongMaxVector extends LongVector {
return (LongMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline LongMaxVector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Long> m) {
return (LongMaxVector) super.lanewiseShiftTemplate(op, LongMaxMask.class, e, (LongMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
LongMaxVector
lanewise(VectorOperators.Ternary op, Vector<Long> v1, Vector<Long> v2) {
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2) {
return (LongMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
LongMaxVector
lanewise(Ternary op, Vector<Long> v1, Vector<Long> v2, VectorMask<Long> m) {
return (LongMaxVector) super.lanewiseTemplate(op, LongMaxMask.class, v1, v2, (LongMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@ -309,7 +336,7 @@ final class LongMaxVector extends LongVector {
@ForceInline
public final long reduceLanes(VectorOperators.Associative op,
VectorMask<Long> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized
}
@Override
@ -322,7 +349,7 @@ final class LongMaxVector extends LongVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Long> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized
}
@ForceInline
@ -353,6 +380,13 @@ final class LongMaxVector extends LongVector {
}
@Override
@ForceInline
public final LongMaxMask compare(Comparison op, Vector<Long> v, VectorMask<Long> m) {
return super.compareTemplate(LongMaxMask.class, op, v, (LongMaxMask) m);
}
@Override
@ForceInline
public LongMaxVector blend(Vector<Long> v, VectorMask<Long> m) {
@ -409,6 +443,7 @@ final class LongMaxVector extends LongVector {
VectorMask<Long> m) {
return (LongMaxVector)
super.rearrangeTemplate(LongMaxShuffle.class,
LongMaxMask.class,
(LongMaxShuffle) shuffle,
(LongMaxMask) m); // specialize
}
@ -572,16 +607,12 @@ final class LongMaxVector extends LongVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
LongMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -607,9 +638,9 @@ final class LongMaxVector extends LongVector {
public LongMaxMask and(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
LongMaxMask m = (LongMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, LongMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, LongMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -617,9 +648,9 @@ final class LongMaxVector extends LongVector {
public LongMaxMask or(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
LongMaxMask m = (LongMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, LongMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, LongMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -627,9 +658,9 @@ final class LongMaxVector extends LongVector {
LongMaxMask xor(VectorMask<Long> mask) {
Objects.requireNonNull(mask);
LongMaxMask m = (LongMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, LongMaxMask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, LongMaxMask.class, null, long.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -637,22 +668,32 @@ final class LongMaxVector extends LongVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(((LongMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(((LongMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(((LongMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, LongMaxMask.class, long.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -763,6 +804,20 @@ final class LongMaxVector extends LongVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, VectorMask<Long> m) {
return super.fromArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
return super.fromArray0Template(LongMaxMask.class, a, offset, indexMap, mapOffset, (LongMaxMask) m);
}
@ForceInline
@ -772,6 +827,13 @@ final class LongMaxVector extends LongVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -779,6 +841,13 @@ final class LongMaxVector extends LongVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
@ -786,6 +855,21 @@ final class LongMaxVector extends LongVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, VectorMask<Long> m) {
super.intoArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m);
}
@ForceInline
@Override
final
void intoArray0(long[] a, int offset, int[] indexMap, int mapOffset, VectorMask<Long> m) {
super.intoArray0Template(LongMaxMask.class, a, offset, indexMap, mapOffset, (LongMaxMask) m);
}
@ForceInline
@Override
final
@ -793,6 +877,21 @@ final class LongMaxVector extends LongVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================
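
The and/or/xor bodies above gain two null arguments: the slot for a governing mask, which these unmasked calls leave empty so they can share the masked binaryOp intrinsic's shape. At the public level the behavior is plain lanewise boolean logic; note that xor itself stays package-private, so the sketch below (values arbitrary) composes it from public operations.

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;

class MaskLogic {
    public static void main(String[] args) {
        var sp = LongVector.SPECIES_256;
        VectorMask<Long> m1 = VectorMask.fromLong(sp, 0b0110);
        VectorMask<Long> m2 = VectorMask.fromLong(sp, 0b0011);
        System.out.println(m1.and(m2).toLong()); // 2 == 0b0010
        System.out.println(m1.or(m2).toLong());  // 7 == 0b0111
        // xor via the public andNot/or pair:
        System.out.println(m1.andNot(m2).or(m2.andNot(m1)).toLong()); // 5 == 0b0101
    }
}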

View file

@ -236,8 +236,8 @@ final class Short128Vector extends ShortVector {
@ForceInline
final @Override
short rOp(short v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
short rOp(short v, VectorMask<Short> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Short128Vector extends ShortVector {
return (Short128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Short128Vector lanewise(Unary op, VectorMask<Short> m) {
return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, (Short128Mask) m); // specialize
}
@Override
@ForceInline
public Short128Vector lanewise(Binary op, Vector<Short> v) {
return (Short128Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Short128Vector lanewise(Binary op, Vector<Short> v, VectorMask<Short> m) {
return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, v, (Short128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Short128Vector
@ -286,15 +298,30 @@ final class Short128Vector extends ShortVector {
return (Short128Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Short128Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m) {
return (Short128Vector) super.lanewiseShiftTemplate(op, Short128Mask.class, e, (Short128Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Short128Vector
lanewise(VectorOperators.Ternary op, Vector<Short> v1, Vector<Short> v2) {
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2) {
return (Short128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Short128Vector
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2, VectorMask<Short> m) {
return (Short128Vector) super.lanewiseTemplate(op, Short128Mask.class, v1, v2, (Short128Mask) m); // specialize
}
@Override
@ForceInline
public final
@ -314,7 +341,7 @@ final class Short128Vector extends ShortVector {
@ForceInline
public final short reduceLanes(VectorOperators.Associative op,
VectorMask<Short> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized
}
@Override
@ -327,7 +354,7 @@ final class Short128Vector extends ShortVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Short> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized
}
@ForceInline
@ -363,6 +390,13 @@ final class Short128Vector extends ShortVector {
return super.compareTemplate(Short128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Short128Mask compare(Comparison op, Vector<Short> v, VectorMask<Short> m) {
return super.compareTemplate(Short128Mask.class, op, v, (Short128Mask) m);
}
@Override
@ForceInline
public Short128Vector blend(Vector<Short> v, VectorMask<Short> m) {
@ -419,6 +453,7 @@ final class Short128Vector extends ShortVector {
VectorMask<Short> m) {
return (Short128Vector)
super.rearrangeTemplate(Short128Shuffle.class,
Short128Mask.class,
(Short128Shuffle) shuffle,
(Short128Mask) m); // specialize
}
@ -596,16 +631,12 @@ final class Short128Vector extends ShortVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Short128Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@ -631,9 +662,9 @@ final class Short128Vector extends ShortVector {
public Short128Mask and(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short128Mask m = (Short128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Short128Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Short128Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ -641,9 +672,9 @@ final class Short128Vector extends ShortVector {
public Short128Mask or(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short128Mask m = (Short128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Short128Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Short128Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@ -651,9 +682,9 @@ final class Short128Vector extends ShortVector {
Short128Mask xor(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short128Mask m = (Short128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short128Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short128Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@ -661,22 +692,32 @@ final class Short128Vector extends ShortVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short128Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(((Short128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short128Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short128Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(((Short128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short128Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short128Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(((Short128Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short128Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short128Mask.class, short.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@ -787,6 +828,14 @@ final class Short128Vector extends ShortVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m) {
return super.fromArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -794,6 +843,13 @@ final class Short128Vector extends ShortVector {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m) {
return super.fromCharArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
@ -802,6 +858,13 @@ final class Short128Vector extends ShortVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -809,6 +872,13 @@ final class Short128Vector extends ShortVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
@ -816,6 +886,15 @@ final class Short128Vector extends ShortVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(short[] a, int offset, VectorMask<Short> m) {
super.intoArray0Template(Short128Mask.class, a, offset, (Short128Mask) m);
}
@ForceInline
@Override
final
@ -823,6 +902,27 @@ final class Short128Vector extends ShortVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m);
}
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<Short> m) {
super.intoCharArray0Template(Short128Mask.class, a, offset, (Short128Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
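
The Short specializations additionally gain masked fromCharArray0/intoCharArray0, backing ShortVector's public char[] views, where chars are handled as unsigned 16-bit lanes. A sketch built on those entry points (the method name and the bit-5 trick are illustrative, not from this commit):

import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

class CharLanes {
    static final VectorSpecies<Short> S = ShortVector.SPECIES_128;

    // Uppercases 'a'..'z' in place; all other chars pass through untouched.
    static void upcaseAscii(char[] a) {
        for (int i = 0; i < a.length; i += S.length()) {
            VectorMask<Short> m = S.indexInRange(i, a.length);
            ShortVector v = ShortVector.fromCharArray(S, a, i, m);
            VectorMask<Short> lower = v.compare(VectorOperators.GE, (short) 'a')
                                       .and(v.compare(VectorOperators.LE, (short) 'z'));
            // AND_NOT clears bit 5 (0x20), but only in the lanes 'lower' selects.
            v.lanewise(VectorOperators.AND_NOT, (short) 0x20, lower)
             .intoCharArray(a, i, m);
        }
    }

    public static void main(String[] args) {
        char[] a = "vector api!".toCharArray();
        upcaseAscii(a);
        System.out.println(new String(a)); // VECTOR API!
    }
}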

View file

@ -236,8 +236,8 @@ final class Short256Vector extends ShortVector {
@ForceInline
final @Override
short rOp(short v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
short rOp(short v, VectorMask<Short> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@ -273,12 +273,24 @@ final class Short256Vector extends ShortVector {
return (Short256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Short256Vector lanewise(Unary op, VectorMask<Short> m) {
return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, (Short256Mask) m); // specialize
}
@Override
@ForceInline
public Short256Vector lanewise(Binary op, Vector<Short> v) {
return (Short256Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Short256Vector lanewise(Binary op, Vector<Short> v, VectorMask<Short> m) {
return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, v, (Short256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Short256Vector
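The masked lanewise(Unary) and lanewise(Binary) overloads added above pass the concrete mask class down to lanewiseTemplate so the mask reaches the intrinsic. At the API level, unset lanes keep the original left-operand value; a small sketch (operands and bit pattern are illustrative):

    import jdk.incubator.vector.*;

    public class MaskedLanewiseDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_256; // 16 lanes

        public static void main(String[] args) {
            ShortVector a = ShortVector.broadcast(S, (short) 10);
            ShortVector b = ShortVector.broadcast(S, (short) 5);
            VectorMask<Short> even = VectorMask.fromLong(S, 0x5555L);
            ShortVector sum = a.lanewise(VectorOperators.ADD, b, even);
            // even lanes: 15; odd lanes: left unchanged at 10
            ShortVector neg = a.lanewise(VectorOperators.NEG, even);
            // even lanes: -10; odd lanes: 10
        }
    }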
@@ -286,15 +298,30 @@ final class Short256Vector extends ShortVector {
return (Short256Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Short256Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m) {
return (Short256Vector) super.lanewiseShiftTemplate(op, Short256Mask.class, e, (Short256Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Short256Vector
lanewise(VectorOperators.Ternary op, Vector<Short> v1, Vector<Short> v2) {
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2) {
return (Short256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Short256Vector
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2, VectorMask<Short> m) {
return (Short256Vector) super.lanewiseTemplate(op, Short256Mask.class, v1, v2, (Short256Mask) m); // specialize
}
@Override
@ForceInline
public final
@@ -314,7 +341,7 @@ final class Short256Vector extends ShortVector {
@ForceInline
public final short reduceLanes(VectorOperators.Associative op,
VectorMask<Short> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized
}
@Override
@@ -327,7 +354,7 @@ final class Short256Vector extends ShortVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Short> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized
}
@ForceInline
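reduceLanes and reduceLanesToLong above now hand the mask class to reduceLanesTemplate instead of blending before reducing. Under the published contract, unset lanes contribute the operation's identity; a sketch (values illustrative):

    import jdk.incubator.vector.*;

    public class MaskedReduceDemo {
        static final VectorSpecies<Short> SPECIES = ShortVector.SPECIES_256;

        public static void main(String[] args) {
            short[] a = new short[SPECIES.length()];
            for (int i = 0; i < a.length; i++) a[i] = (short) (i + 1);
            ShortVector v = ShortVector.fromArray(SPECIES, a, 0);
            VectorMask<Short> firstHalf = SPECIES.indexInRange(0, SPECIES.length() / 2);
            // ADD over set lanes only; unset lanes contribute the identity (0).
            short sum = v.reduceLanes(VectorOperators.ADD, firstHalf);
        }
    }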
@@ -363,6 +390,13 @@ final class Short256Vector extends ShortVector {
return super.compareTemplate(Short256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Short256Mask compare(Comparison op, Vector<Short> v, VectorMask<Short> m) {
return super.compareTemplate(Short256Mask.class, op, v, (Short256Mask) m);
}
@Override
@ForceInline
public Short256Vector blend(Vector<Short> v, VectorMask<Short> m) {
@@ -419,6 +453,7 @@ final class Short256Vector extends ShortVector {
VectorMask<Short> m) {
return (Short256Vector)
super.rearrangeTemplate(Short256Shuffle.class,
Short256Mask.class,
(Short256Shuffle) shuffle,
(Short256Mask) m); // specialize
}
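rearrangeTemplate now also receives the mask class. At the API level, rearrange(shuffle, m) rearranges set lanes and zeroes unset ones; a sketch (shuffle and mask are illustrative):

    import jdk.incubator.vector.*;

    public class MaskedRearrangeDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_256;

        public static void main(String[] args) {
            ShortVector v = ShortVector.broadcast(S, (short) 0).addIndex(1); // 0,1,2,...
            VectorShuffle<Short> reverse =
                VectorShuffle.iota(S, S.length() - 1, -1, true); // lane i <- length-1-i
            VectorMask<Short> m = VectorMask.fromLong(S, 0x00FFL);
            // Set lanes take the shuffled value; unset lanes become zero.
            ShortVector r = v.rearrange(reverse, m);
        }
    }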
@@ -612,16 +647,12 @@ final class Short256Vector extends ShortVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Short256Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
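The cast path above replaces VECTOR_OP_REINTERPRET (previously taken only for equal vector bit sizes) with VECTOR_OP_CAST plus a maskFactory fallback lambda, so one intrinsic shape covers all same-lane-count casts. The public entry point is VectorMask.cast; a hedged sketch (the species pairing is illustrative):

    import jdk.incubator.vector.*;

    public class MaskCastDemo {
        public static void main(String[] args) {
            VectorSpecies<Short> s16x8 = ShortVector.SPECIES_128; // 8 short lanes
            VectorSpecies<Integer> i32x8 = IntVector.SPECIES_256; // 8 int lanes
            VectorMask<Short> m = VectorMask.fromLong(s16x8, 0b1100_0011L);
            VectorMask<Integer> mi = m.cast(i32x8); // same lane count, same set bits
            // m.cast(IntVector.SPECIES_128) would throw: 4 lanes != 8 lanes
        }
    }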
@@ -647,9 +678,9 @@ final class Short256Vector extends ShortVector {
public Short256Mask and(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short256Mask m = (Short256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Short256Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Short256Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@@ -657,9 +688,9 @@ final class Short256Vector extends ShortVector {
public Short256Mask or(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short256Mask m = (Short256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Short256Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Short256Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@@ -667,9 +698,9 @@ final class Short256Vector extends ShortVector {
Short256Mask xor(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short256Mask m = (Short256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short256Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short256Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
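The and/or/xor rewrites above move mask logic onto the masked binaryOp intrinsic signature (the extra nulls are the unused mask-class and mask slots). Public callers see and, or, and not (xor stays package-private); a sketch (bit patterns illustrative):

    import jdk.incubator.vector.*;

    public class MaskLogicDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_256; // 16 lanes

        public static void main(String[] args) {
            VectorMask<Short> a = VectorMask.fromLong(S, 0x00FFL);
            VectorMask<Short> b = VectorMask.fromLong(S, 0x0FF0L);
            System.out.println(Long.toHexString(a.and(b).toLong())); // f0
            System.out.println(Long.toHexString(a.or(b).toLong()));  // fff
            System.out.println(Long.toHexString(a.not().toLong()));  // ff00
        }
    }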
@@ -677,22 +708,32 @@ final class Short256Vector extends ShortVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short256Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(((Short256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short256Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short256Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(((Short256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short256Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short256Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(((Short256Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short256Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short256Mask.class, short.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@@ -803,6 +844,14 @@ final class Short256Vector extends ShortVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m) {
return super.fromArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -810,6 +859,13 @@ final class Short256Vector extends ShortVector {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m) {
return super.fromCharArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
@@ -818,6 +874,13 @@ final class Short256Vector extends ShortVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -825,6 +888,13 @@ final class Short256Vector extends ShortVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -832,6 +902,15 @@ final class Short256Vector extends ShortVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(short[] a, int offset, VectorMask<Short> m) {
super.intoArray0Template(Short256Mask.class, a, offset, (Short256Mask) m);
}
@ForceInline
@Override
final
@@ -839,6 +918,27 @@ final class Short256Vector extends ShortVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m);
}
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<Short> m) {
super.intoCharArray0Template(Short256Mask.class, a, offset, (Short256Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@@ -236,8 +236,8 @@ final class Short512Vector extends ShortVector {
@ForceInline
final @Override
short rOp(short v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
short rOp(short v, VectorMask<Short> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@@ -273,12 +273,24 @@ final class Short512Vector extends ShortVector {
return (Short512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Short512Vector lanewise(Unary op, VectorMask<Short> m) {
return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, (Short512Mask) m); // specialize
}
@Override
@ForceInline
public Short512Vector lanewise(Binary op, Vector<Short> v) {
return (Short512Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Short512Vector lanewise(Binary op, Vector<Short> v, VectorMask<Short> m) {
return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, v, (Short512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Short512Vector
@@ -286,15 +298,30 @@ final class Short512Vector extends ShortVector {
return (Short512Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Short512Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m) {
return (Short512Vector) super.lanewiseShiftTemplate(op, Short512Mask.class, e, (Short512Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Short512Vector
lanewise(VectorOperators.Ternary op, Vector<Short> v1, Vector<Short> v2) {
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2) {
return (Short512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Short512Vector
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2, VectorMask<Short> m) {
return (Short512Vector) super.lanewiseTemplate(op, Short512Mask.class, v1, v2, (Short512Mask) m); // specialize
}
@Override
@ForceInline
public final
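The masked lanewiseShift overload added above keeps the scalar-shift fast path while carrying the mask class to the intrinsic. From user code, a scalar shift count with a mask looks like this (count and mask are illustrative):

    import jdk.incubator.vector.*;

    public class MaskedShiftDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_512;

        public static void main(String[] args) {
            ShortVector v = ShortVector.broadcast(S, (short) 8);
            VectorMask<Short> m = S.indexInRange(0, S.length() / 2);
            // Scalar shift counts take the lanewiseShift fast path above.
            ShortVector shifted = v.lanewise(VectorOperators.LSHL, (short) 2, m);
            // set lanes: 32; unset lanes: unchanged 8
        }
    }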
@@ -314,7 +341,7 @@ final class Short512Vector extends ShortVector {
@ForceInline
public final short reduceLanes(VectorOperators.Associative op,
VectorMask<Short> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized
}
@Override
@@ -327,7 +354,7 @@ final class Short512Vector extends ShortVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Short> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized
}
@ForceInline
@@ -363,6 +390,13 @@ final class Short512Vector extends ShortVector {
return super.compareTemplate(Short512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Short512Mask compare(Comparison op, Vector<Short> v, VectorMask<Short> m) {
return super.compareTemplate(Short512Mask.class, op, v, (Short512Mask) m);
}
@Override
@ForceInline
public Short512Vector blend(Vector<Short> v, VectorMask<Short> m) {
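The masked compare added above behaves like compare(op, v) followed by and(m): lanes outside the mask are always unset in the result. A sketch (operands illustrative):

    import jdk.incubator.vector.*;

    public class MaskedCompareDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_512;

        public static void main(String[] args) {
            ShortVector a = ShortVector.broadcast(S, (short) 0).addIndex(1); // 0,1,2,...
            ShortVector b = ShortVector.broadcast(S, (short) 10);
            VectorMask<Short> m = S.indexInRange(0, 8);
            // LT is evaluated lanewise, then ANDed with m.
            VectorMask<Short> lt = a.compare(VectorOperators.LT, b, m);
        }
    }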
@@ -419,6 +453,7 @@ final class Short512Vector extends ShortVector {
VectorMask<Short> m) {
return (Short512Vector)
super.rearrangeTemplate(Short512Shuffle.class,
Short512Mask.class,
(Short512Shuffle) shuffle,
(Short512Mask) m); // specialize
}
@@ -644,16 +679,12 @@ final class Short512Vector extends ShortVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Short512Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@@ -679,9 +710,9 @@ final class Short512Vector extends ShortVector {
public Short512Mask and(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short512Mask m = (Short512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Short512Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Short512Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@@ -689,9 +720,9 @@ final class Short512Vector extends ShortVector {
public Short512Mask or(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short512Mask m = (Short512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Short512Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Short512Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@@ -699,9 +730,9 @@ final class Short512Vector extends ShortVector {
Short512Mask xor(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short512Mask m = (Short512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short512Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short512Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@@ -709,22 +740,32 @@ final class Short512Vector extends ShortVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short512Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(((Short512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short512Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short512Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(((Short512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short512Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short512Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(((Short512Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short512Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short512Mask.class, short.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@@ -835,6 +876,14 @@ final class Short512Vector extends ShortVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m) {
return super.fromArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -842,6 +891,13 @@ final class Short512Vector extends ShortVector {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m) {
return super.fromCharArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
@@ -850,6 +906,13 @@ final class Short512Vector extends ShortVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -857,6 +920,13 @@ final class Short512Vector extends ShortVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -864,6 +934,15 @@ final class Short512Vector extends ShortVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(short[] a, int offset, VectorMask<Short> m) {
super.intoArray0Template(Short512Mask.class, a, offset, (Short512Mask) m);
}
@ForceInline
@Override
final
@@ -871,6 +950,27 @@ final class Short512Vector extends ShortVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m);
}
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<Short> m) {
super.intoCharArray0Template(Short512Mask.class, a, offset, (Short512Mask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@@ -236,8 +236,8 @@ final class Short64Vector extends ShortVector {
@ForceInline
final @Override
short rOp(short v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
short rOp(short v, VectorMask<Short> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@@ -273,12 +273,24 @@ final class Short64Vector extends ShortVector {
return (Short64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Short64Vector lanewise(Unary op, VectorMask<Short> m) {
return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, (Short64Mask) m); // specialize
}
@Override
@ForceInline
public Short64Vector lanewise(Binary op, Vector<Short> v) {
return (Short64Vector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public Short64Vector lanewise(Binary op, Vector<Short> v, VectorMask<Short> m) {
return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, v, (Short64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline Short64Vector
@@ -286,15 +298,30 @@ final class Short64Vector extends ShortVector {
return (Short64Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline Short64Vector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m) {
return (Short64Vector) super.lanewiseShiftTemplate(op, Short64Mask.class, e, (Short64Mask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Short64Vector
lanewise(VectorOperators.Ternary op, Vector<Short> v1, Vector<Short> v2) {
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2) {
return (Short64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Short64Vector
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2, VectorMask<Short> m) {
return (Short64Vector) super.lanewiseTemplate(op, Short64Mask.class, v1, v2, (Short64Mask) m); // specialize
}
@Override
@ForceInline
public final
@@ -314,7 +341,7 @@ final class Short64Vector extends ShortVector {
@ForceInline
public final short reduceLanes(VectorOperators.Associative op,
VectorMask<Short> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized
}
@Override
@@ -327,7 +354,7 @@ final class Short64Vector extends ShortVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Short> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized
}
@ForceInline
@@ -363,6 +390,13 @@ final class Short64Vector extends ShortVector {
return super.compareTemplate(Short64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Short64Mask compare(Comparison op, Vector<Short> v, VectorMask<Short> m) {
return super.compareTemplate(Short64Mask.class, op, v, (Short64Mask) m);
}
@Override
@ForceInline
public Short64Vector blend(Vector<Short> v, VectorMask<Short> m) {
@@ -419,6 +453,7 @@ final class Short64Vector extends ShortVector {
VectorMask<Short> m) {
return (Short64Vector)
super.rearrangeTemplate(Short64Shuffle.class,
Short64Mask.class,
(Short64Shuffle) shuffle,
(Short64Mask) m); // specialize
}
@@ -588,16 +623,12 @@ final class Short64Vector extends ShortVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
Short64Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@@ -623,9 +654,9 @@ final class Short64Vector extends ShortVector {
public Short64Mask and(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short64Mask m = (Short64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Short64Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, Short64Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@@ -633,9 +664,9 @@ final class Short64Vector extends ShortVector {
public Short64Mask or(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short64Mask m = (Short64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Short64Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, Short64Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@@ -643,9 +674,9 @@ final class Short64Vector extends ShortVector {
Short64Mask xor(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
Short64Mask m = (Short64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short64Mask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, Short64Mask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@@ -653,22 +684,32 @@ final class Short64Vector extends ShortVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short64Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(((Short64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Short64Mask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short64Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(((Short64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Short64Mask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short64Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(((Short64Mask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Short64Mask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Short64Mask.class, short.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
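The trueCount/firstTrue/lastTrue changes above widen the intrinsic result to long and cast back, with the same observable behavior. For reference, the mask queries on a 4-lane species (bit pattern illustrative):

    import jdk.incubator.vector.*;

    public class MaskQueryDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_64; // 4 lanes

        public static void main(String[] args) {
            VectorMask<Short> m = VectorMask.fromLong(S, 0b0110L);
            System.out.println(m.trueCount()); // 2
            System.out.println(m.firstTrue()); // 1
            System.out.println(m.lastTrue());  // 2
            // For an all-false mask: firstTrue() == length(), lastTrue() == -1
        }
    }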
@@ -779,6 +820,14 @@ final class Short64Vector extends ShortVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m) {
return super.fromArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -786,6 +835,13 @@ final class Short64Vector extends ShortVector {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m) {
return super.fromCharArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
@@ -794,6 +850,13 @@ final class Short64Vector extends ShortVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -801,6 +864,13 @@ final class Short64Vector extends ShortVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
@@ -808,6 +878,15 @@ final class Short64Vector extends ShortVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(short[] a, int offset, VectorMask<Short> m) {
super.intoArray0Template(Short64Mask.class, a, offset, (Short64Mask) m);
}
@ForceInline
@Override
final
@@ -815,6 +894,27 @@ final class Short64Vector extends ShortVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m);
}
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<Short> m) {
super.intoCharArray0Template(Short64Mask.class, a, offset, (Short64Mask) m);
}
// End of specialized low-level memory operations.
// ================================================
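The fromCharArray0/intoCharArray0 masked specializations above exist because ShortVector doubles as the carrier for char data. A hedged sketch of masked char-array access (text and species illustrative):

    import jdk.incubator.vector.*;

    public class CharArrayDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_64;

        public static void main(String[] args) {
            char[] text = "abcdefg".toCharArray(); // 7 chars, not lane-aligned
            short[] out = new short[text.length];
            for (int i = 0; i < text.length; i += S.length()) {
                VectorMask<Short> m = S.indexInRange(i, text.length);
                ShortVector v = ShortVector.fromCharArray(S, text, i, m);
                v.intoArray(out, i, m);
            }
        }
    }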

View file

@@ -236,8 +236,8 @@ final class ShortMaxVector extends ShortVector {
@ForceInline
final @Override
short rOp(short v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
short rOp(short v, VectorMask<Short> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@@ -273,12 +273,24 @@ final class ShortMaxVector extends ShortVector {
return (ShortMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public ShortMaxVector lanewise(Unary op, VectorMask<Short> m) {
return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialize
}
@Override
@ForceInline
public ShortMaxVector lanewise(Binary op, Vector<Short> v) {
return (ShortMaxVector) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public ShortMaxVector lanewise(Binary op, Vector<Short> v, VectorMask<Short> m) {
return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, v, (ShortMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline ShortMaxVector
@@ -286,15 +298,30 @@ final class ShortMaxVector extends ShortVector {
return (ShortMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline ShortMaxVector
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m) {
return (ShortMaxVector) super.lanewiseShiftTemplate(op, ShortMaxMask.class, e, (ShortMaxMask) m); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
ShortMaxVector
lanewise(VectorOperators.Ternary op, Vector<Short> v1, Vector<Short> v2) {
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2) {
return (ShortMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
ShortMaxVector
lanewise(Ternary op, Vector<Short> v1, Vector<Short> v2, VectorMask<Short> m) {
return (ShortMaxVector) super.lanewiseTemplate(op, ShortMaxMask.class, v1, v2, (ShortMaxMask) m); // specialize
}
@Override
@ForceInline
public final
@@ -314,7 +341,7 @@ final class ShortMaxVector extends ShortVector {
@ForceInline
public final short reduceLanes(VectorOperators.Associative op,
VectorMask<Short> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized
}
@Override
@@ -327,7 +354,7 @@ final class ShortMaxVector extends ShortVector {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Short> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized
}
@ForceInline
@@ -363,6 +390,13 @@ final class ShortMaxVector extends ShortVector {
return super.compareTemplate(ShortMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final ShortMaxMask compare(Comparison op, Vector<Short> v, VectorMask<Short> m) {
return super.compareTemplate(ShortMaxMask.class, op, v, (ShortMaxMask) m);
}
@Override
@ForceInline
public ShortMaxVector blend(Vector<Short> v, VectorMask<Short> m) {
@@ -419,6 +453,7 @@ final class ShortMaxVector extends ShortVector {
VectorMask<Short> m) {
return (ShortMaxVector)
super.rearrangeTemplate(ShortMaxShuffle.class,
ShortMaxMask.class,
(ShortMaxShuffle) shuffle,
(ShortMaxMask) m); // specialize
}
@@ -582,16 +617,12 @@ final class ShortMaxVector extends ShortVector {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
ShortMaxMask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@@ -617,9 +648,9 @@ final class ShortMaxVector extends ShortVector {
public ShortMaxMask and(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
ShortMaxMask m = (ShortMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, ShortMaxMask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, ShortMaxMask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@@ -627,9 +658,9 @@ final class ShortMaxVector extends ShortVector {
public ShortMaxMask or(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
ShortMaxMask m = (ShortMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, ShortMaxMask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, ShortMaxMask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@@ -637,9 +668,9 @@ final class ShortMaxVector extends ShortVector {
ShortMaxMask xor(VectorMask<Short> mask) {
Objects.requireNonNull(mask);
ShortMaxMask m = (ShortMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, ShortMaxMask.class, short.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, ShortMaxMask.class, null, short.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@@ -647,22 +678,32 @@ final class ShortMaxVector extends ShortVector {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(((ShortMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(((ShortMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(((ShortMaxMask)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, ShortMaxMask.class, short.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@@ -773,6 +814,14 @@ final class ShortMaxVector extends ShortVector {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m) {
return super.fromArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
@@ -780,6 +829,13 @@ final class ShortMaxVector extends ShortVector {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m) {
return super.fromCharArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
@@ -788,6 +844,13 @@ final class ShortMaxVector extends ShortVector {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
@@ -795,6 +858,13 @@ final class ShortMaxVector extends ShortVector {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
@@ -802,6 +872,15 @@ final class ShortMaxVector extends ShortVector {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(short[] a, int offset, VectorMask<Short> m) {
super.intoArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m);
}
@ForceInline
@Override
final
@@ -809,6 +888,27 @@ final class ShortMaxVector extends ShortVector {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m);
}
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<Short> m) {
super.intoCharArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m);
}
// End of specialized low-level memory operations.
// ================================================

View file

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -516,6 +516,8 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
* @param i the lane index
*
* @return true if the lane at index {@code i} is set, otherwise false
* @throws IndexOutOfBoundsException if the index is out of range
* ({@code < 0 || >= length()})
*/
public abstract boolean laneIsSet(int i);
@@ -553,6 +555,24 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
*/
public abstract <F> VectorMask<F> check(VectorSpecies<F> species);
/**
* Checks that this mask has the given mask class and that its species
* matches the given vector's species, and returns this mask unchanged.
* The effect is similar to this pseudocode:
* {@code getClass() == maskClass &&
* vectorSpecies() == vector.species()
* ? this
* : throw new ClassCastException()}.
*
* @param maskClass the class required for this mask
* @param vector the vector whose species is required for this mask
* @param <F> the boxed element type of the required species
* @return the same mask
* @throws ClassCastException if the species is wrong
*/
abstract <F> VectorMask<F> check(Class<? extends VectorMask<F>> maskClass, Vector<F> vector);
/**
* Returns a string representation of this mask, of the form
* {@code "Mask[T.TT...]"}, reporting the mask bit

View file

@@ -238,8 +238,8 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
final @Override
$type$ rOp($type$ v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
$type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f) {
return super.rOpTemplate(v, m, f); // specialize
}
@Override
@@ -275,12 +275,24 @@ final class $vectortype$ extends $abstractvectortype$ {
return ($vectortype$) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public $vectortype$ lanewise(Unary op, VectorMask<$Boxtype$> m) {
return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, ($masktype$) m); // specialize
}
@Override
@ForceInline
public $vectortype$ lanewise(Binary op, Vector<$Boxtype$> v) {
return ($vectortype$) super.lanewiseTemplate(op, v); // specialize
}
@Override
@ForceInline
public $vectortype$ lanewise(Binary op, Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, v, ($masktype$) m); // specialize
}
#if[!FP]
/*package-private*/
@Override
@@ -288,6 +300,13 @@ final class $vectortype$ extends $abstractvectortype$ {
lanewiseShift(VectorOperators.Binary op, int e) {
return ($vectortype$) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline $vectortype$
lanewiseShift(VectorOperators.Binary op, int e, VectorMask<$Boxtype$> m) {
return ($vectortype$) super.lanewiseShiftTemplate(op, $masktype$.class, e, ($masktype$) m); // specialize
}
#end[!FP]
/*package-private*/
@@ -295,10 +314,18 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
public final
$vectortype$
lanewise(VectorOperators.Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2) {
lanewise(Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2) {
return ($vectortype$) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
$vectortype$
lanewise(Ternary op, Vector<$Boxtype$> v1, Vector<$Boxtype$> v2, VectorMask<$Boxtype$> m) {
return ($vectortype$) super.lanewiseTemplate(op, $masktype$.class, v1, v2, ($masktype$) m); // specialize
}
@Override
@ForceInline
public final
@@ -318,7 +345,7 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
public final $type$ reduceLanes(VectorOperators.Associative op,
VectorMask<$Boxtype$> m) {
return super.reduceLanesTemplate(op, m); // specialized
return super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized
}
@Override
@@ -331,7 +358,7 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<$Boxtype$> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
return (long) super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized
}
@ForceInline
@@ -369,6 +396,13 @@ final class $vectortype$ extends $abstractvectortype$ {
}
#end[!long]
@Override
@ForceInline
public final $masktype$ compare(Comparison op, Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
return super.compareTemplate($masktype$.class, op, v, ($masktype$) m);
}
@Override
@ForceInline
public $vectortype$ blend(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
@@ -425,6 +459,7 @@ final class $vectortype$ extends $abstractvectortype$ {
VectorMask<$Boxtype$> m) {
return ($vectortype$)
super.rearrangeTemplate($shuffletype$.class,
$masktype$.class,
($shuffletype$) shuffle,
($masktype$) m); // specialize
}
@@ -855,16 +890,12 @@ final class $vectortype$ extends $abstractvectortype$ {
AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
if (VSIZE == species.vectorBitSize()) {
Class<?> dtype = species.elementType();
Class<?> dmtype = species.maskType();
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), ETYPE, VLENGTH,
dmtype, dtype, VLENGTH,
species.maskType(), species.elementType(), VLENGTH,
this, species,
$Type$$bits$Mask::defaultMaskCast);
}
return this.defaultMaskCast(species);
(m, s) -> s.maskFactory(m.toArray()).check(s));
}
@Override
@@ -890,9 +921,9 @@ final class $vectortype$ extends $abstractvectortype$ {
public $masktype$ and(VectorMask<$Boxtype$> mask) {
Objects.requireNonNull(mask);
$masktype$ m = ($masktype$)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, $bitstype$.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $bitstype$.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@@ -900,9 +931,9 @@ final class $vectortype$ extends $abstractvectortype$ {
public $masktype$ or(VectorMask<$Boxtype$> mask) {
Objects.requireNonNull(mask);
$masktype$ m = ($masktype$)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, $bitstype$.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $bitstype$.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
@@ -910,9 +941,9 @@ final class $vectortype$ extends $abstractvectortype$ {
$masktype$ xor(VectorMask<$Boxtype$> mask) {
Objects.requireNonNull(mask);
$masktype$ m = ($masktype$)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, $bitstype$.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $bitstype$.class, VLENGTH,
this, m, null,
(m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Mask Query operations
@@ -920,22 +951,32 @@ final class $vectortype$ extends $abstractvectortype$ {
@Override
@ForceInline
public int trueCount() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> trueCountHelper((($masktype$)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> trueCountHelper(m.getBits()));
}
@Override
@ForceInline
public int firstTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> firstTrueHelper((($masktype$)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> firstTrueHelper(m.getBits()));
}
@Override
@ForceInline
public int lastTrue() {
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> lastTrueHelper((($masktype$)m).getBits()));
return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> lastTrueHelper(m.getBits()));
}
@Override
@ForceInline
public long toLong() {
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $bitstype$.class, VLENGTH, this,
(m) -> toLongHelper(m.getBits()));
}
// Reductions
@@ -1061,6 +1102,22 @@ final class $vectortype$ extends $abstractvectortype$ {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) {
return super.fromArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
#if[!byteOrShort]
@ForceInline
@Override
final
$abstractvectortype$ fromArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {
return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
}
#end[!byteOrShort]
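The #if[!byteOrShort] guard above exists because gather/scatter with an index map is only generated for int/long/float/double lanes. A hedged IntVector sketch of the masked gather it produces (table and index map are illustrative):

    import jdk.incubator.vector.*;

    public class MaskedGatherDemo {
        static final VectorSpecies<Integer> S = IntVector.SPECIES_256; // 8 lanes

        public static void main(String[] args) {
            int[] table = new int[64];
            for (int i = 0; i < table.length; i++) table[i] = i * i;
            int[] indexMap = {3, 1, 4, 1, 5, 9, 2, 6};
            VectorMask<Integer> m = VectorMask.fromLong(S, 0b0111_1111L);
            // Lane j gets table[indexMap[j]] where m is set; unset lanes load 0.
            IntVector g = IntVector.fromArray(S, table, 0, indexMap, 0, m);
        }
    }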
#if[short]
@ForceInline
@Override
@@ -1068,6 +1125,13 @@ final class $vectortype$ extends $abstractvectortype$ {
$abstractvectortype$ fromCharArray0(char[] a, int offset) {
return super.fromCharArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) {
return super.fromCharArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
#end[short]
#if[byte]
@@ -1077,6 +1141,13 @@ final class $vectortype$ extends $abstractvectortype$ {
$abstractvectortype$ fromBooleanArray0(boolean[] a, int offset) {
return super.fromBooleanArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m) {
return super.fromBooleanArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
#end[byte]
@ForceInline
@@ -1086,6 +1157,13 @@ final class $vectortype$ extends $abstractvectortype$ {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
return super.fromByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
@ForceInline
@Override
final
@@ -1093,6 +1171,13 @@ final class $vectortype$ extends $abstractvectortype$ {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
return super.fromByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m); // specialize
}
@ForceInline
@Override
final
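The masked fromByteBuffer0/intoByteBuffer0 template additions above back the public ByteBuffer accessors, which also take an explicit byte order. A hedged sketch (buffer size and mask are illustrative):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import jdk.incubator.vector.*;

    public class ByteBufferDemo {
        static final VectorSpecies<Short> S = ShortVector.SPECIES_128;

        public static void main(String[] args) {
            ByteBuffer bb = ByteBuffer.allocate(S.vectorByteSize());
            VectorMask<Short> m = S.indexInRange(0, 5); // only the first 5 lanes
            ShortVector v = ShortVector.fromByteBuffer(S, bb, 0, ByteOrder.LITTLE_ENDIAN, m);
            v.intoByteBuffer(bb, 0, ByteOrder.LITTLE_ENDIAN, m);
        }
    }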
@@ -1100,6 +1185,31 @@ final class $vectortype$ extends $abstractvectortype$ {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoArray0Template($masktype$.class, a, offset, ($masktype$) m);
}
#if[!byteOrShort]
@ForceInline
@Override
final
void intoArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {
super.intoArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
}
#end[!byteOrShort]
#if[byte]
@ForceInline
@Override
final
void intoBooleanArray0(boolean[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoBooleanArray0Template($masktype$.class, a, offset, ($masktype$) m);
}
#end[byte]
@ForceInline
@Override
final
@@ -1107,6 +1217,29 @@ final class $vectortype$ extends $abstractvectortype$ {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
super.intoByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m);
}
#if[short]
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoCharArray0Template($masktype$.class, a, offset, ($masktype$) m);
}
#end[short]
// End of specialized low-level memory operations.
// ================================================

View file

@@ -19,9 +19,6 @@
; or visit www.oracle.com if you need additional information or have any
; questions.
; This file contains duplicate entries as globalDefinitions_vecApi.hpp
; It is intended for inclusion in .s files compiled with masm
; Used to check whether building on x86_64 architecture. Equivalent to checking in regular hpp file for #ifdef _WIN64
IFDEF RAX

View file

@@ -1555,6 +1555,23 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"],
["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"],
["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"],
["lsl", "__ sve_lsl(z0, __ B, p0, 0);", "lsl\tz0.b, p0/m, z0.b, #0"],
["lsl", "__ sve_lsl(z0, __ B, p0, 5);", "lsl\tz0.b, p0/m, z0.b, #5"],
["lsl", "__ sve_lsl(z1, __ H, p1, 15);", "lsl\tz1.h, p1/m, z1.h, #15"],
["lsl", "__ sve_lsl(z2, __ S, p2, 31);", "lsl\tz2.s, p2/m, z2.s, #31"],
["lsl", "__ sve_lsl(z3, __ D, p3, 63);", "lsl\tz3.d, p3/m, z3.d, #63"],
["lsr", "__ sve_lsr(z0, __ B, p0, 1);", "lsr\tz0.b, p0/m, z0.b, #1"],
["lsr", "__ sve_lsr(z0, __ B, p0, 8);", "lsr\tz0.b, p0/m, z0.b, #8"],
["lsr", "__ sve_lsr(z1, __ H, p1, 15);", "lsr\tz1.h, p1/m, z1.h, #15"],
["lsr", "__ sve_lsr(z2, __ S, p2, 7);", "lsr\tz2.s, p2/m, z2.s, #7"],
["lsr", "__ sve_lsr(z2, __ S, p2, 31);", "lsr\tz2.s, p2/m, z2.s, #31"],
["lsr", "__ sve_lsr(z3, __ D, p3, 63);", "lsr\tz3.d, p3/m, z3.d, #63"],
["asr", "__ sve_asr(z0, __ B, p0, 1);", "asr\tz0.b, p0/m, z0.b, #1"],
["asr", "__ sve_asr(z0, __ B, p0, 7);", "asr\tz0.b, p0/m, z0.b, #7"],
["asr", "__ sve_asr(z1, __ H, p1, 5);", "asr\tz1.h, p1/m, z1.h, #5"],
["asr", "__ sve_asr(z1, __ H, p1, 15);", "asr\tz1.h, p1/m, z1.h, #15"],
["asr", "__ sve_asr(z2, __ S, p2, 31);", "asr\tz2.s, p2/m, z2.s, #31"],
["asr", "__ sve_asr(z3, __ D, p3, 63);", "asr\tz3.d, p3/m, z3.d, #63"],
["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"],
["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"],
["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"],
@@ -1649,6 +1666,29 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["ld1d", "__ sve_ld1d_gather(z15, p0, r5, z16);", "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
["st1w", "__ sve_st1w_scatter(z15, p0, r5, z16);", "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
["st1d", "__ sve_st1d_scatter(z15, p0, r5, z16);", "st1d\t{z15.d}, p0, [x5, z16.d, uxtw #3]"],
["and", "__ sve_and(p0, p1, p2, p3);", "and\tp0.b, p1/z, p2.b, p3.b"],
["ands", "__ sve_ands(p4, p5, p6, p0);", "ands\tp4.b, p5/z, p6.b, p0.b"],
["eor", "__ sve_eor(p0, p1, p2, p3);", "eor\tp0.b, p1/z, p2.b, p3.b"],
["eors", "__ sve_eors(p5, p6, p0, p1);", "eors\tp5.b, p6/z, p0.b, p1.b"],
["orr", "__ sve_orr(p0, p1, p2, p3);", "orr\tp0.b, p1/z, p2.b, p3.b"],
["orrs", "__ sve_orrs(p9, p1, p4, p5);", "orrs\tp9.b, p1/z, p4.b, p5.b"],
["bic", "__ sve_bic(p10, p7, p9, p11);", "bic\tp10.b, p7/z, p9.b, p11.b"],
["ptest", "__ sve_ptest(p7, p1);", "ptest\tp7, p1.b"],
["ptrue", "__ sve_ptrue(p1, __ B);", "ptrue\tp1.b"],
["ptrue", "__ sve_ptrue(p2, __ H);", "ptrue\tp2.h"],
["ptrue", "__ sve_ptrue(p3, __ S);", "ptrue\tp3.s"],
["ptrue", "__ sve_ptrue(p4, __ D);", "ptrue\tp4.d"],
["pfalse", "__ sve_pfalse(p7);", "pfalse\tp7.b"],
["uzp1", "__ sve_uzp1(p0, __ B, p0, p1);", "uzp1\tp0.b, p0.b, p1.b"],
["uzp1", "__ sve_uzp1(p0, __ H, p0, p1);", "uzp1\tp0.h, p0.h, p1.h"],
["uzp1", "__ sve_uzp1(p0, __ S, p0, p1);", "uzp1\tp0.s, p0.s, p1.s"],
["uzp1", "__ sve_uzp1(p0, __ D, p0, p1);", "uzp1\tp0.d, p0.d, p1.d"],
["uzp2", "__ sve_uzp2(p0, __ B, p0, p1);", "uzp2\tp0.b, p0.b, p1.b"],
["uzp2", "__ sve_uzp2(p0, __ H, p0, p1);", "uzp2\tp0.h, p0.h, p1.h"],
["uzp2", "__ sve_uzp2(p0, __ S, p0, p1);", "uzp2\tp0.s, p0.s, p1.s"],
["uzp2", "__ sve_uzp2(p0, __ D, p0, p1);", "uzp2\tp0.d, p0.d, p1.d"],
["punpklo", "__ sve_punpklo(p1, p0);", "punpklo\tp1.h, p0.b"],
["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
])
print "\n// FloatImmediateOp"
@@ -1686,13 +1726,16 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["fsub", "ZZZ"],
["abs", "ZPZ", "m"],
["add", "ZPZ", "m", "dn"],
["and", "ZPZ", "m", "dn"],
["asr", "ZPZ", "m", "dn"],
["cnt", "ZPZ", "m"],
["eor", "ZPZ", "m", "dn"],
["lsl", "ZPZ", "m", "dn"],
["lsr", "ZPZ", "m", "dn"],
["mul", "ZPZ", "m", "dn"],
["neg", "ZPZ", "m"],
["not", "ZPZ", "m"],
["orr", "ZPZ", "m", "dn"],
["smax", "ZPZ", "m", "dn"],
["smin", "ZPZ", "m", "dn"],
["sub", "ZPZ", "m", "dn"],
@@ -1708,6 +1751,7 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["frintp", "ZPZ", "m"],
["fsqrt", "ZPZ", "m"],
["fsub", "ZPZ", "m", "dn"],
["fmad", "ZPZZ", "m"],
["fmla", "ZPZZ", "m"],
["fmls", "ZPZZ", "m"],
["fnmla", "ZPZZ", "m"],

Some files were not shown because too many files have changed in this diff.