8284960: Integration of JEP 426: Vector API (Fourth Incubator)

Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Joshua Zhu <jzhu@openjdk.org>
Co-authored-by: Xiaohong Gong <xgong@openjdk.org>
Co-authored-by: John R Rose <jrose@openjdk.org>
Co-authored-by: Eric Liu <eliu@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Reviewed-by: ngasson, vlivanov, mcimadamore, jlahoda, kvn
Jatin Bhateja 2022-05-31 16:02:09 +00:00
parent 171a7cdd5d
commit 6f6486e977
227 changed files with 20949 additions and 21221 deletions

View file

@ -2468,6 +2468,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_CompressV:
case Op_CompressM:
case Op_ExpandV:
return false;
default:
break;
@ -8658,7 +8661,6 @@ instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
//
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
@ -8680,7 +8682,6 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
%}
instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI (LoadI mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
@ -8703,7 +8704,6 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
@ -8723,7 +8723,6 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
%}
instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL (LoadL mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);

View file

@ -5683,14 +5683,58 @@ instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
ins_pipe(vdop_fp128);
%}
instruct vpopcount4I(vecX dst, vecX src) %{
predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
instruct vpopcountID(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() < 16);
match(Set dst (PopCountVI src));
format %{
"cnt $dst, $src\t# vector (16B)\n\t"
"uaddlp $dst, $dst\t# vector (16B)\n\t"
"uaddlp $dst, $dst\t# vector (8H)"
ins_cost(3 * INSN_COST);
format %{ "vpopcountI $dst, $src\t# vector (8B/4H/2S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src$$reg));
if (bt == T_SHORT || bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($dst$$reg));
if (bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
as_FloatRegister($dst$$reg));
}
}
%}
ins_pipe(pipe_class_default);
%}
instruct vpopcountIX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (PopCountVI src));
ins_cost(3 * INSN_COST);
format %{ "vpopcountI $dst, $src\t# vector (16B/8H/4S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
if (bt == T_SHORT || bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
if (bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
}
}
%}
ins_pipe(pipe_class_default);
%}
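The two rules above implement PopCountVI by counting bits per byte with cnt and then folding adjacent lanes with unsigned pairwise widening adds (uaddlp) until the lane width matches the element type. A minimal scalar sketch of that folding for a single int lane (plain Java, illustrative helper only):

static int popCountViaPairwiseAdds(int x) {
    // cnt: popcount of each of the four bytes
    int b0 = Integer.bitCount(x & 0xFF);
    int b1 = Integer.bitCount((x >>> 8) & 0xFF);
    int b2 = Integer.bitCount((x >>> 16) & 0xFF);
    int b3 = Integer.bitCount((x >>> 24) & 0xFF);
    // uaddlp B -> H: add adjacent byte counts
    int h0 = b0 + b1, h1 = b2 + b3;
    // uaddlp H -> S: add adjacent halfword counts
    return h0 + h1; // equals Integer.bitCount(x)
}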
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. Once we have unified the type definition for the
// Vector API and auto-vectorization, this rule can be merged with the
// "vpopcountLX" rule.
instruct vpopcountLD(vecD dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() < 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (PopCountVL src));
ins_cost(5 * INSN_COST);
format %{ "vpopcountL $dst, $src\t# vector (2S)" %}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
@ -5698,24 +5742,28 @@ instruct vpopcount4I(vecX dst, vecX src) %{
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($dst$$reg));
__ xtn(as_FloatRegister($dst$$reg), __ T2S,
as_FloatRegister($dst$$reg), __ T2D);
%}
ins_pipe(pipe_class_default);
%}
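The comment above refers to the auto-vectorized form of Long.bitCount, which returns an int; the vector result therefore has int lanes and has to be narrowed, which is what the final xtn from 2D to 2S does. A scalar sketch of that loop shape (plain Java, illustration only):

static void bitCounts(long[] src, int[] dst) {
    for (int i = 0; i < src.length; i++) {
        dst[i] = Long.bitCount(src[i]); // long lanes in, int lanes out
    }
}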
instruct vpopcount2I(vecD dst, vecD src) %{
predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
match(Set dst (PopCountVI src));
format %{
"cnt $dst, $src\t# vector (8B)\n\t"
"uaddlp $dst, $dst\t# vector (8B)\n\t"
"uaddlp $dst, $dst\t# vector (4H)"
%}
instruct vpopcountLX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (PopCountVL src));
ins_cost(4 * INSN_COST);
format %{ "vpopcountL $dst, $src\t# vector (2D)" %}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
@ -5921,3 +5969,131 @@ instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
%}
ins_pipe(pipe_slow);
%}
//------------------------- CountLeadingZerosV -----------------------------
instruct countLeadingZerosVD(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (CountLeadingZerosV src));
ins_cost(INSN_COST);
format %{ "countLeadingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct countLeadingZerosVX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (CountLeadingZerosV src));
ins_cost(INSN_COST);
format %{ "countLeadingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
if (bt != T_LONG) {
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
} else {
__ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
__ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
}
%}
ins_pipe(pipe_slow);
%}
//------------------------- CountTrailingZerosV ----------------------------
instruct countTrailingZerosVD(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (CountTrailingZerosV src));
ins_cost(3 * INSN_COST);
format %{ "countTrailingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct countTrailingZerosVX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (CountTrailingZerosV src));
ins_cost(3 * INSN_COST);
format %{ "countTrailingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
if (bt != T_LONG) {
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
} else {
__ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 0);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
__ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 1);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
}
%}
ins_pipe(pipe_slow);
%}
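Both CountTrailingZerosV rules rely on the identity ntz(x) == nlz(reverse(x)): the input is bit-reversed (neon_reverse_bits) and the leading zeros are counted. For 2D lanes, where NEON clz has no 64-bit element form, each lane is moved to a general-purpose register and counted there. A scalar check of the identity (plain Java, illustrative helper only):

static int trailingZerosViaReverse(long x) {
    // equals Long.numberOfTrailingZeros(x) for every long x
    return Long.numberOfLeadingZeros(Long.reverse(x));
}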
//------------------------------ ReverseV -----------------------------------
instruct vreverseD(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (ReverseV src));
ins_cost(2 * INSN_COST);
format %{ "ReverseV $dst, $src\t# vector (D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
%}
ins_pipe(pipe_slow);
%}
instruct vreverseX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (ReverseV src));
ins_cost(2 * INSN_COST);
format %{ "ReverseV $dst, $src\t# vector (X)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
%}
ins_pipe(pipe_slow);
%}
//---------------------------- ReverseBytesV --------------------------------
instruct vreverseBytesD(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (ReverseBytesV src));
ins_cost(INSN_COST);
format %{ "ReverseBytesV $dst, $src\t# vector (D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
%}
ins_pipe(pipe_slow);
%}
instruct vreverseBytesX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (ReverseBytesV src));
ins_cost(INSN_COST);
format %{ "ReverseBytesV $dst, $src\t# vector (X)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
%}
ins_pipe(pipe_slow);
%}
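Per lane, ReverseV and ReverseBytesV match the scalar reverse and reverseBytes operations; for byte lanes, reversing the bytes of a one-byte element is the identity, which is why that case reduces to a plain register move in the neon_reverse_bytes helper. Scalar equivalents for an int lane (plain Java, illustration only):

static int reverseLane(int lane)      { return Integer.reverse(lane);      } // ReverseV
static int reverseBytesLane(int lane) { return Integer.reverseBytes(lane); } // ReverseBytesV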

View file

@ -2445,28 +2445,50 @@ instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
ins_pipe(vdop_fp128);
%}
dnl
define(`VPOPCOUNT', `
instruct vpopcount$1$2`'(vec$5 dst, vec$5 src) %{
predicate(UsePopCountInstruction && n->as_Vector()->length() == $1);
match(Set dst (PopCountVI src));
format %{
"cnt $dst, $src\t# vector ($3B)\n\t"
"uaddlp $dst, $dst\t# vector ($3B)\n\t"
"uaddlp $dst, $dst\t# vector ($4H)"
%}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T$3B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T$3B,
define(`VPOPCOUNT', `dnl
ifelse($1$2, `LD', `
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. Once we have unified the type definition for the
// Vector API and auto-vectorization, this rule can be merged with the
// "vpopcountLX" rule.', `')
instruct vpopcount$1$2`'(vec$2 dst, vec$3 src) %{
predicate(n->as_Vector()->length_in_bytes() $4 16`'ifelse($1$2, `LD', ` &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT', $1$2, `LX', ` &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
match(Set dst (PopCountV$1 src));
ins_cost($5 * INSN_COST);
format %{ "vpopcount$1 $dst, $src\t# vector ($6)" %}
ins_encode %{dnl
ifelse($1, `I', `
BasicType bt = Matcher::vector_element_basic_type(this);', `')
__ cnt(as_FloatRegister($dst$$reg), __ T`'ifelse($3, D, 8, 16)B,
as_FloatRegister($src$$reg));dnl
ifelse($1, `L', `
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T$4H,
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($dst$$reg));', `
if (bt == T_SHORT || bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 8, 16)B,
as_FloatRegister($dst$$reg));
if (bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 4, 8)H,
as_FloatRegister($dst$$reg));
}
}')dnl
ifelse($1$2, `LD', `
__ xtn(as_FloatRegister($dst$$reg), __ T2S,
as_FloatRegister($dst$$reg), __ T2D);', `')
%}
ins_pipe(pipe_class_default);
%}')dnl
dnl $1 $2 $3 $4 $5
VPOPCOUNT(4, I, 16, 8, X)
VPOPCOUNT(2, I, 8, 4, D)
dnl $1 $2 $3 $4 $5 $6
VPOPCOUNT(I, D, D, <, 3, 8B/4H/2S)
VPOPCOUNT(I, X, X, ==, 3, 16B/8H/4S)
VPOPCOUNT(L, D, X, <, 5, 2S)
VPOPCOUNT(L, X, X, ==, 4, 2D)
dnl
dnl VMASK_TRUECOUNT($1, $2 )
dnl VMASK_TRUECOUNT(suffix, reg)
@ -2647,3 +2669,81 @@ instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
%}
ins_pipe(pipe_slow);
%}
dnl
dnl CLTZ_D($1 )
dnl CLTZ_D(op_name)
define(`CLTZ_D', `
instruct count$1D(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
match(Set dst (Count$1 src));
ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
format %{ "count$1 $dst, $src\t# vector (8B/4H/2S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);dnl
ifelse($1, `TrailingZerosV', `
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);', `')
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl CLTZ_X($1 )
dnl CLTZ_X(op_name)
define(`CLTZ_X', `
instruct count$1X(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16);
match(Set dst (Count$1 src));
ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
format %{ "count$1 $dst, $src\t# vector (16B/8H/4S/2D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);dnl
ifelse($1, `TrailingZerosV', `
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);', `')
if (bt != T_LONG) {
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
} else {
__ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 0);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
__ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 1);
__ clz(rscratch1, rscratch1);
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
}
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
//------------------------- CountLeadingZerosV -----------------------------
CLTZ_D(LeadingZerosV)
CLTZ_X(LeadingZerosV)
//------------------------- CountTrailingZerosV ----------------------------
CLTZ_D(TrailingZerosV)
CLTZ_X(TrailingZerosV)
dnl
dnl REVERSE($1, $2, $3, $4 )
dnl REVERSE(insn_name, op_name, type, insn)
define(`REVERSE', `
instruct $1(vec$3 dst, vec$3 src) %{
predicate(n->as_Vector()->length_in_bytes() == ifelse($3, D, 8, 16));
match(Set dst ($2 src));
ins_cost(ifelse($2, `ReverseV', `2 * ', `')INSN_COST);
format %{ "$2 $dst, $src\t# vector ($3)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ $4(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, ifelse($3, D, false, true));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
//------------------------------ ReverseV -----------------------------------
REVERSE(vreverseD, ReverseV, D, neon_reverse_bits)
REVERSE(vreverseX, ReverseV, X, neon_reverse_bits)
//---------------------------- ReverseBytesV --------------------------------
REVERSE(vreverseBytesD, ReverseBytesV, D, neon_reverse_bytes)
REVERSE(vreverseBytesX, ReverseBytesV, X, neon_reverse_bytes)

View file

@ -149,6 +149,8 @@ source %{
case Op_LoadVector:
case Op_StoreVector:
return Matcher::vector_size_supported(bt, vlen);
case Op_ExpandV:
if (UseSVE < 2 || is_subword_type(bt)) return false;
case Op_VectorMaskToLong:
if (vlen > 64) return false;
default:
@ -2199,14 +2201,83 @@ instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
ins_pipe(pipe_slow);
%}
// popcount vector
// vector popcount
instruct vpopcountI(vReg dst, vReg src) %{
predicate(UseSVE > 0);
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst (PopCountVI src));
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
ins_cost(SVE_COST);
format %{ "sve_cnt $dst, $src\t# vector (sve) (B/H/S)" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_cnt(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vpopcountL(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector() &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (PopCountVL src));
ins_cost(SVE_COST);
format %{ "sve_cnt $dst, $src\t# vector (sve) (D)" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. Once we have unified the type definition for the
// Vector API and auto-vectorization, this rule can be merged with the
// "vpopcountL" rule.
instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector() &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (PopCountVL src));
effect(TEMP_DEF dst, TEMP vtmp);
ins_cost(3 * SVE_COST);
format %{ "sve_cnt $dst, $src\n\t"
"sve_dup $vtmp, #0\n\t"
"sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
__ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
// vector popcount - predicated
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src (PopCountVI dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (B/H/S)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_cnt(as_FloatRegister($dst_src$$reg), __ elemType_to_regVariant(bt),
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0 &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst_src (PopCountVL dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst_src$$reg), __ D,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}
@ -5767,6 +5838,104 @@ instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// ---------------------------- Compress/Expand Operations ---------------------------
instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set dst (CompressM pg));
effect(KILL cr);
ins_cost(2 * SVE_COST);
format %{ "sve_cntp rscratch1, $pg\n\t"
"sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_cntp(rscratch1, size, ptrue, as_PRegister($pg$$reg));
__ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1);
%}
ins_pipe(pipe_slow);
%}
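CompressM packs the active lanes of a mask toward lane 0, so the result is simply a prefix mask whose length is the number of active lanes; that is exactly what sve_cntp (count the active lanes) plus sve_whilelo (build a prefix predicate) compute. A sketch over a mask held as a long bitset, one bit per lane (plain Java, illustrative helper only):

static long compressMask(long maskBits) {
    int trueCount = Long.bitCount(maskBits);              // sve_cntp
    return trueCount == 64 ? -1L : (1L << trueCount) - 1; // sve_whilelo: prefix of trueCount ones
}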
instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set dst (CompressV src pg));
ins_cost(SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_compact(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg), as_PRegister($pg$$reg));
%}
ins_pipe(pipe_slow);
%}
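Per lane, CompressV (sve_compact) packs the active elements toward lane 0 and zero-fills the rest. Scalar sketch for int lanes (plain Java, illustration only):

static int[] compress(int[] src, boolean[] mask) {
    int[] dst = new int[src.length];
    int j = 0;
    for (int i = 0; i < src.length; i++) {
        if (mask[i]) {
            dst[j++] = src[i];  // active lanes move toward index 0
        }
    }
    return dst;                 // lanes j .. length-1 remain zero
}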
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
ins_cost(13 * SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
ins_encode %{
__ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg),
as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg),
as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
ins_cost(38 * SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
ins_encode %{
__ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
ins_cost(4 * SVE_COST);
format %{ "sve_dup $dst, S/D, 0\n\t"
"sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
"sve_sub $dst, S/D, 1\n\t"
"sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
// Expected result: dst = 4 0 0 5 6 0 7 8
// The basic idea is to use TBL which can shuffle the elements in the given
// vector flexibly. HISTCNT + SUB is used to generate the second source input
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup(as_FloatRegister($dst$$reg), size, 0);
// dst = 5 0 0 4 3 0 2 1
__ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
// dst = 4 -1 -1 3 2 -1 1 0
__ sve_sub(as_FloatRegister($dst$$reg), size, 1);
// dst = 4 0 0 5 6 0 7 8
__ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_slow);
%}
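ExpandV is the inverse operation: source elements are consumed from lane 0 upward and written only into the active destination lanes, with inactive lanes zeroed. This matches the example in the comment above, reading lane 0 as the rightmost element in that notation. Scalar sketch (plain Java, illustration only):

static int[] expand(int[] src, boolean[] mask) {
    int[] dst = new int[src.length];
    int j = 0;
    for (int i = 0; i < dst.length; i++) {
        dst[i] = mask[i] ? src[j++] : 0; // consume src lanes in order, zero inactive lanes
    }
    return dst;
}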
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set pg (VectorMaskGen len));
@ -5780,3 +5949,147 @@ instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
%}
ins_pipe(pipe_slow);
%}
// ------------------------------ CountLeadingZerosV ------------------------------
instruct vcountLeadingZeros(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst (CountLeadingZerosV src));
ins_cost(SVE_COST);
format %{ "sve_clz $dst, $src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src (CountLeadingZerosV dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_clz $dst_src, $pg, $dst_src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_clz(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}
// ------------------------------ CountTrailingZerosV -----------------------------
instruct vcountTrailingZeros(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst (CountTrailingZerosV src));
ins_cost(2 * SVE_COST);
format %{ "sve_rbit $dst, $src\n\t"
"sve_clz $dst, $dst\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
__ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_slow);
%}
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src (CountTrailingZerosV dst_src pg));
ins_cost(2 * SVE_COST);
format %{ "sve_rbit $dst_src, $pg, $dst_src\n\t"
"sve_clz $dst_src, $pg, $dst_src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
__ sve_clz(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}
// ---------------------------------- ReverseV ------------------------------------
instruct vreverse(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst (ReverseV src));
ins_cost(SVE_COST);
format %{ "sve_rbit $dst, $src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src (ReverseV dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_rbit $dst_src, $pg, $dst_src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}
// -------------------------------- ReverseBytesV ---------------------------------
instruct vreverseBytes(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst (ReverseBytesV src));
ins_cost(SVE_COST);
format %{ "sve_revb $dst, $src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
if (bt == T_BYTE) {
if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
__ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
}
} else {
__ sve_revb(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
}
%}
ins_pipe(pipe_slow);
%}
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src (ReverseBytesV dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_revb $dst_src, $pg, $dst_src\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
if (bt == T_BYTE) {
// do nothing
} else {
__ sve_revb(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
}
%}
ins_pipe(pipe_slow);
%}

View file

@ -144,6 +144,8 @@ source %{
case Op_LoadVector:
case Op_StoreVector:
return Matcher::vector_size_supported(bt, vlen);
case Op_ExpandV:
if (UseSVE < 2 || is_subword_type(bt)) return false;
case Op_VectorMaskToLong:
if (vlen > 64) return false;
default:
@ -1172,18 +1174,75 @@ UNARY_OP_PREDICATE(vnegL, NegVL, D, sve_neg)
UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg)
UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg)
// popcount vector
dnl
dnl VPOPCOUNT($1, $2 )
dnl VPOPCOUNT(name_suffix, size)
define(`VPOPCOUNT', `
instruct vpopcount$1(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector()`'ifelse($1, `L', ` &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
match(Set dst (PopCountV$1 src));
ins_cost(SVE_COST);
format %{ "sve_cnt $dst, $src\t# vector (sve) ($2)" %}
ins_encode %{dnl
ifelse($1, `I', `
BasicType bt = Matcher::vector_element_basic_type(this);', `')
__ sve_cnt(as_FloatRegister($dst$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector popcount
VPOPCOUNT(I, B/H/S)
VPOPCOUNT(L, D)
instruct vpopcountI(vReg dst, vReg src) %{
predicate(UseSVE > 0);
match(Set dst (PopCountVI src));
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. Once we have unified the type definition for the
// Vector API and auto-vectorization, this rule can be merged with the
// "vpopcountL" rule.
instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector() &&
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (PopCountVL src));
effect(TEMP_DEF dst, TEMP vtmp);
ins_cost(3 * SVE_COST);
format %{ "sve_cnt $dst, $src\n\t"
"sve_dup $vtmp, #0\n\t"
"sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
__ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
dnl
dnl VPOPCOUNT_PREDICATE($1, $2 )
dnl VPOPCOUNT_PREDICATE(name_suffix, size)
define(`VPOPCOUNT_PREDICATE', `
instruct vpopcount$1_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0`'ifelse($1, `L', ` &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
match(Set dst_src (PopCountV$1 dst_src pg));
ins_cost(SVE_COST);
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) ($2)" %}
ins_encode %{dnl
ifelse($1, `I', `
BasicType bt = Matcher::vector_element_basic_type(this);', `')
__ sve_cnt(as_FloatRegister($dst_src$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector popcount - predicated
VPOPCOUNT_PREDICATE(I, B/H/S)
VPOPCOUNT_PREDICATE(L, D)
// vector blend
instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
@ -3234,6 +3293,104 @@ instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// ---------------------------- Compress/Expand Operations ---------------------------
instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set dst (CompressM pg));
effect(KILL cr);
ins_cost(2 * SVE_COST);
format %{ "sve_cntp rscratch1, $pg\n\t"
"sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_cntp(rscratch1, size, ptrue, as_PRegister($pg$$reg));
__ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1);
%}
ins_pipe(pipe_slow);
%}
instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE > 0 &&
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
match(Set dst (CompressV src pg));
ins_cost(SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_compact(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg), as_PRegister($pg$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
ins_cost(13 * SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
ins_encode %{
__ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg),
as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg),
as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
ins_cost(38 * SVE_COST);
format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
ins_encode %{
__ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
ins_cost(4 * SVE_COST);
format %{ "sve_dup $dst, S/D, 0\n\t"
"sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
"sve_sub $dst, S/D, 1\n\t"
"sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
// Expected result: dst = 4 0 0 5 6 0 7 8
// The basic idea is to use TBL which can shuffle the elements in the given
// vector flexibly. HISTCNT + SUB is used to generate the second source input
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup(as_FloatRegister($dst$$reg), size, 0);
// dst = 5 0 0 4 3 0 2 1
__ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
// dst = 4 -1 -1 3 2 -1 1 0
__ sve_sub(as_FloatRegister($dst$$reg), size, 1);
// dst = 4 0 0 5 6 0 7 8
__ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
predicate(UseSVE > 0);
match(Set pg (VectorMaskGen len));
@ -3247,3 +3404,79 @@ instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
%}
ins_pipe(pipe_slow);
%}
dnl
dnl BITWISE_UNARY($1, $2, $3 )
dnl BITWISE_UNARY(insn_name, op_name, insn)
define(`BITWISE_UNARY', `
instruct $1(vReg dst, vReg src) %{
predicate(UseSVE > 0 &&
!n->as_Vector()->is_predicated_vector());
match(Set dst ($2 src));
ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst, $src\n\t"
"$3 $dst, $dst', `"$3 $dst, $src')\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
ifelse($2, `CountTrailingZerosV', `
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));', `')dnl
ifelse($2, `ReverseBytesV', `
if (bt == T_BYTE) {
if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
__ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
}
} else {
__ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
}', `
__ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($ifelse($2, `CountTrailingZerosV', dst, src)$$reg));')
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl BITWISE_UNARY_PREDICATE($1, $2, $3 )
dnl BITWISE_UNARY_PREDICATE(insn_name, op_name, insn)
define(`BITWISE_UNARY_PREDICATE', `
// The dst and src should use the same register to make sure the
// inactive lanes in dst save the same elements as src.
instruct $1_masked(vReg dst_src, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src ($2 dst_src pg));
ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst_src, $pg, $dst_src\n\t"
"$3 $dst_src, $pg, $dst_src', `"$3 $dst_src, $pg, $dst_src')\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
ifelse($2, `CountTrailingZerosV', `
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));', `')dnl
ifelse($2, `ReverseBytesV', `
if (bt == T_BYTE) {
// do nothing
} else {
__ $3(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
}', `
__ $3(as_FloatRegister($dst_src$$reg), size,
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));')
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// ------------------------------ CountLeadingZerosV ------------------------------
BITWISE_UNARY(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
BITWISE_UNARY_PREDICATE(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
// ------------------------------ CountTrailingZerosV -----------------------------
BITWISE_UNARY(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
BITWISE_UNARY_PREDICATE(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
// ---------------------------------- ReverseV ------------------------------------
BITWISE_UNARY(vreverse, ReverseV, sve_rbit)
BITWISE_UNARY_PREDICATE(vreverse, ReverseV, sve_rbit)
// -------------------------------- ReverseBytesV ---------------------------------
BITWISE_UNARY(vreverseBytes, ReverseBytesV, sve_revb)
BITWISE_UNARY_PREDICATE(vreverseBytes, ReverseBytesV, sve_revb)

View file

@ -3134,6 +3134,7 @@ public:
INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
INSN(sve_bic, 0b00000100, 0b011011000); // vector bitwise clear
INSN(sve_clz, 0b00000100, 0b011001101); // vector count leading zero bits
INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits
INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
INSN(sve_eor, 0b00000100, 0b011001000); // vector eor
@ -3793,6 +3794,19 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
INSN(sve_lastb, 0b1);
#undef INSN
// SVE reverse within elements
#define INSN(NAME, opc, cond) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \
starti; \
assert(cond, "invalid size"); \
f(0b00000101, 31, 24), f(T, 23, 22), f(0b1001, 21, 18), f(opc, 17, 16); \
f(0b100, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); \
}
INSN(sve_revb, 0b00, T == H || T == S || T == D);
INSN(sve_rbit, 0b11, T != Q);
#undef INSN
// SVE Create index starting from general-purpose register and incremented by immediate
void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
starti;
@ -3819,6 +3833,23 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
}
// Shuffle active elements of vector to the right and fill with zero
void sve_compact(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, PRegister Pg) {
starti;
assert(T == S || T == D, "invalid size");
f(0b00000101, 31, 24), f(T, 23, 22), f(0b100001100, 21, 13);
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
}
// SVE2 Count matching elements in vector
void sve_histcnt(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
FloatRegister Zn, FloatRegister Zm) {
starti;
assert(T == S || T == D, "invalid size");
f(0b01000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
f(0b110, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
}
// SVE2 bitwise permute
#define INSN(NAME, opc) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \

View file

@ -1113,10 +1113,12 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
sve_uzp1(dst, S, src, tmp);
break;
case H:
assert_different_registers(dst, tmp);
sve_uzp1(dst, S, src, tmp);
sve_uzp1(dst, H, dst, tmp);
break;
case B:
assert_different_registers(dst, tmp);
sve_uzp1(dst, S, src, tmp);
sve_uzp1(dst, H, dst, tmp);
sve_uzp1(dst, B, dst, tmp);
@ -1128,6 +1130,7 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
if (dst_size == H) {
sve_uzp1(dst, H, src, tmp);
} else { // B
assert_different_registers(dst, tmp);
sve_uzp1(dst, H, src, tmp);
sve_uzp1(dst, B, dst, tmp);
}
@ -1311,6 +1314,154 @@ void C2_MacroAssembler::sve_ptrue_lanecnt(PRegister dst, SIMD_RegVariant size, i
}
}
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
// Any remaining elements of dst will be filled with zero.
// Clobbers: rscratch1
// Preserves: src, mask
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2);
assert_different_registers(mask, pgtmp);
// Example input: src = 8888 7777 6666 5555 4444 3333 2222 1111
// mask = 0001 0000 0000 0001 0001 0000 0001 0001
// Expected result: dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_dup(vtmp2, H, 0);
// Extend lowest half to type INT.
// dst = 00004444 00003333 00002222 00001111
sve_uunpklo(dst, S, src);
// pgtmp = 00000001 00000000 00000001 00000001
sve_punpklo(pgtmp, mask);
// Pack the active INT-sized elements to the right,
// and fill the remaining elements with zero.
// dst = 00000000 00004444 00002222 00001111
sve_compact(dst, S, dst, pgtmp);
// Narrow the result back to type SHORT.
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
sve_uzp1(dst, H, dst, vtmp2);
// Count the active elements of lowest half.
// rscratch1 = 3
sve_cntp(rscratch1, S, ptrue, pgtmp);
// Repeat for the high half.
// pgtmp = 00000001 00000000 00000000 00000001
sve_punpkhi(pgtmp, mask);
// vtmp1 = 00008888 00007777 00006666 00005555
sve_uunpkhi(vtmp1, S, src);
// vtmp1 = 00000000 00000000 00008888 00005555
sve_compact(vtmp1, S, vtmp1, pgtmp);
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
sve_uzp1(vtmp1, H, vtmp1, vtmp2);
// Compressed low: dst = 0000 0000 0000 0000 0000 4444 2222 1111
// Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
// Left shift (cross-lane) the compressed high part by TRUE_CNT lanes, where
// TRUE_CNT is the number of active elements in the compressed low part.
neg(rscratch1, rscratch1);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, H, rscratch1, 1);
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
sve_tbl(vtmp1, H, vtmp1, vtmp2);
// Combine the compressed high part (after the shift) with the compressed low part.
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_orr(dst, dst, vtmp1);
}
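The cross-lane "left shift by TRUE_CNT lanes" above is done with a table lookup: sve_index builds indices starting at -TRUE_CNT and stepping by 1, and sve_tbl selects the indexed element from the compressed high part, with indices outside the vector (here the negative ones) producing zero. A scalar sketch of that single step (plain Java, illustrative helper only):

static short[] shiftLanesUp(short[] v, int trueCnt) {
    short[] out = new short[v.length];
    for (int lane = 0; lane < v.length; lane++) {
        int idx = lane - trueCnt;                              // sve_index(-TRUE_CNT, 1)
        out[lane] = (idx >= 0 && idx < v.length) ? v[idx] : 0; // sve_tbl: out of range -> 0
    }
    return out;
}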
// Clobbers: rscratch1, rscratch2
// Preserves: src, mask
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
assert_different_registers(mask, ptmp, pgtmp);
// Example input: src = 88 77 66 55 44 33 22 11
// mask = 01 00 00 01 01 00 01 01
// Expected result: dst = 00 00 00 88 55 44 22 11
sve_dup(vtmp4, B, 0);
// Extend lowest half to type SHORT.
// vtmp1 = 0044 0033 0022 0011
sve_uunpklo(vtmp1, H, src);
// ptmp = 0001 0000 0001 0001
sve_punpklo(ptmp, mask);
// Count the active elements of lowest half.
// rscratch2 = 3
sve_cntp(rscratch2, H, ptrue, ptmp);
// Pack the active SHORT-sized elements to the right,
// and fill the remaining elements with zero.
// dst = 0000 0044 0022 0011
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
// Narrow the result back to type BYTE.
// dst = 00 00 00 00 00 44 22 11
sve_uzp1(dst, B, dst, vtmp4);
// Repeat for the high half.
// ptmp = 0001 0000 0000 0001
sve_punpkhi(ptmp, mask);
// vtmp2 = 0088 0077 0066 0055
sve_uunpkhi(vtmp2, H, src);
// vtmp1 = 0000 0000 0088 0055
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
sve_dup(vtmp4, B, 0);
// vtmp1 = 00 00 00 00 00 00 88 55
sve_uzp1(vtmp1, B, vtmp1, vtmp4);
// Compressed low: dst = 00 00 00 00 00 44 22 11
// Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
// Left shift (cross-lane) the compressed high part by TRUE_CNT lanes, where
// TRUE_CNT is the number of active elements in the compressed low part.
neg(rscratch2, rscratch2);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, B, rscratch2, 1);
// vtmp1 = 00 00 00 88 55 00 00 00
sve_tbl(vtmp1, B, vtmp1, vtmp2);
// Combine the compressed high part (after the shift) with the compressed low part.
// dst = 00 00 00 88 55 44 22 11
sve_orr(dst, dst, vtmp1);
}
void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported basic type");
SIMD_Arrangement size = isQ ? T16B : T8B;
if (bt == T_BYTE) {
rbit(dst, size, src);
} else {
neon_reverse_bytes(dst, src, bt, isQ);
rbit(dst, size, dst);
}
}
void C2_MacroAssembler::neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported basic type");
SIMD_Arrangement size = isQ ? T16B : T8B;
switch (bt) {
case T_BYTE:
if (dst != src) {
orr(dst, size, src, src);
}
break;
case T_SHORT:
rev16(dst, size, src);
break;
case T_INT:
rev32(dst, size, src);
break;
case T_LONG:
rev64(dst, size, src);
break;
default:
assert(false, "unsupported");
ShouldNotReachHere();
}
}
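neon_reverse_bits composes a byte reverse (rev16/rev32/rev64) with a bit reverse inside each byte (rbit on byte lanes); for a whole element this is equivalent to a plain bit reverse. A scalar check for an int element (plain Java, illustrative helper only):

static int reverseBitsInEachByte(int x) {
    int r = 0;
    for (int i = 0; i < 4; i++) {
        int b = (x >>> (8 * i)) & 0xFF;
        r |= (Integer.reverse(b) >>> 24) << (8 * i); // rbit on one byte lane
    }
    return r;
}
// For any int x: Integer.reverse(x) == reverseBitsInEachByte(Integer.reverseBytes(x))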
// Extract a scalar element from an sve vector at position 'idx'.
// The input elements in src are expected to be of integral type.
void C2_MacroAssembler::sve_extract_integral(Register dst, SIMD_RegVariant size, FloatRegister src, int idx,

View file

@ -105,4 +105,20 @@
FloatRegister tmp2, PRegister ptmp,
SIMD_RegVariant T);
// Pack active elements of src, under the control of mask, into the
// lowest-numbered elements of dst. Any remaining elements of dst will
// be filled with zero.
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp);
void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp);
void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP

View file

@ -4838,18 +4838,54 @@ void Assembler::popcntl(Register dst, Register src) {
emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
}
void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
void Assembler::evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_bitalg(), "must support avx512bitalg feature");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x54, (0xC0 | encode));
}
void Assembler::evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_bitalg(), "must support avx512bitalg feature");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x54, (0xC0 | encode));
}
void Assembler::evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x55, (0xC0 | encode));
}
void Assembler::vpopcntq(XMMRegister dst, XMMRegister src, int vector_len) {
void Assembler::evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x55, (0xC0 | encode));
}
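The masked EVEX encoders above embed an opmask register and select between merge-masking (reset_is_clear_context when merge is true) and zero-masking. The per-lane effect, sketched for the popcount case (plain Java, illustration only):

static void maskedPopCount(long[] dst, long[] src, boolean[] mask, boolean merge) {
    for (int i = 0; i < dst.length; i++) {
        if (mask[i]) {
            dst[i] = Long.bitCount(src[i]);
        } else if (!merge) {
            dst[i] = 0;         // zero-masking clears inactive lanes
        }                       // merge-masking leaves inactive lanes unchanged
    }
}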
@ -7894,6 +7930,32 @@ void Assembler::evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, in
emit_int16(0x14, (unsigned char)(0xC0 | encode));
}
void Assembler::evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512cd(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x44, (0xC0 | encode));
}
void Assembler::evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512cd(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x44, (0xC0 | encode));
}
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
@ -7930,6 +7992,84 @@ void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegis
emit_int8(imm8);
}
void Assembler::evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x88, (0xC0 | encode));
}
void Assembler::evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x88, (0xC0 | encode));
}
void Assembler::evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x62, (0xC0 | encode));
}
void Assembler::evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x62, (0xC0 | encode));
}
void Assembler::evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x89, (0xC0 | encode));
}
void Assembler::evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x89, (0xC0 | encode));
}
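
For reference only (not part of this change): a minimal scalar sketch of what the masked expand forms encoded above do per lane. The function name and pointer-based signature are made up for illustration.

#include <cstdint>

// Scalar sketch of masked expand: consecutive source elements are written to
// the destination lanes whose mask bit is set; unselected lanes keep their old
// value under merge-masking or are zeroed under zero-masking.
static void expand_model(int64_t* dst, const int64_t* src, uint64_t mask, bool merge, int lanes) {
  int k = 0;                       // index of the next packed source element
  for (int i = 0; i < lanes; i++) {
    if ((mask >> i) & 1) {
      dst[i] = src[k++];           // place the next packed element into lane i
    } else if (!merge) {
      dst[i] = 0;                  // zero-masking clears unselected lanes
    }                              // merge-masking leaves them untouched
  }
}
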
// vinserti forms
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
@ -7973,7 +8113,7 @@ void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
}
void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this);
@ -8032,7 +8172,7 @@ void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8
}
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), "");
assert(VM_Version::supports_evex(), "");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@ -8045,7 +8185,7 @@ void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
}
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this);
@ -8346,6 +8486,20 @@ void Assembler::vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
emit_int16((unsigned char)0xF6, (0xC0 | encode));
}
void Assembler::vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x69, (0xC0 | encode));
}
void Assembler::vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x61, (0xC0 | encode));
}
void Assembler::vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@ -9862,6 +10016,14 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist
emit_int8(imm8);
}
void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
assert(VM_Version::supports_gfni(), "requires GFNI support");
assert(VM_Version::supports_sse(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | encode), imm8);
}
// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
assert(UseAVX >= 2, "");
@ -11555,6 +11717,85 @@ void Assembler::evpmovm2b(XMMRegister dst, KRegister src, int vector_len) {
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x28, (0xC0 | encode));
}
void Assembler::evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x63, (0xC0 | encode));
}
void Assembler::evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x63, (0xC0 | encode));
}
void Assembler::evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x8B, (0xC0 | encode));
}
void Assembler::evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x8B, (0xC0 | encode));
}
void Assembler::evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x8A, (0xC0 | encode));
}
void Assembler::evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x8A, (0xC0 | encode));
}
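
As with expand, a scalar sketch (illustrative only, hypothetical names) of the compress semantics the encoders above emit:

#include <cstdint>

// Scalar sketch of masked compress: source lanes whose mask bit is set are
// packed contiguously into the low lanes of the destination; the remaining
// lanes are left untouched under merge-masking or zeroed otherwise.
static void compress_model(int64_t* dst, const int64_t* src, uint64_t mask, bool merge, int lanes) {
  int k = 0;                       // next destination slot
  for (int i = 0; i < lanes; i++) {
    if ((mask >> i) & 1) {
      dst[k++] = src[i];           // pack the selected lane
    }
  }
  for (; k < lanes; k++) {
    if (!merge) {
      dst[k] = 0;                  // zero-masking clears the tail
    }
  }
}
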
#ifndef _LP64
void Assembler::incl(Register dst) {


@ -1878,8 +1878,10 @@ private:
void popcntl(Register dst, Address src);
void popcntl(Register dst, Register src);
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
#ifdef _LP64
void popcntq(Register dst, Address src);
@ -1945,6 +1947,12 @@ private:
void punpckldq(XMMRegister dst, Address src);
void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Interleave High Word
void vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Interleave Low Word
void vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Interleave High Doublewords
void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -2206,9 +2214,8 @@ private:
void shrxq(Register dst, Register src1, Register src2);
void bzhiq(Register dst, Register src1, Register src2);
void pdep(Register dst, Register src1, Register src2);
void pext(Register dst, Register src1, Register src2);
void pdep(Register dst, Register src1, Register src2);
//====================VECTOR ARITHMETIC=====================================
// Add Packed Floating-Point Values
@ -2437,6 +2444,8 @@ private:
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
void evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Sub packed integers
void psubb(XMMRegister dst, XMMRegister src);
@ -2581,6 +2590,21 @@ private:
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
// Vector compress/expand instructions.
void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Vector Rotate Left/Right instruction.
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@ -2737,6 +2761,10 @@ private:
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
// Galois field affine transformation instructions.
void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.


@ -2292,7 +2292,7 @@ void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister sr
case T_FLOAT: vaddps(dst, src1, src2, vlen_enc); return;
case T_LONG: vpaddq(dst, src1, src2, vlen_enc); return;
case T_DOUBLE: vaddpd(dst, src1, src2, vlen_enc); return;
default: assert(false, "%s", type2name(elem_bt));
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
}
}
@ -2309,7 +2309,7 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
case T_SHORT: evpbroadcastw(dst, src, vlen_enc); return;
case T_FLOAT: case T_INT: evpbroadcastd(dst, src, vlen_enc); return;
case T_DOUBLE: case T_LONG: evpbroadcastq(dst, src, vlen_enc); return;
default: assert(false, "%s", type2name(elem_bt));
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
}
} else {
assert(vlen_enc != Assembler::AVX_512bit, "required");
@ -2321,7 +2321,7 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
case T_FLOAT: movdl(dst, src); vbroadcastss(dst, dst, vlen_enc); return;
case T_LONG: movdq(dst, src); vpbroadcastq(dst, dst, vlen_enc); return;
case T_DOUBLE: movdq(dst, src); vbroadcastsd(dst, dst, vlen_enc); return;
default: assert(false, "%s", type2name(elem_bt));
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
}
}
}
@ -2348,7 +2348,9 @@ void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMR
vcvtdq2pd(dst, dst, vlen_enc);
break;
}
default: assert(false, "%s", type2name(to_elem_bt));
default:
fatal("Unsupported type %s", type2name(to_elem_bt));
break;
}
}
@ -4496,6 +4498,71 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
vector_mask_operation_helper(opc, dst, tmp, masklen);
}
void C2_MacroAssembler::vector_mask_compress(KRegister dst, KRegister src, Register rtmp1,
Register rtmp2, int mask_len) {
kmov(rtmp1, src);
andq(rtmp1, (0xFFFFFFFFFFFFFFFFUL >> (64 - mask_len)));
mov64(rtmp2, -1L);
pext(rtmp2, rtmp2, rtmp1);
kmov(dst, rtmp2);
}
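
A scalar model of the kmov/pext sequence above, shown for reference only (the helper name is hypothetical, and a GCC/Clang builtin is used for brevity): because the PEXT source operand is all ones, the result is a contiguous run of ones whose length is the population count of the length-limited source mask.

#include <cstdint>

// Assumes 1 <= mask_len <= 64.
static uint64_t mask_compress_model(uint64_t src_mask, int mask_len) {
  uint64_t masked = src_mask & (~0ULL >> (64 - mask_len));   // keep the low mask_len bits
  int bits = __builtin_popcountll(masked);                   // number of selected lanes
  return (bits == 64) ? ~0ULL : ((1ULL << bits) - 1);        // packed run of ones from bit 0
}
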
void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
bool merge, BasicType bt, int vec_enc) {
if (opcode == Op_CompressV) {
switch(bt) {
case T_BYTE:
evpcompressb(dst, mask, src, merge, vec_enc);
break;
case T_CHAR:
case T_SHORT:
evpcompressw(dst, mask, src, merge, vec_enc);
break;
case T_INT:
evpcompressd(dst, mask, src, merge, vec_enc);
break;
case T_FLOAT:
evcompressps(dst, mask, src, merge, vec_enc);
break;
case T_LONG:
evpcompressq(dst, mask, src, merge, vec_enc);
break;
case T_DOUBLE:
evcompresspd(dst, mask, src, merge, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
} else {
assert(opcode == Op_ExpandV, "");
switch(bt) {
case T_BYTE:
evpexpandb(dst, mask, src, merge, vec_enc);
break;
case T_CHAR:
case T_SHORT:
evpexpandw(dst, mask, src, merge, vec_enc);
break;
case T_INT:
evpexpandd(dst, mask, src, merge, vec_enc);
break;
case T_FLOAT:
evexpandps(dst, mask, src, merge, vec_enc);
break;
case T_LONG:
evpexpandq(dst, mask, src, merge, vec_enc);
break;
case T_DOUBLE:
evexpandpd(dst, mask, src, merge, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
}
#endif
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
@ -4559,6 +4626,34 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
}
}
void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc) {
int lane_size = type2aelembytes(bt);
bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if ((is_LP64 || lane_size < 8) &&
((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
(is_subword_type(bt) && VM_Version::supports_avx512vlbw()))) {
movptr(rtmp, imm32);
switch(lane_size) {
case 1 : evpbroadcastb(dst, rtmp, vec_enc); break;
case 2 : evpbroadcastw(dst, rtmp, vec_enc); break;
case 4 : evpbroadcastd(dst, rtmp, vec_enc); break;
case 8 : evpbroadcastq(dst, rtmp, vec_enc); break;
fatal("Unsupported lane size %d", lane_size);
break;
}
} else {
movptr(rtmp, imm32);
LP64_ONLY(movq(dst, rtmp)) NOT_LP64(movdl(dst, rtmp));
switch(lane_size) {
case 1 : vpbroadcastb(dst, dst, vec_enc); break;
case 2 : vpbroadcastw(dst, dst, vec_enc); break;
case 4 : vpbroadcastd(dst, dst, vec_enc); break;
case 8 : vpbroadcastq(dst, dst, vec_enc); break;
fatal("Unsupported lane size %d", lane_size);
break;
}
}
}
//
// Following is the lookup-table-based popcount computation algorithm:
@ -4589,62 +4684,98 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
// f. Perform step e. for upper 128bit vector lane.
// g. Pack the bitset count of quadwords back to double word.
// h. Unpacking and packing operations are not needed for 64bit vector lane.
void C2_MacroAssembler::vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc) {
assert((vec_enc == Assembler::AVX_512bit && VM_Version::supports_avx512bw()) || VM_Version::supports_avx2(), "");
vbroadcast(T_INT, xtmp1, 0x0F0F0F0F, rtmp, vec_enc);
vpsrlw(dst, src, 4, vec_enc);
vpand(dst, dst, xtmp1, vec_enc);
vpand(xtmp1, src, xtmp1, vec_enc);
vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp, vec_enc);
vpshufb(xtmp1, xtmp2, xtmp1, vec_enc);
vpshufb(dst, xtmp2, dst, vec_enc);
vpaddb(dst, dst, xtmp1, vec_enc);
}
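
For reference (illustrative only, not part of the patch), a scalar model of the vpshufb-based byte popcount above: the 16-entry table holds the bit count of each 4-bit value, and a byte's popcount is the sum of the counts of its two nibbles.

#include <cstdint>

static uint8_t popcount_byte_model(uint8_t b) {
  static const uint8_t lut[16] = {0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4};
  return uint8_t(lut[b & 0x0F] + lut[b >> 4]);   // lo-nibble count + hi-nibble count
}
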
void C2_MacroAssembler::vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
int vec_enc) {
if (VM_Version::supports_avx512_vpopcntdq()) {
vpopcntd(dst, src, vec_enc);
} else {
assert((vec_enc == Assembler::AVX_512bit && VM_Version::supports_avx512bw()) || VM_Version::supports_avx2(), "");
movl(rtmp, 0x0F0F0F0F);
movdl(xtmp1, rtmp);
vpbroadcastd(xtmp1, xtmp1, vec_enc);
if (Assembler::AVX_512bit == vec_enc) {
evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), false, vec_enc, rtmp);
} else {
vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp);
}
vpand(xtmp3, src, xtmp1, vec_enc);
vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
vpsrlw(dst, src, 4, vec_enc);
vpand(dst, dst, xtmp1, vec_enc);
vpshufb(dst, xtmp2, dst, vec_enc);
vpaddb(xtmp3, dst, xtmp3, vec_enc);
vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
vpunpckhdq(dst, xtmp3, xtmp1, vec_enc);
vpsadbw(dst, dst, xtmp1, vec_enc);
vpunpckldq(xtmp2, xtmp3, xtmp1, vec_enc);
vpsadbw(xtmp2, xtmp2, xtmp1, vec_enc);
vpackuswb(dst, xtmp2, dst, vec_enc);
}
XMMRegister xtmp2, Register rtmp, int vec_enc) {
vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);
// Following code is as per steps e,f,g and h of above algorithm.
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
vpunpckhdq(dst, xtmp1, xtmp2, vec_enc);
vpsadbw(dst, dst, xtmp2, vec_enc);
vpunpckldq(xtmp1, xtmp1, xtmp2, vec_enc);
vpsadbw(xtmp1, xtmp1, xtmp2, vec_enc);
vpackuswb(dst, xtmp1, dst, vec_enc);
}
void C2_MacroAssembler::vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc) {
vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);
// Add the popcount of upper and lower bytes of word.
vbroadcast(T_INT, xtmp2, 0x00FF00FF, rtmp, vec_enc);
vpsrlw(dst, xtmp1, 8, vec_enc);
vpand(xtmp1, xtmp1, xtmp2, vec_enc);
vpaddw(dst, dst, xtmp1, vec_enc);
}
void C2_MacroAssembler::vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
int vec_enc) {
if (VM_Version::supports_avx512_vpopcntdq()) {
vpopcntq(dst, src, vec_enc);
} else if (vec_enc == Assembler::AVX_512bit) {
assert(VM_Version::supports_avx512bw(), "");
movl(rtmp, 0x0F0F0F0F);
movdl(xtmp1, rtmp);
vpbroadcastd(xtmp1, xtmp1, vec_enc);
evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), true, vec_enc, rtmp);
vpandq(xtmp3, src, xtmp1, vec_enc);
vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
vpsrlw(dst, src, 4, vec_enc);
vpandq(dst, dst, xtmp1, vec_enc);
vpshufb(dst, xtmp2, dst, vec_enc);
vpaddb(xtmp3, dst, xtmp3, vec_enc);
vpxorq(xtmp1, xtmp1, xtmp1, vec_enc);
vpsadbw(dst, xtmp3, xtmp1, vec_enc);
} else {
// We do not see any performance benefit of running
// above instruction sequence on 256 bit vector which
// can operate over maximum 4 long elements.
ShouldNotReachHere();
XMMRegister xtmp2, Register rtmp, int vec_enc) {
vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
vpsadbw(dst, xtmp1, xtmp2, vec_enc);
}
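
The new vector_popcount_long computes per-byte counts and then uses vpsadbw against a zero vector to sum the eight byte counts of each quadword. A rough scalar model of that summing step (illustrative only, hypothetical name):

#include <cstdint>

// Given the per-byte popcounts of a quadword (one count per byte), summing the
// eight byte counts yields the popcount of the whole 64-bit lane.
static uint64_t sum_byte_counts_model(uint64_t per_byte_counts) {
  uint64_t sum = 0;
  for (int i = 0; i < 8; i++) {
    sum += (per_byte_counts >> (8 * i)) & 0xFF;  // add each byte's count
  }
  return sum;
}
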
void C2_MacroAssembler::vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc) {
switch(bt) {
case T_LONG:
vector_popcount_long(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
break;
case T_INT:
vector_popcount_int(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
break;
case T_CHAR:
case T_SHORT:
vector_popcount_short(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
break;
case T_BYTE:
case T_BOOLEAN:
vector_popcount_byte(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
void C2_MacroAssembler::vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
KRegister mask, bool merge, int vec_enc) {
assert(VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit, "");
switch(bt) {
case T_LONG:
assert(VM_Version::supports_avx512_vpopcntdq(), "");
evpopcntq(dst, mask, src, merge, vec_enc);
break;
case T_INT:
assert(VM_Version::supports_avx512_vpopcntdq(), "");
evpopcntd(dst, mask, src, merge, vec_enc);
break;
case T_CHAR:
case T_SHORT:
assert(VM_Version::supports_avx512_bitalg(), "");
evpopcntw(dst, mask, src, merge, vec_enc);
break;
case T_BYTE:
case T_BOOLEAN:
assert(VM_Version::supports_avx512_bitalg(), "");
evpopcntb(dst, mask, src, merge, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
evpmovqd(dst, dst, vec_enc);
}
#ifndef _LP64
@ -4655,6 +4786,374 @@ void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src,
}
#endif
// The bit reversal algorithm first reverses the bits of each byte, followed by
// a byte-level reversal for multi-byte primitive types (short/int/long).
// The algorithm performs a lookup table access to get the reversed bit sequence
// corresponding to a 4-bit value. Thus the reversed bit sequence for a byte
// is obtained by swapping the reversed bit sequences of its upper and lower
// nibbles.
void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc) {
if (VM_Version::supports_avx512vlbw()) {
// Get the reverse bit sequence of lower nibble of each byte.
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
vpandq(dst, xtmp2, src, vec_enc);
vpshufb(dst, xtmp1, dst, vec_enc);
vpsllq(dst, dst, 4, vec_enc);
// Get the reverse bit sequence of upper nibble of each byte.
vpandn(xtmp2, xtmp2, src, vec_enc);
vpsrlq(xtmp2, xtmp2, 4, vec_enc);
vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);
// OR the left-shifted reversed bit sequence of the lower nibble with the right-shifted
// reversed bit sequence of the upper nibble to obtain the reversed bit sequence of each byte.
vporq(xtmp2, dst, xtmp2, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
} else if(vec_enc == Assembler::AVX_512bit) {
// Shift based bit reversal.
assert(bt == T_LONG || bt == T_INT, "");
// Swap lower and upper nibble of each byte.
vector_swap_nbits(4, 0x0F0F0F0F, xtmp1, src, xtmp2, rtmp, vec_enc);
// Swap two least and most significant bits of each nibble.
vector_swap_nbits(2, 0x33333333, dst, xtmp1, xtmp2, rtmp, vec_enc);
// Swap adjacent pair of bits.
evmovdqul(xtmp1, k0, dst, true, vec_enc);
vector_swap_nbits(1, 0x55555555, dst, xtmp1, xtmp2, rtmp, vec_enc);
evmovdqul(xtmp1, k0, dst, true, vec_enc);
vector_reverse_byte64(bt, dst, xtmp1, xtmp1, xtmp2, rtmp, vec_enc);
} else {
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
// Get the reverse bit sequence of lower nibble of each byte.
vpand(dst, xtmp2, src, vec_enc);
vpshufb(dst, xtmp1, dst, vec_enc);
vpsllq(dst, dst, 4, vec_enc);
// Get the reverse bit sequence of upper nibble of each byte.
vpandn(xtmp2, xtmp2, src, vec_enc);
vpsrlq(xtmp2, xtmp2, 4, vec_enc);
vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);
// OR the left-shifted reversed bit sequence of the lower nibble with the right-shifted
// reversed bit sequence of the upper nibble to obtain the reversed bit sequence of each byte.
vpor(xtmp2, dst, xtmp2, vec_enc);
vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
}
}
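
For reference, a scalar model of the nibble-LUT byte reversal that the sequence above vectorizes (multi-byte types then get the byte-level reversal in vector_reverse_byte). Illustrative only; the name is made up.

#include <cstdint>

static uint8_t reverse_bits_byte_model(uint8_t b) {
  static const uint8_t rev4[16] = {0x0,0x8,0x4,0xC, 0x2,0xA,0x6,0xE,
                                   0x1,0x9,0x5,0xD, 0x3,0xB,0x7,0xF};
  // Reversed low nibble becomes the high nibble and vice versa.
  return uint8_t((rev4[b & 0x0F] << 4) | rev4[b >> 4]);
}
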
void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp, AddressLiteral mask, Register rtmp, int vec_enc) {
// Galois field instruction based bit reversal based on following algorithm.
// http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
assert(VM_Version::supports_gfni(), "");
vpbroadcastq(xtmp, mask, vec_enc, rtmp);
vgf2p8affineqb(xtmp, src, xtmp, 0, vec_enc);
vector_reverse_byte(bt, dst, xtmp, rtmp, vec_enc);
}
void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, Register rtmp, int vec_enc) {
vbroadcast(T_INT, xtmp1, bitmask, rtmp, vec_enc);
vpandq(dst, xtmp1, src, vec_enc);
vpsllq(dst, dst, nbits, vec_enc);
vpandn(xtmp1, xtmp1, src, vec_enc);
vpsrlq(xtmp1, xtmp1, nbits, vec_enc);
vporq(dst, dst, xtmp1, vec_enc);
}
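
The helper above implements a classic field-swap idiom; a one-line scalar equivalent, shown for reference only:

#include <cstdint>

// Swap adjacent n-bit fields selected by a repeating bitmask, e.g. nbits = 4
// with mask 0x0F0F0F0F... swaps the two nibbles of every byte.
static uint64_t swap_nbits_model(uint64_t x, int nbits, uint64_t mask) {
  return ((x & mask) << nbits) | ((x & ~mask) >> nbits);
}
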
void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc) {
// Shift based bit reversal.
assert(VM_Version::supports_evex(), "");
switch(bt) {
case T_LONG:
// Swap upper and lower double word of each quad word.
evprorq(xtmp1, k0, src, 32, true, vec_enc);
evprord(xtmp1, k0, xtmp1, 16, true, vec_enc);
vector_swap_nbits(8, 0x00FF00FF, dst, xtmp1, xtmp2, rtmp, vec_enc);
break;
case T_INT:
// Swap upper and lower word of each double word.
evprord(xtmp1, k0, src, 16, true, vec_enc);
vector_swap_nbits(8, 0x00FF00FF, dst, xtmp1, xtmp2, rtmp, vec_enc);
break;
case T_SHORT:
// Swap upper and lower byte of each word.
vector_swap_nbits(8, 0x00FF00FF, dst, src, xtmp2, rtmp, vec_enc);
break;
case T_BYTE:
evmovdquq(dst, k0, src, true, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc) {
if (bt == T_BYTE) {
if (VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit) {
evmovdquq(dst, k0, src, true, vec_enc);
} else {
vmovdqu(dst, src);
}
return;
}
// Perform byte reversal by shuffling the bytes of a multi-byte primitive type using
// pre-computed shuffle indices.
switch(bt) {
case T_LONG:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), rtmp, vec_enc);
break;
case T_INT:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), rtmp, vec_enc);
break;
case T_SHORT:
vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), rtmp, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
vpshufb(dst, src, dst, vec_enc);
}
void C2_MacroAssembler::vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
KRegister ktmp, Register rtmp, bool merge, int vec_enc) {
assert(is_integral_type(bt), "");
assert(VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit, "");
assert(VM_Version::supports_avx512cd(), "");
switch(bt) {
case T_LONG:
evplzcntq(dst, ktmp, src, merge, vec_enc);
break;
case T_INT:
evplzcntd(dst, ktmp, src, merge, vec_enc);
break;
case T_SHORT:
vpternlogd(xtmp1, 0xff, xtmp1, xtmp1, vec_enc);
vpunpcklwd(xtmp2, xtmp1, src, vec_enc);
evplzcntd(xtmp2, ktmp, xtmp2, merge, vec_enc);
vpunpckhwd(dst, xtmp1, src, vec_enc);
evplzcntd(dst, ktmp, dst, merge, vec_enc);
vpackusdw(dst, xtmp2, dst, vec_enc);
break;
case T_BYTE:
// T1 = Compute leading zero counts of 4 LSB bits of each byte by
// accessing the lookup table.
// T2 = Compute leading zero counts of 4 MSB bits of each byte by
// accessing the lookup table.
// Add T1 to T2 if 4 MSB bits of byte are all zeros.
assert(VM_Version::supports_avx512bw(), "");
evmovdquq(xtmp1, ExternalAddress(StubRoutines::x86::vector_count_leading_zeros_lut()), vec_enc, rtmp);
vbroadcast(T_INT, dst, 0x0F0F0F0F, rtmp, vec_enc);
vpand(xtmp2, dst, src, vec_enc);
vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);
vpsrlw(xtmp3, src, 4, vec_enc);
vpand(xtmp3, dst, xtmp3, vec_enc);
vpshufb(dst, xtmp1, xtmp3, vec_enc);
vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
evpcmpeqb(ktmp, xtmp1, xtmp3, vec_enc);
evpaddb(dst, ktmp, dst, xtmp2, true, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
void C2_MacroAssembler::vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_count_leading_zeros_lut()), rtmp);
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
// T1 = Compute leading zero counts of 4 LSB bits of each byte by
// accessing the lookup table.
vpand(dst, xtmp2, src, vec_enc);
vpshufb(dst, xtmp1, dst, vec_enc);
// T2 = Compute leading zero counts of 4 MSB bits of each byte by
// accessing the lookup table.
vpsrlw(xtmp3, src, 4, vec_enc);
vpand(xtmp3, xtmp2, xtmp3, vec_enc);
vpshufb(xtmp2, xtmp1, xtmp3, vec_enc);
// Add T1 to T2 if 4 MSB bits of byte are all zeros.
vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
vpcmpeqb(xtmp3, xtmp1, xtmp3, vec_enc);
vpaddb(dst, dst, xtmp2, vec_enc);
vpblendvb(dst, xtmp2, dst, xtmp3, vec_enc);
}
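
A rough scalar model (not in the patch, hypothetical name) of the per-byte CLZ the sequence above computes, using the same nibble table and the high-nibble-zero blend:

#include <cstdint>

static uint8_t clz_byte_model(uint8_t b) {
  // Table maps a nibble to its leading zero count.
  static const uint8_t lut[16] = {4,3,2,2, 1,1,1,1, 0,0,0,0, 0,0,0,0};
  uint8_t hi = uint8_t(b >> 4), lo = uint8_t(b & 0x0F);
  // If the high nibble is zero, its count (4) is added to the low nibble's count;
  // otherwise the high nibble's count alone is the byte's CLZ.
  return (hi == 0) ? uint8_t(lut[hi] + lut[lo]) : lut[hi];
}
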
void C2_MacroAssembler::vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
vector_count_leading_zeros_byte_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
// Add zero counts of lower byte and upper byte of a word if
// upper byte holds a zero value.
vpsrlw(xtmp3, src, 8, vec_enc);
// xtmp1 is set to all zeros by vector_count_leading_zeros_byte_avx.
vpcmpeqw(xtmp3, xtmp1, xtmp3, vec_enc);
vpsllw(xtmp2, dst, 8, vec_enc);
vpaddw(xtmp2, xtmp2, dst, vec_enc);
vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
vpsrlw(dst, dst, 8, vec_enc);
}
void C2_MacroAssembler::vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc) {
// Since the IEEE 754 floating point format represents the mantissa in a normalized
// 1.x form, the biased exponent can be used to compute the leading zero count as
// per the following formula:
// LZCNT = 32 - (biased_exp - 127)
// Special handling has been introduced for zero, max_int and negative source values.
// Broadcast 0xFF
vpcmpeqd(xtmp1, xtmp1, xtmp1, vec_enc);
vpsrld(xtmp1, xtmp1, 24, vec_enc);
// Extract biased exponent.
vcvtdq2ps(dst, src, vec_enc);
vpsrld(dst, dst, 23, vec_enc);
vpand(dst, dst, xtmp1, vec_enc);
// Broadcast 127.
vpsrld(xtmp1, xtmp1, 1, vec_enc);
// Exponent = biased_exp - 127
vpsubd(dst, dst, xtmp1, vec_enc);
// Exponent = Exponent + 1
vpsrld(xtmp3, xtmp1, 6, vec_enc);
vpaddd(dst, dst, xtmp3, vec_enc);
// Replace -ve exponent with zero, exponent is -ve when src
// lane contains a zero value.
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
vblendvps(dst, dst, xtmp2, dst, vec_enc);
// Rematerialize broadcast 32.
vpslld(xtmp1, xtmp3, 5, vec_enc);
// Exponent is 32 if corresponding source lane contains max_int value.
vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
// LZCNT = 32 - exponent
vpsubd(dst, xtmp1, dst, vec_enc);
// Replace LZCNT with a value 1 if corresponding source lane
// contains max_int value.
vpblendvb(dst, dst, xtmp3, xtmp2, vec_enc);
// Replace biased_exp with 0 if source lane value is less than zero.
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
vblendvps(dst, dst, xtmp2, src, vec_enc);
}
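
A rough scalar sketch of the core identity only, with a hypothetical name; it ignores the rounding of the int-to-float conversion near powers of two, which is one reason the vector code above blends in fixes for zero, negative and max_int lanes:

#include <cstdint>
#include <cstring>

static int clz_int_sketch(int32_t x) {
  if (x <= 0) return (x == 0) ? 32 : 0;          // special cases handled by the blends above
  float f = (float)x;                            // biased exponent encodes floor(log2(x))
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  int biased_exp = int((bits >> 23) & 0xFF);
  return 31 - (biased_exp - 127);                // i.e. 32 - (biased_exp - 127 + 1)
}
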
void C2_MacroAssembler::vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
vector_count_leading_zeros_short_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
// Add zero counts of lower word and upper word of a double word if
// upper word holds a zero value.
vpsrld(xtmp3, src, 16, vec_enc);
// xtmp1 is set to all zeros by vector_count_leading_zeros_byte_avx.
vpcmpeqd(xtmp3, xtmp1, xtmp3, vec_enc);
vpslld(xtmp2, dst, 16, vec_enc);
vpaddd(xtmp2, xtmp2, dst, vec_enc);
vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
vpsrld(dst, dst, 16, vec_enc);
// Add zero counts of lower doubleword and upper doubleword of a
// quadword if upper doubleword holds a zero value.
vpsrlq(xtmp3, src, 32, vec_enc);
vpcmpeqq(xtmp3, xtmp1, xtmp3, vec_enc);
vpsllq(xtmp2, dst, 32, vec_enc);
vpaddq(xtmp2, xtmp2, dst, vec_enc);
vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
vpsrlq(dst, dst, 32, vec_enc);
}
void C2_MacroAssembler::vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
Register rtmp, int vec_enc) {
assert(is_integral_type(bt), "unexpected type");
assert(vec_enc < Assembler::AVX_512bit, "");
switch(bt) {
case T_LONG:
vector_count_leading_zeros_long_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
break;
case T_INT:
vector_count_leading_zeros_int_avx(dst, src, xtmp1, xtmp2, xtmp3, vec_enc);
break;
case T_SHORT:
vector_count_leading_zeros_short_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
break;
case T_BYTE:
vector_count_leading_zeros_byte_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
void C2_MacroAssembler::vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc) {
switch(bt) {
case T_BYTE:
vpsubb(dst, src1, src2, vec_enc);
break;
case T_SHORT:
vpsubw(dst, src1, src2, vec_enc);
break;
case T_INT:
vpsubd(dst, src1, src2, vec_enc);
break;
case T_LONG:
vpsubq(dst, src1, src2, vec_enc);
break;
default:
fatal("Unsupported type %s", type2name(bt));
break;
}
}
// Trailing zero count computation is based on the leading zero count operation, as per
// the following equation. All AVX3 targets support the AVX512CD feature, which offers
// a direct vector instruction to compute the leading zero count.
// CTZ = PRIM_TYPE_WIDTH - CLZ((x - 1) & ~x)
void C2_MacroAssembler::vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
XMMRegister xtmp4, KRegister ktmp, Register rtmp, int vec_enc) {
assert(is_integral_type(bt), "");
// xtmp = -1
vpternlogd(xtmp4, 0xff, xtmp4, xtmp4, vec_enc);
// xtmp = xtmp + src
vpadd(bt, xtmp4, xtmp4, src, vec_enc);
// xtmp = xtmp & ~src
vpternlogd(xtmp4, 0x40, xtmp4, src, vec_enc);
vector_count_leading_zeros_evex(bt, dst, xtmp4, xtmp1, xtmp2, xtmp3, ktmp, rtmp, true, vec_enc);
vbroadcast(bt, xtmp4, 8 * type2aelembytes(bt), rtmp, vec_enc);
vpsub(bt, dst, xtmp4, dst, vec_enc);
}
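
A scalar model of the identity named in the comment above (illustrative only, naive CLZ loop): (x - 1) & ~x is a run of ones strictly below the lowest set bit of x (all ones when x == 0), so its leading zero count determines the trailing zero count of x.

#include <cstdint>

static int ctz32_model(uint32_t x) {
  uint32_t low_run = (x - 1) & ~x;
  int clz = 32;
  for (uint32_t v = low_run; v != 0; v >>= 1) {
    clz--;                                       // naive CLZ of low_run
  }
  return 32 - clz;                               // CTZ = WIDTH - CLZ(low_run)
}
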
// Trailing zero count computation for AVX2 targets is based on the popcount operation, as per the following equation:
// CTZ = PRIM_TYPE_WIDTH - POPC(x | -x)
void C2_MacroAssembler::vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
assert(is_integral_type(bt), "");
// xtmp = 0
vpxor(xtmp3 , xtmp3, xtmp3, vec_enc);
// xtmp = 0 - src
vpsub(bt, xtmp3, xtmp3, src, vec_enc);
// xtmp = xtmp | src
vpor(xtmp3, xtmp3, src, vec_enc);
vector_popcount_integral(bt, dst, xtmp3, xtmp1, xtmp2, rtmp, vec_enc);
vbroadcast(bt, xtmp1, 8 * type2aelembytes(bt), rtmp, vec_enc);
vpsub(bt, dst, xtmp1, dst, vec_enc);
}
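
Similarly, a scalar model of the AVX2 identity (illustrative only): x | -x sets every bit from the lowest set bit of x up to the MSB (and is zero for x == 0), so its popcount is WIDTH - CTZ(x).

#include <cstdint>

static int ctz32_popcount_model(uint32_t x) {
  uint32_t m = x | (0u - x);
  int pc = 0;
  for (uint32_t v = m; v != 0; v &= v - 1) {
    pc++;                                        // Kernighan popcount
  }
  return 32 - pc;                                // CTZ = WIDTH - POPC(x | -x)
}
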
void C2_MacroAssembler::udivI(Register rax, Register divisor, Register rdx) {
Label done;
Label neg_divisor_fastpath;
@ -4817,4 +5316,3 @@ void C2_MacroAssembler::udivmodL(Register rax, Register divisor, Register rdx, R
bind(done);
}
#endif


@ -88,6 +88,11 @@ public:
XMMRegister zero, XMMRegister one,
Register scratch);
void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
bool merge, BasicType bt, int vec_enc);
void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
@ -137,7 +142,6 @@ public:
#ifdef _LP64
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
#endif
void vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
@ -341,34 +345,89 @@ public:
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
#endif
void udivI(Register rax, Register divisor, Register rdx);
void umodI(Register rax, Register divisor, Register rdx);
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
#ifdef _LP64
void udivL(Register rax, Register divisor, Register rdx);
void umodL(Register rax, Register divisor, Register rdx);
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
bool merge, BasicType bt, int vlen_enc);
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
bool merge, BasicType bt, int vlen_enc);
void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp,
AddressLiteral mask, Register rtmp, int vec_enc);
void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc);
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
KRegister mask, bool merge, int vec_enc);
void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);
void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);
void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
KRegister ktmp, Register rtmp, bool merge, int vec_enc);
void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);
void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
Register rtmp, int vec_enc);
void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, Register rtmp, int vec_enc);
void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
XMMRegister xtmp1, int vec_enc);
void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
KRegister ktmp1, int vec_enc);
void udivI(Register rax, Register divisor, Register rdx);
void umodI(Register rax, Register divisor, Register rdx);
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
#ifdef _LP64
void udivL(Register rax, Register divisor, Register rdx);
void umodL(Register rax, Register divisor, Register rdx);
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
int vec_enc);
void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
int vec_enc);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP


@ -2577,8 +2577,9 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) {
assert(vector_len <= AVX_256bit, "AVX2 vector length");
if (vector_len == AVX_256bit) {
if (vector_len == AVX_512bit) {
evmovdquq(dst, src, AVX_512bit, scratch_reg);
} else if (vector_len == AVX_256bit) {
vmovdqu(dst, src, scratch_reg);
} else {
movdqu(dst, src, scratch_reg);
@ -3229,6 +3230,15 @@ void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_l
Assembler::vpbroadcastw(dst, src, vector_len);
}
void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::vpbroadcastq(dst, as_Address(src), vector_len);
} else {
lea(rscratch, src);
Assembler::vpbroadcastq(dst, Address(rscratch, 0), vector_len);
}
}
void MacroAssembler::vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::vbroadcastsd(dst, as_Address(src), vector_len);


@ -1347,6 +1347,11 @@ public:
using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
void vpbroadcastq(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


@ -186,15 +186,29 @@
// Returns pre-selection estimated size of a vector operation.
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
switch(vopc) {
default: return 0;
case Op_PopCountVI: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
case Op_PopCountVL: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
default:
return 0;
case Op_CountTrailingZerosV:
case Op_CountLeadingZerosV:
return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
case Op_PopCountVI:
if (is_subword_type(ety)) {
return VM_Version::supports_avx512_bitalg() ? 0 : 50;
} else {
assert(ety == T_INT, "sanity"); // for documentation purposes
return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
}
case Op_PopCountVL:
return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
case Op_ReverseV:
return VM_Version::supports_gfni() ? 0 : 30;
case Op_RoundVF: // fall through
case Op_RoundVD: {
return 30;
}
}
}
// Returns pre-selection estimated size of a scalar operation.
static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
switch(vopc) {


@ -588,6 +588,30 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_count_leading_zeros_lut(const char *stub_name) {
__ align64();
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x02020304, relocInfo::none, 0);
__ emit_data(0x01010101, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x02020304, relocInfo::none, 0);
__ emit_data(0x01010101, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x02020304, relocInfo::none, 0);
__ emit_data(0x01010101, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x02020304, relocInfo::none, 0);
__ emit_data(0x01010101, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
__ emit_data(0x00000000, relocInfo::none, 0);
return start;
}
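
The 16-byte pattern emitted above (repeated to fill 64 bytes) maps a nibble value to its leading zero count, matching bytes 04 03 02 02 01 01 01 01 00 ... 00 in memory. A sketch of how such a table could be generated for verification, shown for illustration only and not part of the stub:

#include <cstdint>

static void build_clz_nibble_lut(uint8_t lut[16]) {
  for (int i = 0; i < 16; i++) {
    int clz = 4;
    for (int v = i; v != 0; v >>= 1) {
      clz--;                        // 4-bit leading zero count of i
    }
    lut[i] = (uint8_t)clz;          // {4,3,2,2,1,1,1,1,0,...,0}
  }
}
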
address generate_popcount_avx_lut(const char *stub_name) {
__ align64();
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -635,6 +659,98 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_vector_reverse_bit_lut(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x0C040800, relocInfo::none, 0);
__ emit_data(0x0E060A02, relocInfo::none, 0);
__ emit_data(0x0D050901, relocInfo::none, 0);
__ emit_data(0x0F070B03, relocInfo::none, 0);
__ emit_data(0x0C040800, relocInfo::none, 0);
__ emit_data(0x0E060A02, relocInfo::none, 0);
__ emit_data(0x0D050901, relocInfo::none, 0);
__ emit_data(0x0F070B03, relocInfo::none, 0);
__ emit_data(0x0C040800, relocInfo::none, 0);
__ emit_data(0x0E060A02, relocInfo::none, 0);
__ emit_data(0x0D050901, relocInfo::none, 0);
__ emit_data(0x0F070B03, relocInfo::none, 0);
__ emit_data(0x0C040800, relocInfo::none, 0);
__ emit_data(0x0E060A02, relocInfo::none, 0);
__ emit_data(0x0D050901, relocInfo::none, 0);
__ emit_data(0x0F070B03, relocInfo::none, 0);
return start;
}
address generate_vector_reverse_byte_perm_mask_long(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
return start;
}
address generate_vector_reverse_byte_perm_mask_int(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x08090A0B, relocInfo::none, 0);
__ emit_data(0x0C0D0E0F, relocInfo::none, 0);
return start;
}
address generate_vector_reverse_byte_perm_mask_short(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x02030001, relocInfo::none, 0);
__ emit_data(0x06070405, relocInfo::none, 0);
__ emit_data(0x0A0B0809, relocInfo::none, 0);
__ emit_data(0x0E0F0C0D, relocInfo::none, 0);
__ emit_data(0x02030001, relocInfo::none, 0);
__ emit_data(0x06070405, relocInfo::none, 0);
__ emit_data(0x0A0B0809, relocInfo::none, 0);
__ emit_data(0x0E0F0C0D, relocInfo::none, 0);
__ emit_data(0x02030001, relocInfo::none, 0);
__ emit_data(0x06070405, relocInfo::none, 0);
__ emit_data(0x0A0B0809, relocInfo::none, 0);
__ emit_data(0x0E0F0C0D, relocInfo::none, 0);
__ emit_data(0x02030001, relocInfo::none, 0);
__ emit_data(0x06070405, relocInfo::none, 0);
__ emit_data(0x0A0B0809, relocInfo::none, 0);
__ emit_data(0x0E0F0C0D, relocInfo::none, 0);
return start;
}
address generate_vector_byte_shuffle_mask(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -4090,8 +4206,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut("count_leading_zeros_lut");
StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut("reverse_bit_lut");
StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long("perm_mask_long");
StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int("perm_mask_int");
StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short("perm_mask_short");
if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
// lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight.
StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
}


@ -807,6 +807,21 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_count_leading_zeros_lut(const char *stub_name) {
__ align64();
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
return start;
}
address generate_popcount_avx_lut(const char *stub_name) {
__ align64();
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -837,6 +852,66 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_vector_reverse_bit_lut(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
return start;
}
address generate_vector_reverse_byte_perm_mask_long(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
return start;
}
address generate_vector_reverse_byte_perm_mask_int(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
return start;
}
address generate_vector_reverse_byte_perm_mask_short(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
return start;
}
address generate_vector_byte_shuffle_mask(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -7955,8 +8030,13 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut("count_leading_zeros_lut");
StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut("reverse_bit_lut");
StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long("perm_mask_long");
StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int("perm_mask_int");
StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short("perm_mask_short");
if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
// LUT implementation influenced by the counting-1s algorithm from section 5-1 of Hacker's Delight.
StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
}
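
The popcount_lut set up here corresponds to the classic nibble lookup from Hacker's Delight: a byte's bit count is LUT[low nibble] + LUT[high nibble], which maps well onto a byte shuffle. A scalar Java sketch of the idea, for illustration only:

public class NibblePopCount {
    // 16-entry table: LUT[i] == number of set bits in the nibble i
    static final byte[] LUT = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

    static int popcountByte(int b) {
        return LUT[b & 0x0F] + LUT[(b >>> 4) & 0x0F];
    }

    public static void main(String[] args) {
        for (int b = 0; b < 256; b++) {
            assert popcountByte(b) == Integer.bitCount(b);
        }
        System.out.println(popcountByte(0xA7)); // 5
    }
}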

View file

@ -59,7 +59,12 @@ address StubRoutines::x86::_vector_double_sign_flip = NULL;
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
address StubRoutines::x86::_vector_long_sign_mask = NULL;
address StubRoutines::x86::_vector_iota_indices = NULL;
address StubRoutines::x86::_vector_reverse_bit_lut = NULL;
address StubRoutines::x86::_vector_reverse_byte_perm_mask_long = NULL;
address StubRoutines::x86::_vector_reverse_byte_perm_mask_int = NULL;
address StubRoutines::x86::_vector_reverse_byte_perm_mask_short = NULL;
address StubRoutines::x86::_vector_popcount_lut = NULL;
address StubRoutines::x86::_vector_count_leading_zeros_lut = NULL;
address StubRoutines::x86::_vector_32_bit_mask = NULL;
address StubRoutines::x86::_vector_64_bit_mask = NULL;
#ifdef _LP64

View file

@ -178,6 +178,11 @@ class x86 {
static address _vector_long_shuffle_mask;
static address _vector_iota_indices;
static address _vector_popcount_lut;
static address _vector_count_leading_zeros_lut;
static address _vector_reverse_bit_lut;
static address _vector_reverse_byte_perm_mask_long;
static address _vector_reverse_byte_perm_mask_int;
static address _vector_reverse_byte_perm_mask_short;
#ifdef _LP64
static juint _k256_W[];
static address _k256_W_adr;
@ -341,6 +346,26 @@ class x86 {
return _vector_iota_indices;
}
static address vector_count_leading_zeros_lut() {
return _vector_count_leading_zeros_lut;
}
static address vector_reverse_bit_lut() {
return _vector_reverse_bit_lut;
}
static address vector_reverse_byte_perm_mask_long() {
return _vector_reverse_byte_perm_mask_long;
}
static address vector_reverse_byte_perm_mask_int() {
return _vector_reverse_byte_perm_mask_int;
}
static address vector_reverse_byte_perm_mask_short() {
return _vector_reverse_byte_perm_mask_short;
}
static address vector_popcount_lut() {
return _vector_popcount_lut;
}

View file

@ -922,6 +922,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VNNI;
_features &= ~CPU_AVX512_VBMI;
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_AVX512_BITALG;
}
if (UseAVX < 2)
@ -951,6 +952,8 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_CLWB;
_features &= ~CPU_FLUSHOPT;
_features &= ~CPU_GFNI;
_features &= ~CPU_AVX512_BITALG;
}
}

View file

@ -370,10 +370,11 @@ protected:
decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
decl(HV, "hv", 46) /* Hypervisor instructions */ \
decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */ \
\
decl(RDTSCP, "rdtscp", 48) /* RDTSCP instruction */ \
decl(RDPID, "rdpid", 49) /* RDPID instruction */ \
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */ \
decl(GFNI, "gfni", 51) /* Vector GFNI instructions */ \
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@ -603,8 +604,12 @@ protected:
result |= CPU_AVX512_VPCLMULQDQ;
if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
result |= CPU_AVX512_VAES;
if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
result |= CPU_GFNI;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
result |= CPU_AVX512_VNNI;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
result |= CPU_AVX512_BITALG;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
result |= CPU_AVX512_VBMI;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
@ -918,7 +923,9 @@ public:
static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; }
static bool supports_gfni() { return (_features & CPU_GFNI) != 0; }
static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; }
static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
static bool supports_hv() { return (_features & CPU_HV) != 0; }

View file

@ -1241,10 +1241,20 @@ static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use
return vector_length_encoding(def);
}
static inline bool is_vector_popcount_predicate(BasicType bt) {
return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
static inline bool is_unsigned_booltest_pred(int bt) {
return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
}
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
(VM_Version::supports_avx512vl() || vlen_bytes == 64);
}
class Node::PD {
public:
enum NodeFlags {
@ -1405,12 +1415,12 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
case Op_PopCountVI:
if (!UsePopCountInstruction || (UseAVX < 2)) {
if (UseAVX < 2) {
return false;
}
break;
case Op_PopCountVL:
if (!UsePopCountInstruction || (UseAVX <= 2)) {
if (UseAVX < 2) {
return false;
}
break;
@ -1630,6 +1640,17 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_CompressM:
if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
return false;
}
break;
case Op_CompressV:
case Op_ExpandV:
if (!VM_Version::supports_avx512vl()) {
return false;
}
break;
case Op_SqrtF:
if (UseSSE < 1) {
return false;
@ -1651,6 +1672,11 @@ const bool Matcher::match_rule_supported(int opcode) {
//------------------------------------------------------------------------
static inline bool is_pop_count_instr_target(BasicType bt) {
return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
@ -1860,7 +1886,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
case Op_LoadVectorGatherMasked:
case Op_StoreVectorScatterMasked:
case Op_StoreVectorScatter:
if(is_subword_type(bt)) {
if (is_subword_type(bt)) {
return false;
} else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
return false;
@ -1887,6 +1913,23 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_CompressM:
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
return false;
}
break;
case Op_CompressV:
case Op_ExpandV:
if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
return false;
}
if (size_in_bits < 128) {
return false;
}
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
return false;
}
break;
case Op_VectorLongToMask:
if (UseAVX < 1 || !is_LP64) {
return false;
@ -1902,14 +1945,22 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
}
break;
case Op_PopCountVI:
if (!VM_Version::supports_avx512_vpopcntdq() &&
(vlen == 16) && !VM_Version::supports_avx512bw()) {
case Op_PopCountVL: {
if (!is_pop_count_instr_target(bt) &&
(size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
return false;
}
}
break;
case Op_ReverseV:
case Op_ReverseBytesV:
if (UseAVX < 2) {
return false;
}
break;
case Op_PopCountVL:
if (!VM_Version::supports_avx512_vpopcntdq() &&
((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) {
case Op_CountTrailingZerosV:
case Op_CountLeadingZerosV:
if (UseAVX < 2) {
return false;
}
break;
@ -2057,9 +2108,20 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
}
return true;
case Op_PopCountVI:
case Op_PopCountVL:
if (!is_pop_count_instr_target(bt)) {
return false;
}
return true;
case Op_MaskAll:
return true;
case Op_CountLeadingZerosV:
if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
return true;
}
default:
return false;
}
@ -8705,58 +8767,151 @@ instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
// --------------------------------- PopCount --------------------------------------
instruct vpopcountI_popcntd(vec dst, vec src) %{
predicate(VM_Version::supports_avx512_vpopcntdq());
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
match(Set dst (PopCountVI src));
format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %}
match(Set dst (PopCountVL src));
ins_cost(400);
format %{ "vector_popcount_integral $dst, $src" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
int vlen_enc = vector_length_encoding(this);
__ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
predicate(!VM_Version::supports_avx512_vpopcntdq());
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
match(Set dst (PopCountVI src mask));
match(Set dst (PopCountVL src mask));
format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
match(Set dst (PopCountVI src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
format %{ "vector_popcount_int $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
match(Set dst (PopCountVL src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
int vlen_enc = vector_length_encoding(this);
__ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
$xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
int opcode = this->ideal_Opcode();
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
if (VM_Version::supports_avx512vl()) {
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
assert(VM_Version::supports_avx2(), "");
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
}
}
%}
ins_pipe( pipe_slow );
%}
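
At the Java level these PopCountVI/PopCountVL rules serve both auto-vectorized Integer.bitCount/Long.bitCount loops and the Vector API's lanewise bit count. A short usage sketch against the incubator API (assuming the JDK 19 shape of VectorOperators.BIT_COUNT):

import jdk.internal.vm.annotation.ForceInline; // not needed; shown code uses only the public API
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class VectorBitCount {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    public static void main(String[] args) {
        int[] src = {0, 1, 3, 7, 0xFF, -1, 0x80000000, 12345};
        int[] dst = new int[src.length];
        for (int i = 0; i < SPECIES.loopBound(src.length); i += SPECIES.length()) {
            IntVector v = IntVector.fromArray(SPECIES, src, i);
            v.lanewise(VectorOperators.BIT_COUNT).intoArray(dst, i);  // PopCountVI on supporting CPUs
        }
        for (int i = SPECIES.loopBound(src.length); i < src.length; i++) {
            dst[i] = Integer.bitCount(src[i]);                        // scalar tail
        }
        System.out.println(java.util.Arrays.toString(dst));           // [0, 1, 2, 3, 8, 32, 1, 6]
    }
}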
instruct vpopcountL_popcntd(vec dst, vec src) %{
predicate(VM_Version::supports_avx512_vpopcntdq());
match(Set dst (PopCountVL src));
format %{ "vector_popcount_long $dst, $src\t! vector popcount packedL" %}
// --------------------------------- Vector Trailing Zeros Count --------------------------------------
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
Matcher::vector_length_in_bytes(n->in(1))));
match(Set dst (CountTrailingZerosV src));
effect(TEMP dst, TEMP xtmp, TEMP rtmp);
ins_cost(400);
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
int vlen_enc = vector_length_encoding(this, $src);
__ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
BasicType rbt = Matcher::vector_element_basic_type(this);
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (bt == T_LONG && rbt == T_INT) {
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
predicate(!VM_Version::supports_avx512_vpopcntdq());
match(Set dst (PopCountVL src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
format %{ "vector_popcount_long $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
VM_Version::supports_avx512cd() &&
(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
match(Set dst (CountTrailingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
ins_cost(400);
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
int vlen_enc = vector_length_encoding(this, $src);
__ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
$xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
match(Set dst (CountTrailingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
ins_cost(400);
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
$ktmp$$KRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
match(Set dst (CountTrailingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
BasicType rbt = Matcher::vector_element_basic_type(this);
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (bt == T_LONG && rbt == T_INT) {
assert(VM_Version::supports_avx2(), "");
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
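
Where a lane type has no direct trailing-zero instruction, a standard reduction is to isolate the lowest set bit and reuse the leading-zero count: tzcnt(x) = W - 1 - lzcnt(x & -x), with x == 0 special-cased. The macro-assembler helpers may differ in detail; the scalar Java sketch below only illustrates the identity:

public class TrailingZerosViaLzcnt {
    static int tzcnt32(int x) {
        if (x == 0) {
            return Integer.SIZE;                       // all 32 bits count as trailing zeros
        }
        int lowestBit = x & -x;                        // isolate the lowest set bit
        return Integer.SIZE - 1 - Integer.numberOfLeadingZeros(lowestBit);
    }

    public static void main(String[] args) {
        for (int x : new int[] {0, 1, 8, 0x80000000, 0xFFF0, -2}) {
            assert tzcnt32(x) == Integer.numberOfTrailingZeros(x);
        }
        System.out.println(tzcnt32(0xFFF0)); // 4
    }
}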
// --------------------------------- Bitwise Ternary Logic ----------------------------------
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
@ -9031,8 +9186,200 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp,
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Compress/Expand Operations ---------------------------
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
match(Set dst (CompressV src mask));
match(Set dst (ExpandV src mask));
format %{ "vector_compress_expand $dst, $src, $mask" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vector_len = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
__ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
match(Set dst (CompressM mask));
effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
ins_encode %{
assert(this->in(1)->bottom_type()->isa_vectmask(), "");
int mask_len = Matcher::vector_length(this);
__ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64
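
Lane-wise, CompressV gathers the elements at set mask positions into the low lanes and zeroes the rest, ExpandV performs the inverse placement, and CompressM applies the same packing to the mask bits themselves. A scalar Java reference of those semantics (a sketch, not the EVEX implementation):

public class CompressExpandSemantics {
    static int[] compress(int[] v, boolean[] m) {
        int[] r = new int[v.length];
        int j = 0;
        for (int i = 0; i < v.length; i++) {
            if (m[i]) r[j++] = v[i];       // selected lanes packed toward lane 0
        }
        return r;                          // remaining lanes stay zero
    }

    static int[] expand(int[] v, boolean[] m) {
        int[] r = new int[v.length];
        int j = 0;
        for (int i = 0; i < v.length; i++) {
            if (m[i]) r[i] = v[j++];       // low lanes of the source scattered to set positions
        }
        return r;
    }

    public static void main(String[] args) {
        int[] v = {10, 20, 30, 40};
        boolean[] m = {true, false, true, false};
        System.out.println(java.util.Arrays.toString(compress(v, m))); // [10, 30, 0, 0]
        System.out.println(java.util.Arrays.toString(expand(v, m)));   // [10, 0, 20, 0]
    }
}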
// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
predicate(!VM_Version::supports_gfni());
match(Set dst (ReverseV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
ins_encode %{
int vec_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
__ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{
predicate(VM_Version::supports_gfni());
match(Set dst (ReverseV src));
effect(TEMP dst, TEMP xtmp, TEMP rtmp);
format %{ "vector_reverse_bit_gfni $dst, $src!\t using $rtmp and $xtmp as TEMP" %}
ins_encode %{
int vec_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
__ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
addr, $rtmp$$Register, vec_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{
predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
match(Set dst (ReverseBytesV src));
effect(TEMP dst, TEMP rtmp);
format %{ "vector_reverse_byte $dst, $src!\t using $rtmp as TEMP" %}
ins_encode %{
int vec_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
__ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
match(Set dst (ReverseBytesV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
ins_encode %{
int vec_enc = vector_length_encoding(this);
BasicType bt = Matcher::vector_element_basic_type(this);
__ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
%}
ins_pipe( pipe_slow );
%}
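
Per lane, ReverseV matches the scalar Integer.reverse/Long.reverse semantics. The GFNI rule above presumably uses the 0x8040201008040201 bit matrix to reverse the bits inside each byte, leaving only a byte reordering; the Java sketch below illustrates that decomposition for a 32-bit lane (an assumption about the approach, not the exact assembler sequence):

public class ReverseLaneSketch {
    // Reverse the bits within every byte of a 32-bit value.
    static int reverseBitsPerByte(int x) {
        int r = 0;
        for (int byteIdx = 0; byteIdx < 4; byteIdx++) {
            int b = (x >>> (8 * byteIdx)) & 0xFF;
            int rb = Integer.reverse(b) >>> 24;        // bit-reverse of one byte
            r |= rb << (8 * byteIdx);
        }
        return r;
    }

    public static void main(String[] args) {
        int x = 0x12345678;
        // Full bit reversal == per-byte bit reversal followed by byte reversal.
        int viaDecomposition = Integer.reverseBytes(reverseBitsPerByte(x));
        assert viaDecomposition == Integer.reverse(x);
        System.out.printf("0x%08X%n", viaDecomposition); // 0x1E6A2C48
    }
}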
// ---------------------------------- Vector Count Leading Zeros -----------------------------------
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
Matcher::vector_length_in_bytes(n->in(1))));
match(Set dst (CountLeadingZerosV src));
format %{ "vector_count_leading_zeros $dst, $src" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
BasicType rbt = Matcher::vector_element_basic_type(this);
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
xnoreg, xnoreg, k0, noreg, true, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (rbt == T_INT && bt == T_LONG) {
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
Matcher::vector_length_in_bytes(n->in(1))));
match(Set dst (CountLeadingZerosV src mask));
format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
VM_Version::supports_avx512cd() &&
(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
match(Set dst (CountLeadingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
match(Set dst (CountLeadingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
$rtmp$$Register, true, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
match(Set dst (CountLeadingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
__ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
match(Set dst (CountLeadingZerosV src));
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this, $src);
BasicType bt = Matcher::vector_element_basic_type(this, $src);
BasicType rbt = Matcher::vector_element_basic_type(this);
__ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
// should be succeeded by its corresponding vector IR and following
// special handling should be removed.
if (rbt == T_INT && bt == T_LONG) {
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
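
CountLeadingZerosV follows the scalar Integer/Long.numberOfLeadingZeros semantics per lane. For subword lanes one simple reference is to zero-extend to 32 bits, count there, and subtract the extra width; the sketch below documents the expected lane results, not the table-lookup strategy the AVX rules above implement:

public class SubwordLeadingZeros {
    static int lzcntShort(short x) {
        int widened = x & 0xFFFF;                             // zero-extend to 32 bits
        return Integer.numberOfLeadingZeros(widened) - 16;    // drop the 16 extra high zero bits
    }

    static int lzcntByte(byte x) {
        int widened = x & 0xFF;
        return Integer.numberOfLeadingZeros(widened) - 24;
    }

    public static void main(String[] args) {
        System.out.println(lzcntShort((short) 0x0001)); // 15
        System.out.println(lzcntShort((short) 0));      // 16
        System.out.println(lzcntByte((byte) 0x10));     // 3
    }
}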
// ---------------------------------- Vector Masked Operations ------------------------------------
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{

View file

@ -4212,6 +4212,7 @@ bool MatchRule::is_vector() const {
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
"MaxV", "MinV",
"CompressV", "ExpandV", "CompressM",
"AddReductionVI", "AddReductionVL",
"AddReductionVF", "AddReductionVD",
"MulReductionVI", "MulReductionVL",
@ -4223,7 +4224,7 @@ bool MatchRule::is_vector() const {
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","PopulateIndex",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","ReverseV","ReverseBytesV",
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
@ -4232,7 +4233,8 @@ bool MatchRule::is_vector() const {
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "SignumVF", "SignumVD", "VectorLongToMask",
"FmaVD","FmaVF","PopCountVI","PopCountVL","PopulateIndex","VectorLongToMask",
"CountLeadingZerosV", "CountTrailingZerosV", "SignumVF", "SignumVD",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
"RoundVF", "RoundVD",

View file

@ -935,7 +935,7 @@ class methodHandle;
"Ljava/lang/Object;" \
"J" \
"Ljava/lang/Object;" \
"I" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
@ -950,7 +950,7 @@ class methodHandle;
"J" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;" \
"I" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
@ -962,8 +962,10 @@ class methodHandle;
"I" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"Ljava/lang/Object;" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \
"V") \
do_name(vector_store_op_name, "store") \
\
@ -977,7 +979,7 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljava/lang/Object;" \
"I" \
"J" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)" \
"V") \
do_name(vector_store_masked_op_name, "storeMasked") \
@ -1137,6 +1139,17 @@ class methodHandle;
"J") \
do_name(vector_mask_oper_name, "maskReductionCoerced") \
\
do_intrinsic(_VectorCompressExpand, jdk_internal_vm_vector_VectorSupport, vector_compress_expand_op_name, vector_compress_expand_op_sig, F_S)\
do_signature(vector_compress_expand_op_sig, "(I" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_compress_expand_op_name, "compressExpandOp") \
/* (2) Bytecode intrinsics */ \
\
do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \
@ -1245,7 +1258,7 @@ enum class vmIntrinsicID : int {
__IGNORE_CLASS, __IGNORE_NAME, __IGNORE_SIGNATURE, __IGNORE_ALIAS)
ID_LIMIT,
LAST_COMPILER_INLINE = _VectorMaskOp,
LAST_COMPILER_INLINE = _VectorCompressExpand,
FIRST_MH_SIG_POLY = _invokeGeneric,
FIRST_MH_STATIC = _linkToVirtual,
LAST_MH_SIG_POLY = _linkToNative,

View file

@ -715,6 +715,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_Continuation_doYield:
break;
case vmIntrinsics::_VectorCompressExpand:
case vmIntrinsics::_VectorUnaryOp:
case vmIntrinsics::_VectorBinaryOp:
case vmIntrinsics::_VectorTernaryOp:

View file

@ -51,6 +51,7 @@ macro(ReverseBytesI)
macro(ReverseBytesL)
macro(ReverseBytesUS)
macro(ReverseBytesS)
macro(ReverseBytesV)
macro(CProj)
macro(CacheWB)
macro(CacheWBPreSync)
@ -74,6 +75,8 @@ macro(Catch)
macro(CatchProj)
macro(CheckCastPP)
macro(ClearArray)
macro(CompressBits)
macro(ExpandBits)
macro(ConstraintCast)
macro(CMoveD)
macro(CMoveVD)
@ -152,8 +155,10 @@ macro(LongCountedLoop)
macro(LongCountedLoopEnd)
macro(CountLeadingZerosI)
macro(CountLeadingZerosL)
macro(CountLeadingZerosV)
macro(CountTrailingZerosI)
macro(CountTrailingZerosL)
macro(CountTrailingZerosV)
macro(CreateEx)
macro(DecodeN)
macro(DecodeNKlass)
@ -285,6 +290,9 @@ macro(RShiftL)
macro(Region)
macro(Rethrow)
macro(Return)
macro(ReverseI)
macro(ReverseL)
macro(ReverseV)
macro(Root)
macro(RoundDouble)
macro(RoundDoubleMode)
@ -424,6 +432,9 @@ macro(MinV)
macro(MaxV)
macro(MinReductionV)
macro(MaxReductionV)
macro(CompressV)
macro(CompressM)
macro(ExpandV)
macro(LoadVector)
macro(LoadVectorGather)
macro(LoadVectorGatherMasked)

View file

@ -700,6 +700,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_insert();
case vmIntrinsics::_VectorExtract:
return inline_vector_extract();
case vmIntrinsics::_VectorCompressExpand:
return inline_vector_compress_expand();
case vmIntrinsics::_getObjectSize:
return inline_getObjectSize();

View file

@ -344,6 +344,8 @@ class LibraryCallKit : public GraphKit {
bool inline_vector_convert();
bool inline_vector_extract();
bool inline_vector_insert();
bool inline_vector_compress_expand();
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
enum VectorMaskUseType {

View file

@ -974,6 +974,9 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
case Op_RoundD: {
body_size += Matcher::scalar_op_pre_select_sz_estimate(n->Opcode(), n->bottom_type()->basic_type());
} break;
case Op_CountTrailingZerosV:
case Op_CountLeadingZerosV:
case Op_ReverseV:
case Op_RoundVF:
case Op_RoundVD:
case Op_PopCountVI:

View file

@ -2254,6 +2254,9 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
case Op_MacroLogicV:
case Op_LoadVectorMasked:
case Op_VectorCmpMasked:
case Op_CompressV:
case Op_CompressM:
case Op_ExpandV:
case Op_VectorLoadMask:
set_shared(n); // Force result into register (it will be anyways)
break;

View file

@ -389,4 +389,20 @@ public:
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------CompressBitsNode-------------------------------
// CompressBits placeholder node
class CompressBitsNode : public Node {
public:
CompressBitsNode(Node *in1, Node *in2) : Node(0,in1,in2) {}
virtual int Opcode() const;
};
//------------------------------ExpandBitsNode---------------------------------
// ExpandBits placeholder node
class ExpandBitsNode : public Node {
public:
ExpandBitsNode(Node *in1, Node *in2) : Node(0,in1,in2) {}
virtual int Opcode() const;
};
#endif // SHARE_OPTO_MULNODE_HPP
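
These placeholder nodes correspond to scalar bit compress/expand (PEXT/PDEP-style), presumably matching the semantics of the Integer.compress and Integer.expand library methods; the vop2ideal mapping later in this diff returns them, while VectorNode::opcode still reports them as unimplemented. A small Java reference of the bit-level semantics, as a sketch:

public class BitCompressExpand {
    // Gather the bits of x selected by mask into the low-order bits of the result (PEXT-like).
    static int compressBits(int x, int mask) {
        int result = 0, outBit = 0;
        for (int m = mask; m != 0; m &= m - 1) {       // iterate over the set bits of mask
            int bit = m & -m;
            if ((x & bit) != 0) {
                result |= 1 << outBit;
            }
            outBit++;
        }
        return result;
    }

    // Scatter the low-order bits of x to the positions selected by mask (PDEP-like).
    static int expandBits(int x, int mask) {
        int result = 0, inBit = 0;
        for (int m = mask; m != 0; m &= m - 1) {
            int bit = m & -m;
            if ((x & (1 << inBit)) != 0) {
                result |= bit;
            }
            inBit++;
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.printf("0x%X%n", compressBits(0xCAFE, 0x0F0F)); // 0xAE
        System.out.printf("0x%X%n", expandBits(0xAE, 0x0F0F));     // 0xA0E
    }
}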

View file

@ -175,6 +175,9 @@ class VectorUnboxNode;
class VectorSet;
class VectorReinterpretNode;
class ShiftVNode;
class ExpandVNode;
class CompressVNode;
class CompressMNode;
#ifndef OPTO_DU_ITERATOR_ASSERT
@ -704,6 +707,9 @@ public:
DEFINE_CLASS_ID(VectorUnbox, Vector, 1)
DEFINE_CLASS_ID(VectorReinterpret, Vector, 2)
DEFINE_CLASS_ID(ShiftV, Vector, 3)
DEFINE_CLASS_ID(CompressV, Vector, 4)
DEFINE_CLASS_ID(ExpandV, Vector, 5)
DEFINE_CLASS_ID(CompressM, Vector, 6)
DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
@ -777,7 +783,8 @@ public:
Flag_is_predicated_vector = 1 << 14,
Flag_for_post_loop_opts_igvn = 1 << 15,
Flag_is_removed_by_peephole = 1 << 16,
_last_flag = Flag_is_removed_by_peephole
Flag_is_predicated_using_blend = 1 << 17,
_last_flag = Flag_is_predicated_using_blend
};
class PD;
@ -931,7 +938,10 @@ public:
DEFINE_CLASS_QUERY(Vector)
DEFINE_CLASS_QUERY(VectorMaskCmp)
DEFINE_CLASS_QUERY(VectorUnbox)
DEFINE_CLASS_QUERY(VectorReinterpret);
DEFINE_CLASS_QUERY(VectorReinterpret)
DEFINE_CLASS_QUERY(CompressV)
DEFINE_CLASS_QUERY(ExpandV)
DEFINE_CLASS_QUERY(CompressM)
DEFINE_CLASS_QUERY(LoadVector)
DEFINE_CLASS_QUERY(LoadVectorGather)
DEFINE_CLASS_QUERY(StoreVector)
@ -989,6 +999,8 @@ public:
bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; }
bool is_predicated_using_blend() const { return (_flags & Flag_is_predicated_using_blend) != 0; }
// Used in lcm to mark nodes that have scheduled
bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }

View file

@ -548,4 +548,24 @@ public:
virtual uint ideal_reg() const { return Op_RegI; }
};
//-------------------------------ReverseINode--------------------------------
// reverse bits of an int
class ReverseINode : public Node {
public:
ReverseINode(Node *c, Node *in1) : Node(c, in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//-------------------------------ReverseLNode--------------------------------
// reverse bits of a long
class ReverseLNode : public Node {
public:
ReverseLNode(Node *c, Node *in1) : Node(c, in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};
#endif // SHARE_OPTO_SUBNODE_HPP

View file

@ -2585,7 +2585,9 @@ bool SuperWord::output() {
opc == Op_AbsI || opc == Op_AbsL ||
opc == Op_NegF || opc == Op_NegD ||
opc == Op_RoundF || opc == Op_RoundD ||
opc == Op_PopCountI || opc == Op_PopCountL) {
opc == Op_PopCountI || opc == Op_PopCountL ||
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
assert(n->req() == 2, "only one input expected");
Node* in = vector_opd(p, 1);
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
@ -3092,9 +3094,9 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
return true;
}
if (VectorNode::is_vpopcnt_long(use)) {
// VPOPCNT_LONG takes long and produces int - hence the special checks
// on alignment and size.
if (VectorNode::is_type_transition_long_to_int(use)) {
// PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces
// int - hence the special checks on alignment and size.
if (u_pk->size() != d_pk->size()) {
return false;
}
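
The size check above exists because Long.bitCount, Long.numberOfLeadingZeros and Long.numberOfTrailingZeros take a long but return an int, so the def packs and use packs carry different element sizes when SuperWord vectorizes a loop of the following shape (illustrative example):

public class LongToIntTransition {
    // Candidate for auto-vectorization into PopCountVL feeding int stores:
    // the input lanes are 64-bit, the produced counts are 32-bit.
    static void bitCounts(long[] src, int[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = Long.bitCount(src[i]);
        }
    }

    public static void main(String[] args) {
        long[] src = {0L, 1L, -1L, 0x8000000000000000L};
        int[] dst = new int[src.length];
        bitCounts(src, dst);
        System.out.println(java.util.Arrays.toString(dst)); // [0, 1, 64, 1]
    }
}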

View file

@ -80,9 +80,12 @@ bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicTyp
}
if (is_supported) {
// Check whether mask unboxing is supported.
// Check if mask unboxing is supported. This is a two-step process: the contents of a boolean
// array are first loaded into a vector, followed by either lane expansion to match the lane size
// of the masked vector operation or population of the predicate register.
if ((mask_use_type & VecMaskUseLoad) != 0) {
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt)) {
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt) ||
!Matcher::match_rule_supported_vector(Op_LoadVector, num_elem, T_BOOLEAN)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
@ -260,9 +263,12 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
return false;
}
// Check whether mask unboxing is supported.
// Check if mask unboxing is supported. This is a two-step process: the contents of a boolean
// array are first loaded into a vector, followed by either lane expansion to match the lane size
// of the masked vector operation or population of the predicate register.
if ((mask_use_type & VecMaskUseLoad) != 0) {
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) {
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type) ||
!Matcher::match_rule_supported_vector(Op_LoadVector, num_elem, T_BOOLEAN)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
@ -273,9 +279,12 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
}
}
// Check whether mask boxing is supported.
// Check if mask boxing is supported. This is a two-step process: the contents of the mask vector /
// predicate register are first stored into a boolean vector, followed by a vector store operation
// to transfer the contents to the underlying storage of the mask box, which is a boolean array.
if ((mask_use_type & VecMaskUseStore) != 0) {
if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) {
if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type) ||
!Matcher::match_rule_supported_vector(Op_StoreVector, num_elem, T_BOOLEAN)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr("Rejected vector mask storing (%s,%s,%d) because architecture does not support it",
@ -560,6 +569,7 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
operation->add_req(mask);
operation->add_flag(Node::Flag_is_predicated_vector);
} else {
operation->add_flag(Node::Flag_is_predicated_using_blend);
operation = gvn().transform(operation);
operation = new VectorBlendNode(opd1, operation, mask);
}
@ -695,16 +705,8 @@ bool LibraryCallKit::inline_vector_mask_operation() {
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
BasicType elem_bt = elem_type->basic_type();
if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
Op_LoadVector, num_elem, type2name(T_BOOLEAN));
}
return false; // not supported
}
int mopc = VectorSupport::vop2ideal(oper->get_con(), elem_bt);
if (!arch_supports_vector(mopc, num_elem, elem_bt, VecMaskNotUsed)) {
if (!arch_supports_vector(mopc, num_elem, elem_bt, VecMaskUseLoad)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
mopc, num_elem, type2name(elem_bt));
@ -937,7 +939,7 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
// S extends VectorSpecies<E>>
// VM load(Class<? extends VM> vmClass, Class<E> elementType, int length,
// Object base, long offset, // Unsafe addressing
// C container, int index, S s, // Arguments for default implementation
// C container, long index, S s, // Arguments for default implementation
// LoadOperation<C, VM, E, S> defaultImpl)
//
// public static
@ -946,7 +948,7 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
// void store(Class<?> vectorClass, Class<?> elementType, int length,
// Object base, long offset, // Unsafe addressing
// V v,
// C container, int index, // Arguments for default implementation
// C container, long index, // Arguments for default implementation
// StoreVectorOperation<C, V> defaultImpl)
bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
@ -1049,16 +1051,6 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
}
}
if (is_mask) {
if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=%d op=%s/mask vlen=%d etype=bit ismask=no",
is_store, is_store ? "store" : "load",
num_elem);
}
set_map(old_map);
set_sp(old_sp);
return false; // not supported
}
if (!is_store) {
if (!arch_supports_vector(Op_LoadVector, num_elem, elem_bt, VecMaskUseLoad)) {
set_map(old_map);
@ -1096,7 +1088,9 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
const TypeVect* to_vect_type = TypeVect::make(T_BYTE, store_num_elem);
val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
}
if (is_mask) {
val = gvn().transform(VectorStoreMaskNode::make(gvn(), val, elem_bt, num_elem));
}
Node* vstore = gvn().transform(StoreVectorNode::make(0, control(), memory(addr), addr, addr_type, val, store_num_elem));
set_memory(vstore, addr_type);
} else {
@ -1138,7 +1132,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
// M extends VectorMask<E>>
// V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
// int length, Object base, long offset, M m,
// C container, int index, S s, // Arguments for default implementation
// C container, long index, S s, // Arguments for default implementation
// LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
//
// public static
@ -1149,7 +1143,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
// void storeMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
// int length, Object base, long offset,
// V v, M m,
// C container, int index, // Arguments for default implementation
// C container, long index, // Arguments for default implementation
// StoreVectorMaskedOperation<C, V, M, E> defaultImpl) {
//
bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
@ -2736,3 +2730,97 @@ bool LibraryCallKit::inline_vector_extract() {
return true;
}
// public static
// <V extends Vector<E>,
// M extends VectorMask<E>,
// E>
// V compressExpandOp(int opr,
// Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
// int length, V v, M m,
// CompressExpandOperation<V, M> defaultImpl)
bool LibraryCallKit::inline_vector_compress_expand() {
const TypeInt* opr = gvn().type(argument(0))->isa_int();
const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr();
const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr();
const TypeInt* vlen = gvn().type(argument(4))->isa_int();
if (vector_klass == NULL || elem_klass == NULL || mask_klass == NULL || vlen == NULL ||
vector_klass->const_oop() == NULL || mask_klass->const_oop() == NULL ||
elem_klass->const_oop() == NULL || !vlen->is_con() || !opr->is_con()) {
if (C->print_intrinsics()) {
tty->print_cr(" ** missing constant: opr=%s vclass=%s mclass=%s etype=%s vlen=%s",
NodeClassNames[argument(0)->Opcode()],
NodeClassNames[argument(1)->Opcode()],
NodeClassNames[argument(2)->Opcode()],
NodeClassNames[argument(3)->Opcode()],
NodeClassNames[argument(4)->Opcode()]);
}
return false; // not enough info for intrinsification
}
if (!is_klass_initialized(vector_klass) || !is_klass_initialized(mask_klass)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** klass argument not initialized");
}
return false;
}
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
if (!elem_type->is_primitive_type()) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not a primitive bt=%d", elem_type->basic_type());
}
return false; // should be primitive type
}
int num_elem = vlen->get_con();
BasicType elem_bt = elem_type->basic_type();
int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
if (!arch_supports_vector(opc, num_elem, elem_bt, VecMaskUseLoad)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: opc=%d vlen=%d etype=%s ismask=useload",
opc, num_elem, type2name(elem_bt));
}
return false; // not supported
}
Node* opd1 = NULL;
const TypeInstPtr* vbox_type = NULL;
if (opc != Op_CompressM) {
ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
if (opd1 == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** unbox failed vector=%s",
NodeClassNames[argument(5)->Opcode()]);
}
return false;
}
}
ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
assert(is_vector_mask(mbox_klass), "argument(6) should be a mask class");
const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
Node* mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem);
if (mask == NULL) {
if (C->print_intrinsics()) {
tty->print_cr(" ** unbox failed mask=%s",
NodeClassNames[argument(6)->Opcode()]);
}
return false;
}
const TypeVect* vt = TypeVect::make(elem_bt, num_elem, opc == Op_CompressM);
Node* operation = gvn().transform(VectorNode::make(opc, opd1, mask, vt));
// Wrap it up in VectorBox to keep object type information.
const TypeInstPtr* box_type = opc == Op_CompressM ? mbox_type : vbox_type;
Node* vbox = box_vector(operation, box_type, elem_bt, num_elem);
set_result(vbox);
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
return true;
}
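
This intrinsic backs the compress/expand methods the Vector API gains in this JEP. A usage sketch against the JDK 19 incubator module (run with --add-modules jdk.incubator.vector; the lane values in the comments assume IntVector.SPECIES_256, i.e. eight int lanes):

import jdk.incubator.vector.*;

public class CompressExpandDemo {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

    public static void main(String[] args) {
        int[] data = {5, -1, 7, -3, 9, -8, 2, -6};
        IntVector v = IntVector.fromArray(SPECIES, data, 0);
        VectorMask<Integer> m = v.compare(VectorOperators.GT, 0);

        IntVector packed   = v.compress(m);    // CompressV: positives packed into the low lanes
        IntVector restored = packed.expand(m); // ExpandV: placed back at the masked positions
        VectorMask<Integer> packedMask = m.compress(); // CompressM: set mask lanes moved low

        System.out.println(packed);     // [5, 7, 9, 2, 0, 0, 0, 0]
        System.out.println(restored);   // [5, 0, 7, 0, 9, 0, 2, 0]
        System.out.println(packedMask); // first four lanes set (toString format may differ)
    }
}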

View file

@ -162,11 +162,22 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_RoundD:
return (bt == T_LONG ? Op_RoundVD : 0);
case Op_PopCountI:
// Unimplemented for subword types since bit count changes
// depending on size of lane (and sign bit).
return (bt == T_INT ? Op_PopCountVI : 0);
return Op_PopCountVI;
case Op_PopCountL:
return Op_PopCountVL;
case Op_ReverseI:
case Op_ReverseL:
return (is_integral_type(bt) ? Op_ReverseV : 0);
case Op_ReverseBytesS:
case Op_ReverseBytesI:
case Op_ReverseBytesL:
return (is_integral_type(bt) ? Op_ReverseBytesV : 0);
case Op_CompressBits:
// Not implemented. Returning 0 temporarily
return 0;
case Op_ExpandBits:
// Not implemented. Returning 0 temporarily
return 0;
case Op_LShiftI:
switch (bt) {
case T_BOOLEAN:
@ -245,6 +256,12 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_VectorCastF2X;
case Op_ConvD2L:
return Op_VectorCastD2X;
case Op_CountLeadingZerosI:
case Op_CountLeadingZerosL:
return Op_CountLeadingZerosV;
case Op_CountTrailingZerosI:
case Op_CountTrailingZerosL:
return Op_CountTrailingZerosV;
case Op_SignumF:
return Op_SignumVF;
case Op_SignumD:
@ -317,16 +334,17 @@ bool VectorNode::is_muladds2i(Node* n) {
return false;
}
bool VectorNode::is_vpopcnt_long(Node* n) {
if (n->Opcode() == Op_PopCountL) {
return true;
bool VectorNode::is_type_transition_long_to_int(Node* n) {
switch(n->Opcode()) {
case Op_PopCountL:
case Op_CountLeadingZerosL:
case Op_CountTrailingZerosL:
return true;
default:
return false;
}
return false;
}
bool VectorNode::is_roundopD(Node* n) {
if (n->Opcode() == Op_RoundDoubleMode) {
return true;
@ -595,6 +613,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
case Op_NegVF: return new NegVFNode(n1, vt);
case Op_NegVD: return new NegVDNode(n1, vt);
case Op_ReverseV: return new ReverseVNode(n1, vt);
case Op_ReverseBytesV: return new ReverseBytesVNode(n1, vt);
case Op_SqrtVF: return new SqrtVFNode(n1, vt);
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
@ -628,6 +649,12 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
case Op_RoundDoubleModeV: return new RoundDoubleModeVNode(n1, n2, vt);
case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt);
case Op_ExpandV: return new ExpandVNode(n1, n2, vt);
case Op_CompressV: return new CompressVNode(n1, n2, vt);
case Op_CompressM: assert(n1 == NULL, ""); return new CompressMNode(n2, vt);
case Op_CountLeadingZerosV: return new CountLeadingZerosVNode(n1, vt);
case Op_CountTrailingZerosV: return new CountTrailingZerosVNode(n1, vt);
default:
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
return NULL;
@ -1669,6 +1696,38 @@ Node* NegVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
return NULL;
}
Node* ReverseBytesVNode::Identity(PhaseGVN* phase) {
if (is_predicated_using_blend()) {
return this;
}
// ReverseBytesV (ReverseBytesV X , MASK) , MASK => X
if (in(1)->Opcode() == Op_ReverseBytesV) {
if (is_predicated_vector() && in(1)->is_predicated_vector() && in(2) == in(1)->in(2)) {
return in(1)->in(1);
} else {
// ReverseBytesV (ReverseBytesV X) => X
return in(1)->in(1);
}
}
return this;
}
Node* ReverseVNode::Identity(PhaseGVN* phase) {
if (is_predicated_using_blend()) {
return this;
}
// ReverseV (ReverseV X , MASK) , MASK => X
if (in(1)->Opcode() == Op_ReverseV) {
if (is_predicated_vector() && in(1)->is_predicated_vector() && in(2) == in(1)->in(2)) {
return in(1)->in(1);
} else {
// ReverseV (ReverseV X) => X
return in(1)->in(1);
}
}
return this;
}
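
Both Identity hooks exploit the fact that reversal is an involution per lane, so back-to-back reversals under the same predicate cancel. The scalar analogue, for illustration:

public class ReverseInvolution {
    public static void main(String[] args) {
        long x = 0x0123456789ABCDEFL;
        // Applying the same reversal twice restores the input; the vector Identity
        // transforms above fold ReverseV(ReverseV(x)) and ReverseBytesV(ReverseBytesV(x)) to x.
        assert Long.reverse(Long.reverse(x)) == x;
        assert Long.reverseBytes(Long.reverseBytes(x)) == x;
        System.out.println(Long.toHexString(Long.reverseBytes(x))); // efcdab8967452301
    }
}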
#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
CallStaticJavaNode::dump_spec(st);

View file

@ -93,7 +93,7 @@ class VectorNode : public TypeNode {
static bool is_type_transition_short_to_int(Node* n);
static bool is_type_transition_to_int(Node* n);
static bool is_muladds2i(Node* n);
static bool is_vpopcnt_long(Node* n);
static bool is_type_transition_long_to_int(Node* n);
static bool is_roundopD(Node* n);
static bool is_scalar_rotate(Node* n);
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
@ -769,6 +769,37 @@ public:
virtual int Opcode() const;
};
//------------------------------CompressVNode--------------------------------------
// Vector compress
class CompressVNode: public VectorNode {
public:
CompressVNode(Node* vec, Node* mask, const TypeVect* vt) :
VectorNode(vec, mask, vt) {
init_class_id(Class_CompressV);
}
virtual int Opcode() const;
};
class CompressMNode: public VectorNode {
public:
CompressMNode(Node* mask, const TypeVect* vt) :
VectorNode(mask, vt) {
init_class_id(Class_CompressM);
}
virtual int Opcode() const;
};
//------------------------------ExpandVNode--------------------------------------
// Vector expand
class ExpandVNode: public VectorNode {
public:
ExpandVNode(Node* vec, Node* mask, const TypeVect* vt) :
VectorNode(vec, mask, vt) {
init_class_id(Class_ExpandV);
}
virtual int Opcode() const;
};
//================================= M E M O R Y ===============================
//------------------------------LoadVectorNode---------------------------------
@ -1392,7 +1423,6 @@ class VectorBlendNode : public VectorNode {
public:
VectorBlendNode(Node* vec1, Node* vec2, Node* mask)
: VectorNode(vec1, vec2, mask, vec1->bottom_type()->is_vect()) {
// assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
}
virtual int Opcode() const;
@ -1675,6 +1705,40 @@ public:
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
class CountLeadingZerosVNode : public VectorNode {
public:
CountLeadingZerosVNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {}
virtual int Opcode() const;
};
class CountTrailingZerosVNode : public VectorNode {
public:
CountTrailingZerosVNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {}
virtual int Opcode() const;
};
class ReverseVNode : public VectorNode {
public:
ReverseVNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {}
virtual Node* Identity(PhaseGVN* phase);
virtual int Opcode() const;
};
class ReverseBytesVNode : public VectorNode {
public:
ReverseBytesVNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {}
virtual Node* Identity(PhaseGVN* phase);
virtual int Opcode() const;
};
class SignumVFNode : public VectorNode {
public:
SignumVFNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
@ -1690,4 +1754,5 @@ public:
virtual int Opcode() const;
};
#endif // SHARE_OPTO_VECTORNODE_HPP

View file

@ -443,6 +443,109 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) {
}
break;
}
case VECTOR_OP_EXPAND: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: // fall-through
case T_LONG: // fall-through
case T_FLOAT: // fall-through
case T_DOUBLE: return Op_ExpandV;
default: fatal("EXPAND: %s", type2name(bt));
}
break;
}
case VECTOR_OP_COMPRESS: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: // fall-through
case T_LONG: // fall-through
case T_FLOAT: // fall-through
case T_DOUBLE: return Op_CompressV;
default: fatal("COMPRESS: %s", type2name(bt));
}
break;
}
case VECTOR_OP_MASK_COMPRESS: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: // fall-through
case T_LONG: // fall-through
case T_FLOAT: // fall-through
case T_DOUBLE: return Op_CompressM;
default: fatal("MASK_COMPRESS: %s", type2name(bt));
}
break;
}
case VECTOR_OP_BIT_COUNT: {
switch (bt) {
case T_BYTE: // Returning Op_PopCountI
case T_SHORT: // for byte and short types temporarily
case T_INT: return Op_PopCountI;
case T_LONG: return Op_PopCountL;
default: fatal("BIT_COUNT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_TZ_COUNT: {
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT: return Op_CountTrailingZerosI;
case T_LONG: return Op_CountTrailingZerosL;
default: fatal("TZ_COUNT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_LZ_COUNT: {
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT: return Op_CountLeadingZerosI;
case T_LONG: return Op_CountLeadingZerosL;
default: fatal("LZ_COUNT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_REVERSE: {
switch (bt) {
case T_BYTE: // Temporarily returning
case T_SHORT: // Op_ReverseI for byte and short
case T_INT: return Op_ReverseI;
case T_LONG: return Op_ReverseL;
default: fatal("REVERSE: %s", type2name(bt));
}
break;
}
case VECTOR_OP_REVERSE_BYTES: {
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT: return Op_ReverseBytesI;
case T_LONG: return Op_ReverseBytesL;
default: fatal("REVERSE_BYTES: %s", type2name(bt));
}
break;
}
case VECTOR_OP_COMPRESS_BITS: {
switch (bt) {
case T_INT:
case T_LONG: return Op_CompressBits;
default: fatal("COMPRESS_BITS: %s", type2name(bt));
}
break;
}
case VECTOR_OP_EXPAND_BITS: {
switch (bt) {
case T_INT:
case T_LONG: return Op_ExpandBits;
default: fatal("EXPAND_BITS: %s", type2name(bt));
}
break;
}
case VECTOR_OP_TAN:
case VECTOR_OP_TANH:
case VECTOR_OP_SIN:

View file

@ -54,6 +54,7 @@ class VectorSupport : AllStatic {
VECTOR_OP_ABS = 0,
VECTOR_OP_NEG = 1,
VECTOR_OP_SQRT = 2,
VECTOR_OP_BIT_COUNT = 3,
// Binary
VECTOR_OP_ADD = 4,
@ -89,6 +90,17 @@ class VectorSupport : AllStatic {
VECTOR_OP_LROTATE = 24,
VECTOR_OP_RROTATE = 25,
VECTOR_OP_COMPRESS = 26,
VECTOR_OP_EXPAND = 27,
VECTOR_OP_MASK_COMPRESS = 28,
VECTOR_OP_TZ_COUNT = 29,
VECTOR_OP_LZ_COUNT = 30,
VECTOR_OP_REVERSE = 31,
VECTOR_OP_REVERSE_BYTES = 32,
VECTOR_OP_COMPRESS_BITS = 33,
VECTOR_OP_EXPAND_BITS = 34,
// Vector Math Library
VECTOR_OP_TAN = 101,
VECTOR_OP_TANH = 102,

View file

@ -1775,6 +1775,9 @@
declare_c2_type(FmaVFNode, VectorNode) \
declare_c2_type(CMoveVFNode, VectorNode) \
declare_c2_type(CMoveVDNode, VectorNode) \
declare_c2_type(CompressVNode, VectorNode) \
declare_c2_type(CompressMNode, VectorNode) \
declare_c2_type(ExpandVNode, VectorNode) \
declare_c2_type(MulReductionVDNode, ReductionNode) \
declare_c2_type(DivVFNode, VectorNode) \
declare_c2_type(DivVDNode, VectorNode) \
@ -1866,6 +1869,10 @@
declare_c2_type(VectorUnboxNode, VectorNode) \
declare_c2_type(VectorReinterpretNode, VectorNode) \
declare_c2_type(VectorMaskCastNode, VectorNode) \
declare_c2_type(CountLeadingZerosVNode, VectorNode) \
declare_c2_type(CountTrailingZerosVNode, VectorNode) \
declare_c2_type(ReverseBytesVNode, VectorNode) \
declare_c2_type(ReverseVNode, VectorNode) \
declare_c2_type(MaskAllNode, VectorNode) \
declare_c2_type(AndVMaskNode, VectorNode) \
declare_c2_type(OrVMaskNode, VectorNode) \


@ -721,6 +721,10 @@ inline bool is_integral_type(BasicType t) {
return is_subword_type(t) || t == T_INT || t == T_LONG;
}
inline bool is_non_subword_integral_type(BasicType t) {
return t == T_INT || t == T_LONG;
}
inline bool is_floating_point_type(BasicType t) {
return (t == T_FLOAT || t == T_DOUBLE);
}


@ -32,8 +32,6 @@ import java.lang.annotation.Target;
import java.lang.foreign.MemorySegment;
import java.lang.ref.Reference;
import java.io.FileDescriptor;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import jdk.internal.access.JavaNioAccess;
import jdk.internal.access.SharedSecrets;
@ -313,59 +311,25 @@ public class ScopedMemoryAccess {
}
}
// ByteBuffer vector access ops
// Buffer access constants, to be initialized when required.
// Avoids a null value for NIO_ACCESS, due to class initialization dependencies
static final class BufferAccess {
// Buffer.address
static final long BUFFER_ADDRESS
= UNSAFE.objectFieldOffset(Buffer.class, "address");
// ByteBuffer.hb
static final long BYTE_BUFFER_HB
= UNSAFE.objectFieldOffset(ByteBuffer.class, "hb");
static final long BYTE_BUFFER_IS_READ_ONLY
= UNSAFE.objectFieldOffset(ByteBuffer.class, "isReadOnly");
@ForceInline
static Object bufferBase(ByteBuffer bb) {
return UNSAFE.getReference(bb, BYTE_BUFFER_HB);
}
@ForceInline
static long bufferAddress(ByteBuffer bb, long offset) {
return UNSAFE.getLong(bb, BUFFER_ADDRESS) + offset;
}
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
@ForceInline
static MemorySessionImpl session(ByteBuffer bb) {
MemorySegment segment = NIO_ACCESS.bufferSegment(bb);
return segment != null ?
((AbstractMemorySegmentImpl)segment).sessionImpl() : null;
}
}
@ForceInline
public static boolean isReadOnly(ByteBuffer bb) {
return UNSAFE.getBoolean(bb, BufferAccess.BYTE_BUFFER_IS_READ_ONLY);
}
// MemorySegment vector access ops
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>>
V loadFromByteBuffer(Class<? extends V> vmClass, Class<E> e, int length,
ByteBuffer bb, int offset,
S s,
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
V loadFromMemorySegment(Class<? extends V> vmClass, Class<E> e, int length,
AbstractMemorySegmentImpl msp, long offset,
S s,
VectorSupport.LoadOperation<AbstractMemorySegmentImpl, V, S> defaultImpl) {
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
if (msp.maxAlignMask() > 1) {
throw new IllegalArgumentException();
}
try {
return loadFromByteBufferScoped(
BufferAccess.session(bb),
return loadFromMemorySegmentScopedInternal(
msp.sessionImpl(),
vmClass, e, length,
bb, offset,
msp, offset,
s,
defaultImpl);
} catch (ScopedAccessError ex) {
@ -377,22 +341,18 @@ public class ScopedMemoryAccess {
@ForceInline
private static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>>
V loadFromByteBufferScoped(MemorySessionImpl session,
Class<? extends V> vmClass, Class<E> e, int length,
ByteBuffer bb, int offset,
S s,
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
V loadFromMemorySegmentScopedInternal(MemorySessionImpl session,
Class<? extends V> vmClass, Class<E> e, int length,
AbstractMemorySegmentImpl msp, long offset,
S s,
VectorSupport.LoadOperation<AbstractMemorySegmentImpl, V, S> defaultImpl) {
try {
if (session != null) {
session.checkValidState();
}
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
session.checkValidState();
return VectorSupport.load(vmClass, e, length,
base, BufferAccess.bufferAddress(bb, offset),
bb, offset, s,
defaultImpl);
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
msp, offset, s,
defaultImpl);
} finally {
Reference.reachabilityFence(session);
}
@ -402,17 +362,22 @@ public class ScopedMemoryAccess {
public static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
M extends VectorSupport.VectorMask<E>>
V loadFromByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, ByteBuffer bb, int offset, M m, S s,
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
V loadFromMemorySegmentMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, AbstractMemorySegmentImpl msp, long offset, M m, S s,
VectorSupport.LoadVectorMaskedOperation<AbstractMemorySegmentImpl, V, S, M> defaultImpl) {
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
if (msp.maxAlignMask() > 1) {
throw new IllegalArgumentException();
}
try {
return loadFromByteBufferMaskedScoped(
BufferAccess.session(bb),
return loadFromMemorySegmentMaskedScopedInternal(
msp.sessionImpl(),
vmClass, maskClass, e, length,
bb, offset, m,
msp, offset, m,
s,
defaultImpl);
} catch (ScopedMemoryAccess.ScopedAccessError ex) {
} catch (ScopedAccessError ex) {
throw new IllegalStateException("This segment is already closed");
}
}
@ -422,19 +387,17 @@ public class ScopedMemoryAccess {
private static
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
M extends VectorSupport.VectorMask<E>>
V loadFromByteBufferMaskedScoped(MemorySessionImpl session, Class<? extends V> vmClass,
Class<M> maskClass, Class<E> e, int length,
ByteBuffer bb, int offset, M m,
S s,
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
V loadFromMemorySegmentMaskedScopedInternal(MemorySessionImpl session, Class<? extends V> vmClass,
Class<M> maskClass, Class<E> e, int length,
AbstractMemorySegmentImpl msp, long offset, M m,
S s,
VectorSupport.LoadVectorMaskedOperation<AbstractMemorySegmentImpl, V, S, M> defaultImpl) {
try {
if (session != null) {
session.checkValidState();
}
session.checkValidState();
return VectorSupport.loadMasked(vmClass, maskClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset), m,
bb, offset, s,
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, m,
msp, offset, s,
defaultImpl);
} finally {
Reference.reachabilityFence(session);
@ -444,16 +407,21 @@ public class ScopedMemoryAccess {
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E>
void storeIntoByteBuffer(Class<? extends V> vmClass, Class<E> e, int length,
V v,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorOperation<ByteBuffer, V> defaultImpl) {
void storeIntoMemorySegment(Class<? extends V> vmClass, Class<E> e, int length,
V v,
AbstractMemorySegmentImpl msp, long offset,
VectorSupport.StoreVectorOperation<AbstractMemorySegmentImpl, V> defaultImpl) {
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
if (msp.maxAlignMask() > 1) {
throw new IllegalArgumentException();
}
try {
storeIntoByteBufferScoped(
BufferAccess.session(bb),
storeIntoMemorySegmentScopedInternal(
msp.sessionImpl(),
vmClass, e, length,
v,
bb, offset,
msp, offset,
defaultImpl);
} catch (ScopedAccessError ex) {
throw new IllegalStateException("This segment is already closed");
@ -464,23 +432,19 @@ public class ScopedMemoryAccess {
@ForceInline
private static
<V extends VectorSupport.Vector<E>, E>
void storeIntoByteBufferScoped(MemorySessionImpl session,
Class<? extends V> vmClass, Class<E> e, int length,
V v,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorOperation<ByteBuffer, V> defaultImpl) {
void storeIntoMemorySegmentScopedInternal(MemorySessionImpl session,
Class<? extends V> vmClass, Class<E> e, int length,
V v,
AbstractMemorySegmentImpl msp, long offset,
VectorSupport.StoreVectorOperation<AbstractMemorySegmentImpl, V> defaultImpl) {
try {
if (session != null) {
session.checkValidState();
}
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
session.checkValidState();
VectorSupport.store(vmClass, e, length,
base, BufferAccess.bufferAddress(bb, offset),
v,
bb, offset,
defaultImpl);
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
v,
msp, offset,
defaultImpl);
} finally {
Reference.reachabilityFence(session);
}
@ -489,18 +453,23 @@ public class ScopedMemoryAccess {
@ForceInline
public static
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
void storeIntoByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, V v, M m,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
void storeIntoMemorySegmentMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
int length, V v, M m,
AbstractMemorySegmentImpl msp, long offset,
VectorSupport.StoreVectorMaskedOperation<AbstractMemorySegmentImpl, V, M> defaultImpl) {
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
if (msp.maxAlignMask() > 1) {
throw new IllegalArgumentException();
}
try {
storeIntoByteBufferMaskedScoped(
BufferAccess.session(bb),
storeIntoMemorySegmentMaskedScopedInternal(
msp.sessionImpl(),
vmClass, maskClass, e, length,
v, m,
bb, offset,
msp, offset,
defaultImpl);
} catch (ScopedMemoryAccess.ScopedAccessError ex) {
} catch (ScopedAccessError ex) {
throw new IllegalStateException("This segment is already closed");
}
}
@ -509,20 +478,18 @@ public class ScopedMemoryAccess {
@ForceInline
private static
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
void storeIntoByteBufferMaskedScoped(MemorySessionImpl session,
Class<? extends V> vmClass, Class<M> maskClass,
Class<E> e, int length, V v, M m,
ByteBuffer bb, int offset,
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
void storeIntoMemorySegmentMaskedScopedInternal(MemorySessionImpl session,
Class<? extends V> vmClass, Class<M> maskClass,
Class<E> e, int length, V v, M m,
AbstractMemorySegmentImpl msp, long offset,
VectorSupport.StoreVectorMaskedOperation<AbstractMemorySegmentImpl, V, M> defaultImpl) {
try {
if (session != null) {
session.checkValidState();
}
session.checkValidState();
VectorSupport.storeMasked(vmClass, maskClass, e, length,
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset),
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
v, m,
bb, offset,
msp, offset,
defaultImpl);
} finally {
Reference.reachabilityFence(session);
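(Illustration, not part of the patch: the msp.maxAlignMask() > 1 guards above are what reject heap segments whose backing array is not byte[], which the new fromMemorySegment/intoMemorySegment javadoc later in this patch documents as an IllegalArgumentException. A minimal sketch, assuming a JDK 19 build with --enable-preview and --add-modules jdk.incubator.vector; class and variable names are made up for the example.)

import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;

public class HeapSegmentGuardDemo {
    public static void main(String[] args) {
        var species = ByteVector.SPECIES_128;

        // byte[]-backed heap segment: accepted.
        MemorySegment byteBacked = MemorySegment.ofArray(new byte[64]);
        System.out.println(ByteVector.fromMemorySegment(species, byteBacked, 0, ByteOrder.nativeOrder()));

        // int[]-backed heap segment: maxAlignMask() > 1, so the vector load is rejected.
        MemorySegment intBacked = MemorySegment.ofArray(new int[16]);
        try {
            ByteVector.fromMemorySegment(species, intBacked, 0, ByteOrder.nativeOrder());
        } catch (IllegalArgumentException expected) {
            System.out.println("rejected: heap segment not backed by byte[]");
        }
    }
}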


@ -41,6 +41,7 @@ public class VectorSupport {
public static final int VECTOR_OP_ABS = 0;
public static final int VECTOR_OP_NEG = 1;
public static final int VECTOR_OP_SQRT = 2;
public static final int VECTOR_OP_BIT_COUNT = 3;
// Binary
public static final int VECTOR_OP_ADD = 4;
@ -76,6 +77,23 @@ public class VectorSupport {
public static final int VECTOR_OP_LROTATE = 24;
public static final int VECTOR_OP_RROTATE = 25;
// Compression expansion operations
public static final int VECTOR_OP_COMPRESS = 26;
public static final int VECTOR_OP_EXPAND = 27;
public static final int VECTOR_OP_MASK_COMPRESS = 28;
// Leading/Trailing zeros count operations
public static final int VECTOR_OP_TZ_COUNT = 29;
public static final int VECTOR_OP_LZ_COUNT = 30;
// Reverse operation
public static final int VECTOR_OP_REVERSE = 31;
public static final int VECTOR_OP_REVERSE_BYTES = 32;
// Compress and Expand Bits operation
public static final int VECTOR_OP_COMPRESS_BITS = 33;
public static final int VECTOR_OP_EXPAND_BITS = 34;
// Math routines
public static final int VECTOR_OP_TAN = 101;
public static final int VECTOR_OP_TANH = 102;
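(Illustration, not part of the patch: the new operator ids above back lanewise operators that JEP 426 exposes publicly through VectorOperators. A minimal sketch, assuming a JDK 19 build with --add-modules jdk.incubator.vector; expected lane values are noted in comments.)

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorOperators;

public class NewLanewiseOpsDemo {
    public static void main(String[] args) {
        var species = IntVector.SPECIES_PREFERRED;
        IntVector v = IntVector.broadcast(species, 0x00F0_0000);
        System.out.println(v.lanewise(VectorOperators.BIT_COUNT).lane(0));             // 4
        System.out.println(v.lanewise(VectorOperators.TRAILING_ZEROS_COUNT).lane(0));  // 20
        System.out.println(v.lanewise(VectorOperators.LEADING_ZEROS_COUNT).lane(0));   // 8
        System.out.println(Integer.toHexString(
                v.lanewise(VectorOperators.REVERSE_BYTES).lane(0)));                   // f000
        // COMPRESS_BITS/EXPAND_BITS are binary ops on int and long lanes,
        // mirroring Integer.compress/Integer.expand.
        IntVector bits = IntVector.broadcast(species, 0b1010_1010);
        IntVector mask = IntVector.broadcast(species, 0b1111_0000);
        System.out.println(Integer.toBinaryString(
                bits.lanewise(VectorOperators.COMPRESS_BITS, mask).lane(0)));          // 1010
    }
}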
@ -363,7 +381,7 @@ public class VectorSupport {
public interface LoadOperation<C,
VM extends VectorPayload,
S extends VectorSpecies<?>> {
VM load(C container, int index, S s);
VM load(C container, long index, S s);
}
@IntrinsicCandidate
@ -375,7 +393,7 @@ public class VectorSupport {
VM load(Class<? extends VM> vmClass, Class<E> eClass,
int length,
Object base, long offset,
C container, int index, S s,
C container, long index, S s,
LoadOperation<C, VM, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s);
@ -387,7 +405,7 @@ public class VectorSupport {
V extends Vector<?>,
S extends VectorSpecies<?>,
M extends VectorMask<?>> {
V load(C container, int index, S s, M m);
V load(C container, long index, S s, M m);
}
@IntrinsicCandidate
@ -400,7 +418,7 @@ public class VectorSupport {
V loadMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Object base, long offset,
M m, C container, int index, S s,
M m, C container, long index, S s,
LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s, m);
@ -437,18 +455,18 @@ public class VectorSupport {
/* ============================================================================ */
public interface StoreVectorOperation<C,
V extends Vector<?>> {
void store(C container, int index, V v);
V extends VectorPayload> {
void store(C container, long index, V v);
}
@IntrinsicCandidate
public static
<C,
V extends Vector<?>>
V extends VectorPayload>
void store(Class<?> vClass, Class<?> eClass,
int length,
Object base, long offset,
V v, C container, int index,
V v, C container, long index,
StoreVectorOperation<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v);
@ -457,7 +475,7 @@ public class VectorSupport {
public interface StoreVectorMaskedOperation<C,
V extends Vector<?>,
M extends VectorMask<?>> {
void store(C container, int index, V v, M m);
void store(C container, long index, V v, M m);
}
@IntrinsicCandidate
@ -469,7 +487,7 @@ public class VectorSupport {
void storeMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
int length,
Object base, long offset,
V v, M m, C container, int index,
V v, M m, C container, long index,
StoreVectorMaskedOperation<C, V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v, m);
@ -626,6 +644,26 @@ public class VectorSupport {
/* ============================================================================ */
public interface CompressExpandOperation<V extends Vector<?>,
M extends VectorMask<?>> {
VectorPayload apply(V v, M m);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
M extends VectorMask<E>,
E>
VectorPayload compressExpandOp(int opr,
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
int length, V v, M m,
CompressExpandOperation<V, M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, m);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VP extends VectorPayload>


@ -158,6 +158,8 @@ module java.base {
jdk.jartool,
jdk.jlink,
jdk.net;
exports jdk.internal.foreign to
jdk.incubator.vector;
exports jdk.internal.event to
jdk.jfr;
exports jdk.internal.jimage to


@ -38,6 +38,7 @@ import com.sun.tools.javac.util.JCDiagnostic.SimpleDiagnosticPosition;
import com.sun.tools.javac.util.JCDiagnostic.Warning;
import com.sun.tools.javac.util.Log;
import com.sun.tools.javac.util.MandatoryWarningHandler;
import com.sun.tools.javac.util.Names;
import com.sun.tools.javac.util.Options;
import javax.tools.JavaFileObject;
@ -78,6 +79,7 @@ public class Preview {
private final Set<JavaFileObject> sourcesWithPreviewFeatures = new HashSet<>();
private final Names names;
private final Lint lint;
private final Log log;
private final Source source;
@ -95,6 +97,7 @@ public class Preview {
Preview(Context context) {
context.put(previewKey, this);
Options options = Options.instance(context);
names = Names.instance(context);
enabled = options.isSet(PREVIEW);
log = Log.instance(context);
lint = Lint.instance(context);
@ -115,7 +118,22 @@ public class Preview {
}
}
return majorVersionToSource;
}
}
/**
* Returns true if {@code s} is deemed to participate in the preview of {@code previewSymbol}, and
* therefore no warnings or errors will be produced.
*
* @param s the symbol depending on the preview symbol
* @param previewSymbol the preview symbol marked with @Preview
* @return true if {@code s} is participating in the preview of {@code previewSymbol}
*/
public boolean participatesInPreview(Symbol s, Symbol previewSymbol) {
// Hardcode the incubating vector API module for now
// Will generalize with an annotation, @PreviewParticipating say, later
return previewSymbol.packge().modle == s.packge().modle ||
s.packge().modle.name == names.jdk_incubator_vector;
}
/**
* Report usage of a preview feature. Usages reported through this method will affect the


@ -3600,7 +3600,7 @@ public class Check {
}
void checkPreview(DiagnosticPosition pos, Symbol other, Symbol s) {
if ((s.flags() & PREVIEW_API) != 0 && s.packge().modle != other.packge().modle) {
if ((s.flags() & PREVIEW_API) != 0 && !preview.participatesInPreview(other, s)) {
if ((s.flags() & PREVIEW_REFLECTIVE) == 0) {
if (!preview.isEnabled()) {
log.error(pos, Errors.IsPreview(s));


@ -124,6 +124,7 @@ public class Names {
// module names
public final Name java_base;
public final Name jdk_unsupported;
public final Name jdk_incubator_vector;
// attribute names
public final Name Annotation;
@ -305,6 +306,7 @@ public class Names {
// module names
java_base = fromString("java.base");
jdk_unsupported = fromString("jdk.unsupported");
jdk_incubator_vector = fromString("jdk.incubator.vector");
// attribute names
Annotation = fromString("Annotation");


@ -28,6 +28,10 @@ import java.util.Objects;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.incubator.vector.VectorOperators.*;
abstract class AbstractMask<E> extends VectorMask<E> {
@ -77,7 +81,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
@Override
public void intoArray(boolean[] bits, int i) {
System.arraycopy(getBits(), 0, bits, i, length());
AbstractSpecies<E> vsp = (AbstractSpecies<E>) vectorSpecies();
int laneCount = vsp.laneCount();
i = VectorIntrinsics.checkFromIndexSize(i, laneCount, bits.length);
VectorSupport.store(
vsp.maskType(), vsp.elementType(), laneCount,
bits, (long) i + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
this, bits, i,
(c, idx, s) -> System.arraycopy(s.getBits(), 0, c, (int) idx, s.length()));
}
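(Illustration, not part of the patch: VectorMask.intoArray now routes through VectorSupport.store so it can be intrinsified; a minimal round-trip sketch, assuming a JDK 19 build with --add-modules jdk.incubator.vector.)

import java.util.Arrays;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;

public class MaskIntoArrayDemo {
    public static void main(String[] args) {
        var species = IntVector.SPECIES_256;                  // 8 int lanes
        boolean[] in = {true, false, true, true, false, false, true, false};
        VectorMask<Integer> m = VectorMask.fromArray(species, in, 0);
        boolean[] out = new boolean[species.length()];
        m.intoArray(out, 0);                                  // exercises the store path above
        System.out.println(Arrays.equals(in, out));           // true
    }
}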
@Override
@ -192,6 +204,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
return this.andNot(badMask);
}
@Override
@ForceInline
public VectorMask<E> indexInRange(long offset, long limit) {
int vlength = length();
Vector<E> iota = vectorSpecies().zero().addIndex(1);
VectorMask<E> badMask = checkIndex0(offset, limit, iota, vlength);
return this.andNot(badMask);
}
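(Illustration, not part of the patch: the long-typed indexInRange added here, together with the long-typed loopBound on VectorSpecies later in this patch, supports loops over MemorySegment sizes that need not fit in an int. A minimal sketch, assuming a JDK 19 build with --enable-preview and --add-modules jdk.incubator.vector; names are made up for the example.)

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;

public class SegmentTailLoop {
    static void addOne(MemorySegment ms) {
        var species = ByteVector.SPECIES_PREFERRED;
        long upper = species.loopBound(ms.byteSize());        // long overload
        long i = 0;
        for (; i < upper; i += species.vectorByteSize()) {
            ByteVector v = ByteVector.fromMemorySegment(species, ms, i, ByteOrder.nativeOrder());
            v.add((byte) 1).intoMemorySegment(ms, i, ByteOrder.nativeOrder());
        }
        // Masked tail: lanes past byteSize() are switched off by indexInRange(long, long).
        VectorMask<Byte> m = species.indexInRange(i, ms.byteSize());
        ByteVector v = ByteVector.fromMemorySegment(species, ms, i, ByteOrder.nativeOrder(), m);
        v.add((byte) 1).intoMemorySegment(ms, i, ByteOrder.nativeOrder(), m);
    }

    public static void main(String[] args) {
        MemorySegment ms = MemorySegment.ofArray(new byte[100]);
        addOne(ms);
        System.out.println(ms.get(ValueLayout.JAVA_BYTE, 99));   // 1
    }
}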
/*package-private*/
@ForceInline
AbstractVector<E>
@ -215,7 +236,7 @@ abstract class AbstractMask<E> extends VectorMask<E> {
*/
/*package-private*/
@ForceInline
void checkIndexByLane(int offset, int alength,
void checkIndexByLane(int offset, int length,
Vector<E> iota,
int esize) {
if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
@ -229,15 +250,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
int vlength = length();
VectorMask<E> badMask;
if (esize == 1) {
badMask = checkIndex0(offset, alength, iota, vlength);
badMask = checkIndex0(offset, length, iota, vlength);
} else if (offset >= 0) {
// Masked access to multi-byte lanes in byte array.
// It could be aligned anywhere.
int elemCount = Math.min(vlength, (alength - offset) / esize);
int elemCount = Math.min(vlength, (length - offset) / esize);
badMask = checkIndex0(0, elemCount, iota, vlength);
} else {
int clipOffset = Math.max(offset, -(vlength * esize));
badMask = checkIndex0(clipOffset, alength,
badMask = checkIndex0(clipOffset, length,
iota.lanewise(VectorOperators.MUL, esize),
vlength * esize);
}
@ -245,20 +266,20 @@ abstract class AbstractMask<E> extends VectorMask<E> {
if (badMask.anyTrue()) {
int badLane = badMask.firstTrue();
throw ((AbstractMask<E>)badMask)
.checkIndexFailed(offset, badLane, alength, esize);
.checkIndexFailed(offset, badLane, length, esize);
}
}
private
@ForceInline
VectorMask<E> checkIndex0(int offset, int alength,
VectorMask<E> checkIndex0(int offset, int length,
Vector<E> iota, int vlength) {
// An active lane is bad if its number is greater than
// alength-offset, since when added to offset it will step off
// length-offset, since when added to offset it will step off
// of the end of the array. To avoid overflow when
// converting, clip the comparison value to [0..vlength]
// inclusive.
int indexLimit = Math.max(0, Math.min(alength - offset, vlength));
int indexLimit = Math.max(0, Math.min(length - offset, vlength));
VectorMask<E> badMask =
iota.compare(GE, iota.broadcast(indexLimit));
if (offset < 0) {
@ -280,14 +301,90 @@ abstract class AbstractMask<E> extends VectorMask<E> {
return badMask;
}
private IndexOutOfBoundsException checkIndexFailed(int offset, int lane,
int alength, int esize) {
/**
* Test if a masked memory access at a given offset into an array
* of the given length will stay within the array.
* The per-lane offsets are iota*esize.
*/
/*package-private*/
@ForceInline
void checkIndexByLane(long offset, long length,
Vector<E> iota,
int esize) {
if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
return;
}
// Although the specification is simple, the implementation is
// tricky, because the value iota*esize might possibly
// overflow. So we calculate our test values as scalars,
// clipping to the range [-1..VLENGTH], and test them against
// the unscaled iota vector, whose values are in [0..VLENGTH-1].
int vlength = length();
VectorMask<E> badMask;
if (esize == 1) {
badMask = checkIndex0(offset, length, iota, vlength);
} else if (offset >= 0) {
// Masked access to multi-byte lanes in byte array.
// It could be aligned anywhere.
// 0 <= elemCount <= vlength
int elemCount = (int) Math.min(vlength, (length - offset) / esize);
badMask = checkIndex0(0, elemCount, iota, vlength);
} else {
// -vlength * esize <= clipOffset <= 0
int clipOffset = (int) Math.max(offset, -(vlength * esize));
badMask = checkIndex0(clipOffset, length,
iota.lanewise(VectorOperators.MUL, esize),
vlength * esize);
}
badMask = badMask.and(this);
if (badMask.anyTrue()) {
int badLane = badMask.firstTrue();
throw ((AbstractMask<E>)badMask)
.checkIndexFailed(offset, badLane, length, esize);
}
}
private
@ForceInline
VectorMask<E> checkIndex0(long offset, long length,
Vector<E> iota, int vlength) {
// An active lane is bad if its number is greater than
// length-offset, since when added to offset it will step off
// of the end of the array. To avoid overflow when
// converting, clip the comparison value to [0..vlength]
// inclusive.
// 0 <= indexLimit <= vlength
int indexLimit = (int) Math.max(0, Math.min(length - offset, vlength));
VectorMask<E> badMask =
iota.compare(GE, iota.broadcast(indexLimit));
if (offset < 0) {
// An active lane is bad if its number is less than
// -offset, because when added to offset it will then
// address an array element at a negative index. To avoid
// overflow when converting, clip the comparison value at
// vlength. This specific expression works correctly even
// when offset is Integer.MIN_VALUE.
// 0 <= firstGoodIndex <= vlength
int firstGoodIndex = (int) -Math.max(offset, -vlength);
VectorMask<E> badMask2 =
iota.compare(LT, iota.broadcast(firstGoodIndex));
if (indexLimit >= vlength) {
badMask = badMask2; // 1st badMask is all true
} else {
badMask = badMask.or(badMask2);
}
}
return badMask;
}
private IndexOutOfBoundsException checkIndexFailed(long offset, int lane,
long length, int esize) {
String msg = String.format("Masked range check failed: "+
"vector mask %s out of bounds at "+
"index %d+%d in array of length %d",
this, offset, lane * esize, alength);
"index %d+%d for length %d",
this, offset, lane * esize, length);
if (esize != 1) {
msg += String.format(" (each lane spans %d array elements)", esize);
msg += String.format(" (each lane spans %d elements)", esize);
}
throw new IndexOutOfBoundsException(msg);
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,10 +24,11 @@
*/
package jdk.incubator.vector;
import java.lang.foreign.MemorySegment;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.annotation.Stable;
import java.nio.ByteOrder;
import java.lang.reflect.Array;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.function.Function;
import java.util.function.IntUnaryOperator;
@ -203,12 +204,24 @@ abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.V
return VectorIntrinsics.roundDown(length, laneCount);
}
@Override
@ForceInline
public final long loopBound(long length) {
return VectorIntrinsics.roundDown(length, laneCount);
}
@Override
@ForceInline
public final VectorMask<E> indexInRange(int offset, int limit) {
return maskAll(true).indexInRange(offset, limit);
}
@Override
@ForceInline
public final VectorMask<E> indexInRange(long offset, long limit) {
return maskAll(true).indexInRange(offset, limit);
}
@Override
@ForceInline
public final <F> VectorSpecies<F> withLanes(Class<F> newType) {
@ -349,9 +362,9 @@ abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.V
@ForceInline
@Override
public final Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo) {
public final Vector<E> fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo) {
return dummyVector()
.fromByteArray0(a, offset)
.fromMemorySegment0(ms, offset)
.maybeSwap(bo);
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,10 +24,10 @@
*/
package jdk.incubator.vector;
import java.lang.foreign.MemorySegment;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.function.IntUnaryOperator;
@ -194,7 +194,7 @@ abstract class AbstractVector<E> extends Vector<E> {
abstract AbstractShuffle<E> shuffleFromOp(IntUnaryOperator fn);
/*package-private*/
abstract AbstractVector<E> fromByteArray0(byte[] a, int offset);
abstract AbstractVector<E> fromMemorySegment0(MemorySegment ms, long offset);
/*package-private*/
abstract AbstractVector<E> maybeSwap(ByteOrder bo);
@ -504,23 +504,23 @@ abstract class AbstractVector<E> extends Vector<E> {
AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
int blen = Math.max(this.bitSize(), rsp.vectorBitSize()) / Byte.SIZE;
ByteOrder bo = ByteOrder.nativeOrder();
ByteBuffer bb = ByteBuffer.allocate(blen);
this.intoByteBuffer(bb, 0, bo);
MemorySegment ms = MemorySegment.ofArray(new byte[blen]);
this.intoMemorySegment(ms, 0, bo);
VectorMask<F> m = rsp.maskAll(true);
// enum-switches don't optimize properly JDK-8161245
switch (rsp.laneType.switchKey) {
case LaneType.SK_BYTE:
return ByteVector.fromByteBuffer(rsp.check(byte.class), bb, 0, bo, m.check(byte.class)).check0(rsp);
return ByteVector.fromMemorySegment(rsp.check(byte.class), ms, 0, bo, m.check(byte.class)).check0(rsp);
case LaneType.SK_SHORT:
return ShortVector.fromByteBuffer(rsp.check(short.class), bb, 0, bo, m.check(short.class)).check0(rsp);
return ShortVector.fromMemorySegment(rsp.check(short.class), ms, 0, bo, m.check(short.class)).check0(rsp);
case LaneType.SK_INT:
return IntVector.fromByteBuffer(rsp.check(int.class), bb, 0, bo, m.check(int.class)).check0(rsp);
return IntVector.fromMemorySegment(rsp.check(int.class), ms, 0, bo, m.check(int.class)).check0(rsp);
case LaneType.SK_LONG:
return LongVector.fromByteBuffer(rsp.check(long.class), bb, 0, bo, m.check(long.class)).check0(rsp);
return LongVector.fromMemorySegment(rsp.check(long.class), ms, 0, bo, m.check(long.class)).check0(rsp);
case LaneType.SK_FLOAT:
return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp);
return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp);
case LaneType.SK_DOUBLE:
return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp);
return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp);
default:
throw new AssertionError(rsp.toString());
}
@ -730,15 +730,6 @@ abstract class AbstractVector<E> extends Vector<E> {
throw new AssertionError();
}
// Byte buffer wrappers.
static ByteBuffer wrapper(ByteBuffer bb, ByteOrder bo) {
return bb.duplicate().order(bo);
}
static ByteBuffer wrapper(byte[] a, ByteOrder bo) {
return ByteBuffer.wrap(a).order(bo);
}
static {
// Recode uses of VectorSupport.reinterpret if this assertion fails:
assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Byte128Vector extends ByteVector {
(Byte128Vector) v); // specialize
}
@Override
@ForceInline
public Byte128Vector compress(VectorMask<Byte> m) {
return (Byte128Vector)
super.compressTemplate(Byte128Mask.class,
(Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector expand(VectorMask<Byte> m) {
return (Byte128Vector)
super.expandTemplate(Byte128Mask.class,
(Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector selectFrom(Vector<Byte> v) {
@ -677,6 +693,15 @@ final class Byte128Vector extends ByteVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Byte128Mask compress() {
return (Byte128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Byte128Vector.class, Byte128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -876,29 +901,15 @@ final class Byte128Vector extends ByteVector {
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
return super.fromMemorySegment0Template(Byte128Mask.class, ms, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@ -926,22 +937,8 @@ final class Byte128Vector extends ByteVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
super.intoMemorySegment0Template(Byte128Mask.class, ms, offset, (Byte128Mask) m);
}
@ -950,3 +947,4 @@ final class Byte128Vector extends ByteVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Byte256Vector extends ByteVector {
(Byte256Vector) v); // specialize
}
@Override
@ForceInline
public Byte256Vector compress(VectorMask<Byte> m) {
return (Byte256Vector)
super.compressTemplate(Byte256Mask.class,
(Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector expand(VectorMask<Byte> m) {
return (Byte256Vector)
super.expandTemplate(Byte256Mask.class,
(Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector selectFrom(Vector<Byte> v) {
@ -709,6 +725,15 @@ final class Byte256Vector extends ByteVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Byte256Mask compress() {
return (Byte256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Byte256Vector.class, Byte256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -908,29 +933,15 @@ final class Byte256Vector extends ByteVector {
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
return super.fromMemorySegment0Template(Byte256Mask.class, ms, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@ -958,22 +969,8 @@ final class Byte256Vector extends ByteVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
super.intoMemorySegment0Template(Byte256Mask.class, ms, offset, (Byte256Mask) m);
}
@ -982,3 +979,4 @@ final class Byte256Vector extends ByteVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Byte512Vector extends ByteVector {
(Byte512Vector) v); // specialize
}
@Override
@ForceInline
public Byte512Vector compress(VectorMask<Byte> m) {
return (Byte512Vector)
super.compressTemplate(Byte512Mask.class,
(Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector expand(VectorMask<Byte> m) {
return (Byte512Vector)
super.expandTemplate(Byte512Mask.class,
(Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector selectFrom(Vector<Byte> v) {
@ -773,6 +789,15 @@ final class Byte512Vector extends ByteVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Byte512Mask compress() {
return (Byte512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Byte512Vector.class, Byte512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -972,29 +997,15 @@ final class Byte512Vector extends ByteVector {
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
return super.fromMemorySegment0Template(Byte512Mask.class, ms, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@ -1022,22 +1033,8 @@ final class Byte512Vector extends ByteVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
super.intoMemorySegment0Template(Byte512Mask.class, ms, offset, (Byte512Mask) m);
}
@ -1046,3 +1043,4 @@ final class Byte512Vector extends ByteVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Byte64Vector extends ByteVector {
(Byte64Vector) v); // specialize
}
@Override
@ForceInline
public Byte64Vector compress(VectorMask<Byte> m) {
return (Byte64Vector)
super.compressTemplate(Byte64Mask.class,
(Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector expand(VectorMask<Byte> m) {
return (Byte64Vector)
super.expandTemplate(Byte64Mask.class,
(Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector selectFrom(Vector<Byte> v) {
@ -661,6 +677,15 @@ final class Byte64Vector extends ByteVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Byte64Mask compress() {
return (Byte64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Byte64Vector.class, Byte64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -860,29 +885,15 @@ final class Byte64Vector extends ByteVector {
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
return super.fromMemorySegment0Template(Byte64Mask.class, ms, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@ -910,22 +921,8 @@ final class Byte64Vector extends ByteVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
super.intoMemorySegment0Template(Byte64Mask.class, ms, offset, (Byte64Mask) m);
}
@ -934,3 +931,4 @@ final class Byte64Vector extends ByteVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class ByteMaxVector extends ByteVector {
(ByteMaxVector) v); // specialize
}
@Override
@ForceInline
public ByteMaxVector compress(VectorMask<Byte> m) {
return (ByteMaxVector)
super.compressTemplate(ByteMaxMask.class,
(ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector expand(VectorMask<Byte> m) {
return (ByteMaxVector)
super.expandTemplate(ByteMaxMask.class,
(ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector selectFrom(Vector<Byte> v) {
@ -647,6 +663,15 @@ final class ByteMaxVector extends ByteVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public ByteMaxMask compress() {
return (ByteMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
ByteMaxVector.class, ByteMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -846,29 +871,15 @@ final class ByteMaxVector extends ByteVector {
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
return super.fromByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
return super.fromByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m); // specialize
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
return super.fromMemorySegment0Template(ByteMaxMask.class, ms, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@ -896,22 +907,8 @@ final class ByteMaxVector extends ByteVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
super.intoByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
super.intoByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
super.intoMemorySegment0Template(ByteMaxMask.class, ms, offset, (ByteMaxMask) m);
}
@ -920,3 +917,4 @@ final class ByteMaxVector extends ByteVector {
// ================================================
}


@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class ByteVector extends AbstractVector<Byte> {
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
static final ValueLayout.OfByte ELEMENT_LAYOUT = ValueLayout.JAVA_BYTE.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class ByteVector extends AbstractVector<Byte> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
byte apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
ByteVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
byte[] res = new byte[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
ByteVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Byte> m,
FLdLongOp f) {
//byte[] vec = vec();
byte[] res = new byte[length()];
boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static byte memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 1L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, byte a);
}
@ -381,6 +422,40 @@ public abstract class ByteVector extends AbstractVector<Byte> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, byte a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
byte[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Byte> m,
FStLongOp f) {
byte[] vec = vec();
boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
ms.set(ELEMENT_LAYOUT, o + i * 1L, e);
}
// Binary test
/*package-private*/
@ -431,6 +506,36 @@ public abstract class ByteVector extends AbstractVector<Byte> {
return ((byte)bits);
}
static ByteVector expandHelper(Vector<Byte> v, VectorMask<Byte> m) {
VectorSpecies<Byte> vsp = m.vectorSpecies();
ByteVector r = (ByteVector) vsp.zero();
ByteVector vi = (ByteVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static ByteVector compressHelper(Vector<Byte> v, VectorMask<Byte> m) {
VectorSpecies<Byte> vsp = m.vectorSpecies();
ByteVector r = (ByteVector) vsp.zero();
ByteVector vi = (ByteVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
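(Illustration, not part of the patch: lane-level behaviour that expandHelper/compressHelper above define, and that VECTOR_OP_COMPRESS, VECTOR_OP_EXPAND and VECTOR_OP_MASK_COMPRESS intrinsify. A minimal sketch, assuming a JDK 19 build with --add-modules jdk.incubator.vector.)

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;

public class CompressExpandDemo {
    public static void main(String[] args) {
        var species = IntVector.SPECIES_128;                   // 4 int lanes
        IntVector v = IntVector.fromArray(species, new int[] {10, 20, 30, 40}, 0);
        VectorMask<Integer> m = VectorMask.fromValues(species, true, false, true, false);

        System.out.println(v.compress(m));   // [10, 30, 0, 0]  selected lanes packed to the low end
        System.out.println(v.expand(m));     // [10, 0, 20, 0]  low lanes scattered to the set positions

        VectorMask<Integer> packed = m.compress();             // first trueCount() lanes set
        System.out.println(packed.laneIsSet(0) + " " + packed.laneIsSet(1) + " "
                + packed.laneIsSet(2) + " " + packed.laneIsSet(3));   // true true false false
    }
}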
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -620,6 +725,16 @@ public abstract class ByteVector extends AbstractVector<Byte> {
v0.uOp(m, (i, a) -> (byte) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> (byte) Math.abs(a));
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (byte) bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (byte) numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (byte) numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> reverse(a));
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> a);
default: return null;
}
}
@ -1746,6 +1861,25 @@ public abstract class ByteVector extends AbstractVector<Byte> {
return lanewise(ABS);
}
static int bitCount(byte a) {
return Integer.bitCount((int)a & 0xFF);
}
static int numberOfTrailingZeros(byte a) {
return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
}
static int numberOfLeadingZeros(byte a) {
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
}
static byte reverse(byte a) {
if (a == 0 || a == -1) return a;
byte b = rotateLeft(a, 4);
b = (byte) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
b = (byte) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
return b;
}
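(Illustration, not part of the patch: equivalent scalar arithmetic for the package-private byte helpers above, evaluated for the sample lane value 0xF0.)

public class ByteLaneBitOps {
    public static void main(String[] args) {
        byte a = (byte) 0xF0;
        System.out.println(Integer.bitCount(a & 0xFF));                           // 4
        System.out.println(a != 0 ? Integer.numberOfTrailingZeros(a) : 8);        // 4
        System.out.println(a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0);    // 0 (sign bit set)
        System.out.println(Integer.toHexString(Integer.reverse(a << 24) & 0xFF)); // f (0xF0 reversed is 0x0F)
    }
}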
// not (~)
/**
* Computes the bitwise logical complement ({@code ~})
@ -2372,6 +2506,45 @@ public abstract class ByteVector extends AbstractVector<Byte> {
ByteVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
ByteVector compress(VectorMask<Byte> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Byte>>
ByteVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (ByteVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
byte.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
ByteVector expand(VectorMask<Byte> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Byte>>
ByteVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (ByteVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
byte.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2784,90 +2957,6 @@ public abstract class ByteVector extends AbstractVector<Byte> {
return res;
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
ByteVector fromByteArray(VectorSpecies<Byte> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
ByteSpecies vsp = (ByteSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code byte} (zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
ByteVector fromByteArray(VectorSpecies<Byte> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Byte> m) {
ByteSpecies vsp = (ByteSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
(wb_, o, i) -> wb_.get(o + i * 1));
}
/**
* Loads a vector from an array of type {@code byte[]}
* starting at an offset.
@ -3174,44 +3263,49 @@ public abstract class ByteVector extends AbstractVector<Byte> {
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*1 < 0}
* or {@code offset+N*1 >= bb.limit()}
* or {@code offset+N*1 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
ByteVector fromByteBuffer(VectorSpecies<Byte> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
ByteVector fromMemorySegment(VectorSpecies<Byte> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
ByteSpecies vsp = (ByteSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code byte} (zero).
@ -3222,12 +3316,11 @@ public abstract class ByteVector extends AbstractVector<Byte> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* ByteBuffer eb = bb.duplicate()
* .position(offset);
* var slice = ms.asSlice(offset);
* byte[] ar = new byte[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_BYTE.withBitAlignment(8), n);
* }
* }
* ByteVector r = ByteVector.fromArray(species, ar, 0);
@ -3236,33 +3329,36 @@ public abstract class ByteVector extends AbstractVector<Byte> {
* The byte order argument is ignored.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*1 < 0}
* or {@code offset+N*1 >= bb.limit()}
* or {@code offset+N*1 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
ByteVector fromByteBuffer(VectorSpecies<Byte> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Byte> m) {
ByteVector fromMemorySegment(VectorSpecies<Byte> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Byte> m) {
ByteSpecies vsp = (ByteSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
(wb_, o, i) -> wb_.get(o + i * 1));
checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, ByteVector::memorySegmentGet);
}
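// Editor's sketch (illustrative only, not part of this change): a hypothetical caller
// of the new fromMemorySegment entry points, using a heap segment over a byte[] as
// required by the @throws note above; "backing", "seg", "v", "m" and "tail" are
// made-up names.
//
//   byte[] backing = new byte[ByteVector.SPECIES_128.vectorByteSize()];
//   MemorySegment seg = MemorySegment.ofArray(backing);
//   ByteVector v = ByteVector.fromMemorySegment(ByteVector.SPECIES_128,
//                                               seg, 0L, ByteOrder.nativeOrder());
//   // Masked variant, e.g. for a partial tail: unset lanes are filled with zero.
//   VectorMask<Byte> m = ByteVector.SPECIES_128.indexInRange(0, 13);
//   ByteVector tail = ByteVector.fromMemorySegment(ByteVector.SPECIES_128,
//                                                  seg, 0L, ByteOrder.nativeOrder(), m);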
// Memory store operations
@ -3292,7 +3388,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3443,7 +3539,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
normalized,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
}
@ -3581,67 +3677,40 @@ public abstract class ByteVector extends AbstractVector<Byte> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Byte> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
ByteSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Byte> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Byte> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
ByteSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
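// Editor's sketch (illustrative only, reusing "v" and "m" from the load sketch above):
// the new store entry points mirror the loads, and writing to a read-only segment now
// fails with UnsupportedOperationException instead of ReadOnlyBufferException.
//
//   MemorySegment out = MemorySegment.ofArray(new byte[ByteVector.SPECIES_128.vectorByteSize()]);
//   v.intoMemorySegment(out, 0L, ByteOrder.nativeOrder());
//   v.intoMemorySegment(out, 0L, ByteOrder.nativeOrder(), m);   // masked store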
@ -3675,7 +3744,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3692,7 +3761,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3709,7 +3778,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, booleanArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
}
@ -3726,78 +3795,37 @@ public abstract class ByteVector extends AbstractVector<Byte> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, booleanArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
}
@Override
abstract
ByteVector fromByteArray0(byte[] a, int offset);
ByteVector fromMemorySegment0(MemorySegment ms, long offset);
@ForceInline
final
ByteVector fromByteArray0Template(byte[] a, int offset) {
ByteVector fromMemorySegment0Template(MemorySegment ms, long offset) {
ByteSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.get(o + i * 1));
});
}
abstract
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
@ForceInline
final
<M extends VectorMask<Byte>>
ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
ByteSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.get(o + i * 1));
});
}
abstract
ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
ByteSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.get(o + i * 1));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, ByteVector::memorySegmentGet);
});
}
abstract
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
@ForceInline
final
<M extends VectorMask<Byte>>
ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
ByteVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
ByteSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.get(o + i * 1));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentGet);
});
}
@ -3816,7 +3844,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3833,7 +3861,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3852,75 +3880,37 @@ public abstract class ByteVector extends AbstractVector<Byte> {
a, booleanArrayAddress(a, offset),
normalized, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
ByteSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.put(o + i * 1, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
@ForceInline
final
<M extends VectorMask<Byte>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
ByteSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.put(o + i * 1, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
ByteSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.put(o + i * 1, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, ByteVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
@ForceInline
final
<M extends VectorMask<Byte>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
ByteSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.put(o + i * 1, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentSet);
});
}
@ -3937,6 +3927,16 @@ public abstract class ByteVector extends AbstractVector<Byte> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
ByteSpecies vsp,
VectorMask<Byte> m,
int scale,
long limit) {
((AbstractMask<Byte>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
ByteSpecies vsp,
@ -4256,6 +4256,21 @@ public abstract class ByteVector extends AbstractVector<Byte> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
ByteVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
ByteVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Byte> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -4270,6 +4285,20 @@ public abstract class ByteVector extends AbstractVector<Byte> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Byte> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -4383,3 +4412,4 @@ public abstract class ByteVector extends AbstractVector<Byte> {
public static final VectorSpecies<Byte> SPECIES_PREFERRED
= (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Double128Vector extends DoubleVector {
(Double128Vector) v); // specialize
}
@Override
@ForceInline
public Double128Vector compress(VectorMask<Double> m) {
return (Double128Vector)
super.compressTemplate(Double128Mask.class,
(Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector expand(VectorMask<Double> m) {
return (Double128Vector)
super.expandTemplate(Double128Mask.class,
(Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector selectFrom(Vector<Double> v) {
@ -638,6 +654,15 @@ final class Double128Vector extends DoubleVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Double128Mask compress() {
return (Double128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Double128Vector.class, Double128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
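// Editor's note (illustrative): mask compression packs the set lanes to the front of
// the mask, which is what the iota().compare(LT, trueCount()) fallback above computes.
// For this 2-lane species:
//
//   VectorMask<Double> m = VectorMask.fromValues(DoubleVector.SPECIES_128, false, true);
//   m.compress();   // -> {true, false}: one set lane, moved to lane 0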
// Binary operations
@Override
@ -830,29 +855,15 @@ final class Double128Vector extends DoubleVector {
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
return super.fromMemorySegment0Template(Double128Mask.class, ms, offset, (Double128Mask) m); // specialize
}
@ForceInline
@ -880,22 +891,8 @@ final class Double128Vector extends DoubleVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
super.intoMemorySegment0Template(Double128Mask.class, ms, offset, (Double128Mask) m);
}
@ -904,3 +901,4 @@ final class Double128Vector extends DoubleVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Double256Vector extends DoubleVector {
(Double256Vector) v); // specialize
}
@Override
@ForceInline
public Double256Vector compress(VectorMask<Double> m) {
return (Double256Vector)
super.compressTemplate(Double256Mask.class,
(Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector expand(VectorMask<Double> m) {
return (Double256Vector)
super.expandTemplate(Double256Mask.class,
(Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector selectFrom(Vector<Double> v) {
@ -642,6 +658,15 @@ final class Double256Vector extends DoubleVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Double256Mask compress() {
return (Double256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Double256Vector.class, Double256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -834,29 +859,15 @@ final class Double256Vector extends DoubleVector {
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
return super.fromMemorySegment0Template(Double256Mask.class, ms, offset, (Double256Mask) m); // specialize
}
@ForceInline
@ -884,22 +895,8 @@ final class Double256Vector extends DoubleVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
super.intoMemorySegment0Template(Double256Mask.class, ms, offset, (Double256Mask) m);
}
@ -908,3 +905,4 @@ final class Double256Vector extends DoubleVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Double512Vector extends DoubleVector {
(Double512Vector) v); // specialize
}
@Override
@ForceInline
public Double512Vector compress(VectorMask<Double> m) {
return (Double512Vector)
super.compressTemplate(Double512Mask.class,
(Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector expand(VectorMask<Double> m) {
return (Double512Vector)
super.expandTemplate(Double512Mask.class,
(Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector selectFrom(Vector<Double> v) {
@ -650,6 +666,15 @@ final class Double512Vector extends DoubleVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Double512Mask compress() {
return (Double512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Double512Vector.class, Double512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -842,29 +867,15 @@ final class Double512Vector extends DoubleVector {
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
return super.fromMemorySegment0Template(Double512Mask.class, ms, offset, (Double512Mask) m); // specialize
}
@ForceInline
@ -892,22 +903,8 @@ final class Double512Vector extends DoubleVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
super.intoMemorySegment0Template(Double512Mask.class, ms, offset, (Double512Mask) m);
}
@ -916,3 +913,4 @@ final class Double512Vector extends DoubleVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Double64Vector extends DoubleVector {
(Double64Vector) v); // specialize
}
@Override
@ForceInline
public Double64Vector compress(VectorMask<Double> m) {
return (Double64Vector)
super.compressTemplate(Double64Mask.class,
(Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector expand(VectorMask<Double> m) {
return (Double64Vector)
super.expandTemplate(Double64Mask.class,
(Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector selectFrom(Vector<Double> v) {
@ -636,6 +652,15 @@ final class Double64Vector extends DoubleVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Double64Mask compress() {
return (Double64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Double64Vector.class, Double64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -828,29 +853,15 @@ final class Double64Vector extends DoubleVector {
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
return super.fromMemorySegment0Template(Double64Mask.class, ms, offset, (Double64Mask) m); // specialize
}
@ForceInline
@ -878,22 +889,8 @@ final class Double64Vector extends DoubleVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
super.intoMemorySegment0Template(Double64Mask.class, ms, offset, (Double64Mask) m);
}
@ -902,3 +899,4 @@ final class Double64Vector extends DoubleVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class DoubleMaxVector extends DoubleVector {
(DoubleMaxVector) v); // specialize
}
@Override
@ForceInline
public DoubleMaxVector compress(VectorMask<Double> m) {
return (DoubleMaxVector)
super.compressTemplate(DoubleMaxMask.class,
(DoubleMaxMask) m); // specialize
}
@Override
@ForceInline
public DoubleMaxVector expand(VectorMask<Double> m) {
return (DoubleMaxVector)
super.expandTemplate(DoubleMaxMask.class,
(DoubleMaxMask) m); // specialize
}
@Override
@ForceInline
public DoubleMaxVector selectFrom(Vector<Double> v) {
@ -635,6 +651,15 @@ final class DoubleMaxVector extends DoubleVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public DoubleMaxMask compress() {
return (DoubleMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
DoubleMaxVector.class, DoubleMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -827,29 +852,15 @@ final class DoubleMaxVector extends DoubleVector {
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
return super.fromByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
return super.fromByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m); // specialize
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
return super.fromMemorySegment0Template(DoubleMaxMask.class, ms, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@ -877,22 +888,8 @@ final class DoubleMaxVector extends DoubleVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
super.intoByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
super.intoByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
super.intoMemorySegment0Template(DoubleMaxMask.class, ms, offset, (DoubleMaxMask) m);
}
@ -901,3 +898,4 @@ final class DoubleMaxVector extends DoubleVector {
// ================================================
}


@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class DoubleVector extends AbstractVector<Double> {
static final int FORBID_OPCODE_KIND = VO_NOFP;
static final ValueLayout.OfDouble ELEMENT_LAYOUT = ValueLayout.JAVA_DOUBLE.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class DoubleVector extends AbstractVector<Double> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
double apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
DoubleVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
double[] res = new double[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
DoubleVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Double> m,
FLdLongOp f) {
//double[] vec = vec();
double[] res = new double[length()];
boolean[] mbits = ((AbstractMask<Double>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static double memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 8L);
}
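// Editor's note (illustrative): each lane i is read at byte offset o + i * 8L using the
// 8-bit-aligned JAVA_DOUBLE layout declared above, so arbitrary segment offsets are
// accepted; non-native byte orders are handled afterwards by maybeSwap(bo) in the
// public fromMemorySegment methods.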
interface FStOp<M> {
void apply(M memory, int offset, int i, double a);
}
@ -381,6 +422,40 @@ public abstract class DoubleVector extends AbstractVector<Double> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, double a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
double[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Double> m,
FStLongOp f) {
double[] vec = vec();
boolean[] mbits = ((AbstractMask<Double>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, double e) {
ms.set(ELEMENT_LAYOUT, o + i * 8L, e);
}
// Binary test
/*package-private*/
@ -420,6 +495,36 @@ public abstract class DoubleVector extends AbstractVector<Double> {
return Double.longBitsToDouble((long)bits);
}
static DoubleVector expandHelper(Vector<Double> v, VectorMask<Double> m) {
VectorSpecies<Double> vsp = m.vectorSpecies();
DoubleVector r = (DoubleVector) vsp.zero();
DoubleVector vi = (DoubleVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static DoubleVector compressHelper(Vector<Double> v, VectorMask<Double> m) {
VectorSpecies<Double> vsp = m.vectorSpecies();
DoubleVector r = (DoubleVector) vsp.zero();
DoubleVector vi = (DoubleVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
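// Editor's sketch (illustrative only): the scalar fallbacks above pin down the
// cross-lane semantics of compress/expand; with a 4-lane species and mask {T,F,T,F}:
//
//   VectorSpecies<Double> sp = DoubleVector.SPECIES_256;                  // 4 lanes
//   DoubleVector v = DoubleVector.fromArray(sp, new double[]{1, 2, 3, 4}, 0);
//   VectorMask<Double> m = VectorMask.fromValues(sp, true, false, true, false);
//   v.compress(m);   // [1.0, 3.0, 0.0, 0.0]  selected lanes packed to the front
//   v.expand(m);     // [1.0, 0.0, 2.0, 0.0]  leading lanes scattered to the set positions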
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -1594,6 +1699,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
}
// sqrt
/**
* Computes the square root of this vector.
@ -2241,6 +2347,45 @@ public abstract class DoubleVector extends AbstractVector<Double> {
DoubleVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
DoubleVector compress(VectorMask<Double> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Double>>
DoubleVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (DoubleVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
double.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
DoubleVector expand(VectorMask<Double> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Double>>
DoubleVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (DoubleVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
double.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2609,90 +2754,6 @@ public abstract class DoubleVector extends AbstractVector<Double> {
return toArray();
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
DoubleVector fromByteArray(VectorSpecies<Double> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
DoubleSpecies vsp = (DoubleSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code double} (positive zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
DoubleVector fromByteArray(VectorSpecies<Double> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Double> m) {
DoubleSpecies vsp = (DoubleSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Double>)m,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
}
/**
* Loads a vector from an array of type {@code double[]}
* starting at an offset.
@ -2883,44 +2944,49 @@ public abstract class DoubleVector extends AbstractVector<Double> {
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*8 < 0}
* or {@code offset+N*8 >= bb.limit()}
* or {@code offset+N*8 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
DoubleVector fromByteBuffer(VectorSpecies<Double> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
DoubleVector fromMemorySegment(VectorSpecies<Double> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
DoubleSpecies vsp = (DoubleSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code double} (positive zero).
@ -2931,13 +2997,11 @@ public abstract class DoubleVector extends AbstractVector<Double> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* DoubleBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asDoubleBuffer();
* var slice = ms.asSlice(offset);
* double[] ar = new double[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_DOUBLE.withBitAlignment(8), n);
* }
* }
* DoubleVector r = DoubleVector.fromArray(species, ar, 0);
@ -2951,33 +3015,36 @@ public abstract class DoubleVector extends AbstractVector<Double> {
* the bytes of lane values.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*8 < 0}
* or {@code offset+N*8 >= bb.limit()}
* or {@code offset+N*8 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
DoubleVector fromByteBuffer(VectorSpecies<Double> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Double> m) {
DoubleVector fromMemorySegment(VectorSpecies<Double> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Double> m) {
DoubleSpecies vsp = (DoubleSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Double>)m,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, DoubleVector::memorySegmentGet);
}
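// Editor's sketch (illustrative only): loading doubles through the new segment API;
// note the heap segment must still be byte[]-backed, per the @throws note above.
// "seg" and "dv" are made-up names.
//
//   MemorySegment seg = MemorySegment.ofArray(new byte[DoubleVector.SPECIES_256.vectorByteSize()]);
//   DoubleVector dv = DoubleVector.fromMemorySegment(DoubleVector.SPECIES_256,
//                                                    seg, 0L, ByteOrder.nativeOrder());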
// Memory store operations
@ -3007,7 +3074,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3167,67 +3234,40 @@ public abstract class DoubleVector extends AbstractVector<Double> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Double> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
DoubleSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Double> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Double> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
DoubleSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
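// Editor's sketch (illustrative only, reusing "seg" and "dv" from the load sketch above):
//
//   dv.intoMemorySegment(seg, 0L, ByteOrder.nativeOrder());
//   // A read-only segment (e.g. seg.asReadOnly()) makes this throw
//   // UnsupportedOperationException, matching the check above.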
@ -3261,7 +3301,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3278,7 +3318,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3336,74 +3376,33 @@ public abstract class DoubleVector extends AbstractVector<Double> {
@Override
abstract
DoubleVector fromByteArray0(byte[] a, int offset);
DoubleVector fromMemorySegment0(MemorySegment ms, long offset);
@ForceInline
final
DoubleVector fromByteArray0Template(byte[] a, int offset) {
DoubleVector fromMemorySegment0Template(MemorySegment ms, long offset) {
DoubleSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
});
}
abstract
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m);
@ForceInline
final
<M extends VectorMask<Double>>
DoubleVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
DoubleSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
});
}
abstract
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
DoubleVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
DoubleSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, DoubleVector::memorySegmentGet);
});
}
abstract
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m);
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m);
@ForceInline
final
<M extends VectorMask<Double>>
DoubleVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
DoubleVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
DoubleSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getDouble(o + i * 8));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, DoubleVector::memorySegmentGet);
});
}
@ -3422,7 +3421,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3439,7 +3438,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3497,71 +3496,33 @@ public abstract class DoubleVector extends AbstractVector<Double> {
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
DoubleSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.putDouble(o + i * 8, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m);
@ForceInline
final
<M extends VectorMask<Double>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
DoubleSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putDouble(o + i * 8, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
DoubleSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.putDouble(o + i * 8, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, DoubleVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m);
@ForceInline
final
<M extends VectorMask<Double>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
DoubleSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putDouble(o + i * 8, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, DoubleVector::memorySegmentSet);
});
}
@ -3578,6 +3539,16 @@ public abstract class DoubleVector extends AbstractVector<Double> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
DoubleSpecies vsp,
VectorMask<Double> m,
int scale,
long limit) {
((AbstractMask<Double>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
DoubleSpecies vsp,
@ -3888,6 +3859,21 @@ public abstract class DoubleVector extends AbstractVector<Double> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
DoubleVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
DoubleVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Double> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -3902,6 +3888,20 @@ public abstract class DoubleVector extends AbstractVector<Double> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Double> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -4015,3 +4015,4 @@ public abstract class DoubleVector extends AbstractVector<Double> {
public static final VectorSpecies<Double> SPECIES_PREFERRED
= (DoubleSpecies) VectorSpecies.ofPreferred(double.class);
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Float128Vector extends FloatVector {
(Float128Vector) v); // specialize
}
@Override
@ForceInline
public Float128Vector compress(VectorMask<Float> m) {
return (Float128Vector)
super.compressTemplate(Float128Mask.class,
(Float128Mask) m); // specialize
}
@Override
@ForceInline
public Float128Vector expand(VectorMask<Float> m) {
return (Float128Vector)
super.expandTemplate(Float128Mask.class,
(Float128Mask) m); // specialize
}
@Override
@ForceInline
public Float128Vector selectFrom(Vector<Float> v) {
@ -642,6 +658,15 @@ final class Float128Vector extends FloatVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Float128Mask compress() {
return (Float128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Float128Vector.class, Float128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -834,29 +859,15 @@ final class Float128Vector extends FloatVector {
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
return super.fromMemorySegment0Template(Float128Mask.class, ms, offset, (Float128Mask) m); // specialize
}
@ForceInline
@ -884,22 +895,8 @@ final class Float128Vector extends FloatVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
super.intoMemorySegment0Template(Float128Mask.class, ms, offset, (Float128Mask) m);
}
@ -908,3 +905,4 @@ final class Float128Vector extends FloatVector {
// ================================================
}


@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Float256Vector extends FloatVector {
(Float256Vector) v); // specialize
}
@Override
@ForceInline
public Float256Vector compress(VectorMask<Float> m) {
return (Float256Vector)
super.compressTemplate(Float256Mask.class,
(Float256Mask) m); // specialize
}
@Override
@ForceInline
public Float256Vector expand(VectorMask<Float> m) {
return (Float256Vector)
super.expandTemplate(Float256Mask.class,
(Float256Mask) m); // specialize
}
@Override
@ForceInline
public Float256Vector selectFrom(Vector<Float> v) {
@ -650,6 +666,15 @@ final class Float256Vector extends FloatVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Float256Mask compress() {
return (Float256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Float256Vector.class, Float256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -842,29 +867,15 @@ final class Float256Vector extends FloatVector {
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
return super.fromMemorySegment0Template(Float256Mask.class, ms, offset, (Float256Mask) m); // specialize
}
@ForceInline
@ -892,22 +903,8 @@ final class Float256Vector extends FloatVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
super.intoMemorySegment0Template(Float256Mask.class, ms, offset, (Float256Mask) m);
}
@ -916,3 +913,4 @@ final class Float256Vector extends FloatVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Float512Vector extends FloatVector {
(Float512Vector) v); // specialize
}
@Override
@ForceInline
public Float512Vector compress(VectorMask<Float> m) {
return (Float512Vector)
super.compressTemplate(Float512Mask.class,
(Float512Mask) m); // specialize
}
@Override
@ForceInline
public Float512Vector expand(VectorMask<Float> m) {
return (Float512Vector)
super.expandTemplate(Float512Mask.class,
(Float512Mask) m); // specialize
}
@Override
@ForceInline
public Float512Vector selectFrom(Vector<Float> v) {
@ -666,6 +682,15 @@ final class Float512Vector extends FloatVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Float512Mask compress() {
return (Float512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Float512Vector.class, Float512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -858,29 +883,15 @@ final class Float512Vector extends FloatVector {
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
return super.fromMemorySegment0Template(Float512Mask.class, ms, offset, (Float512Mask) m); // specialize
}
@ForceInline
@ -908,22 +919,8 @@ final class Float512Vector extends FloatVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
super.intoMemorySegment0Template(Float512Mask.class, ms, offset, (Float512Mask) m);
}
@ -932,3 +929,4 @@ final class Float512Vector extends FloatVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class Float64Vector extends FloatVector {
(Float64Vector) v); // specialize
}
@Override
@ForceInline
public Float64Vector compress(VectorMask<Float> m) {
return (Float64Vector)
super.compressTemplate(Float64Mask.class,
(Float64Mask) m); // specialize
}
@Override
@ForceInline
public Float64Vector expand(VectorMask<Float> m) {
return (Float64Vector)
super.expandTemplate(Float64Mask.class,
(Float64Mask) m); // specialize
}
@Override
@ForceInline
public Float64Vector selectFrom(Vector<Float> v) {
@ -638,6 +654,15 @@ final class Float64Vector extends FloatVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Float64Mask compress() {
return (Float64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Float64Vector.class, Float64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -830,29 +855,15 @@ final class Float64Vector extends FloatVector {
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
return super.fromMemorySegment0Template(Float64Mask.class, ms, offset, (Float64Mask) m); // specialize
}
@ForceInline
@ -880,22 +891,8 @@ final class Float64Vector extends FloatVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
super.intoMemorySegment0Template(Float64Mask.class, ms, offset, (Float64Mask) m);
}
@ -904,3 +901,4 @@ final class Float64Vector extends FloatVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -461,6 +461,22 @@ final class FloatMaxVector extends FloatVector {
(FloatMaxVector) v); // specialize
}
@Override
@ForceInline
public FloatMaxVector compress(VectorMask<Float> m) {
return (FloatMaxVector)
super.compressTemplate(FloatMaxMask.class,
(FloatMaxMask) m); // specialize
}
@Override
@ForceInline
public FloatMaxVector expand(VectorMask<Float> m) {
return (FloatMaxVector)
super.expandTemplate(FloatMaxMask.class,
(FloatMaxMask) m); // specialize
}
@Override
@ForceInline
public FloatMaxVector selectFrom(Vector<Float> v) {
@ -635,6 +651,15 @@ final class FloatMaxVector extends FloatVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public FloatMaxMask compress() {
return (FloatMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
FloatMaxVector.class, FloatMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -827,29 +852,15 @@ final class FloatMaxVector extends FloatVector {
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
return super.fromByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
return super.fromByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m); // specialize
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
return super.fromMemorySegment0Template(FloatMaxMask.class, ms, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@ -877,22 +888,8 @@ final class FloatMaxVector extends FloatVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
super.intoByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
super.intoByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
super.intoMemorySegment0Template(FloatMaxMask.class, ms, offset, (FloatMaxMask) m);
}
@ -901,3 +898,4 @@ final class FloatMaxVector extends FloatVector {
// ================================================
}

View file

@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class FloatVector extends AbstractVector<Float> {
static final int FORBID_OPCODE_KIND = VO_NOFP;
static final ValueLayout.OfFloat ELEMENT_LAYOUT = ValueLayout.JAVA_FLOAT.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class FloatVector extends AbstractVector<Float> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
float apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
FloatVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
float[] res = new float[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
FloatVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Float> m,
FLdLongOp f) {
//float[] vec = vec();
float[] res = new float[length()];
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static float memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 4L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, float a);
}
@ -381,6 +422,40 @@ public abstract class FloatVector extends AbstractVector<Float> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, float a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
float[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Float> m,
FStLongOp f) {
float[] vec = vec();
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, float e) {
ms.set(ELEMENT_LAYOUT, o + i * 4L, e);
}
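The ldLongOp/stLongOp helpers above are the lane-by-lane fallback used when the memory-segment intrinsic is not taken: lane i lives at offset + i * 4 and is accessed through the byte-aligned float layout. A minimal sketch of that access pattern, assuming the JDK 19 preview java.lang.foreign API; the class name and values are illustrative.

// Illustrative only; JDK 19 with --enable-preview (java.lang.foreign is a preview API there).
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.Arrays;

class SegmentLaneAccessSketch {
    // Same shape as ELEMENT_LAYOUT above: a float layout relaxed to byte alignment.
    static final ValueLayout.OfFloat F = ValueLayout.JAVA_FLOAT.withBitAlignment(8);

    public static void main(String[] args) {
        MemorySegment ms = MemorySegment.ofArray(new byte[16]);            // room for 4 floats
        for (int i = 0; i < 4; i++) {
            ms.set(F, i * 4L, i + 0.5f);                                   // what stLongOp does per lane
        }
        float[] lanes = new float[4];
        for (int i = 0; i < 4; i++) {
            lanes[i] = ms.get(F, i * 4L);                                  // what ldLongOp does per lane
        }
        System.out.println(Arrays.toString(lanes));                        // [0.5, 1.5, 2.5, 3.5]
    }
}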
// Binary test
/*package-private*/
@ -420,6 +495,36 @@ public abstract class FloatVector extends AbstractVector<Float> {
return Float.intBitsToFloat((int)bits);
}
static FloatVector expandHelper(Vector<Float> v, VectorMask<Float> m) {
VectorSpecies<Float> vsp = m.vectorSpecies();
FloatVector r = (FloatVector) vsp.zero();
FloatVector vi = (FloatVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static FloatVector compressHelper(Vector<Float> v, VectorMask<Float> m) {
VectorSpecies<Float> vsp = m.vectorSpecies();
FloatVector r = (FloatVector) vsp.zero();
FloatVector vi = (FloatVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
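A short worked trace of the two fallbacks above, with hypothetical lane values:

// For v = [a, b, c, d] and a mask m with lanes {0, 2} set (m.trueCount() == 2):
//   compressHelper(v, m) packs the selected lanes toward lane 0    -> [a, c, 0, 0]
//   expandHelper(v, m) scatters the low lanes to the set positions -> [a, 0, b, 0]
// When m.allTrue(), both return v unchanged (the early-out above).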
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -1602,6 +1707,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
}
// sqrt
/**
* Computes the square root of this vector.
@ -2253,6 +2359,45 @@ public abstract class FloatVector extends AbstractVector<Float> {
FloatVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
FloatVector compress(VectorMask<Float> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Float>>
FloatVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (FloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
float.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
FloatVector expand(VectorMask<Float> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Float>>
FloatVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (FloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
float.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
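A rough usage sketch (not part of this change) of the new compress/expand entry points declared above; the species and lane values are illustrative.

// Illustrative only; run with --add-modules jdk.incubator.vector.
import jdk.incubator.vector.*;

class CompressExpandSketch {
    public static void main(String[] args) {
        VectorSpecies<Float> sp = FloatVector.SPECIES_128;                 // 4 lanes
        FloatVector v = FloatVector.fromArray(sp, new float[]{1f, 2f, 3f, 4f}, 0);
        VectorMask<Float> m = VectorMask.fromValues(sp, true, false, true, false);
        System.out.println(v.compress(m));  // [1.0, 3.0, 0.0, 0.0] - selected lanes packed low
        System.out.println(v.expand(m));    // [1.0, 0.0, 2.0, 0.0] - low lanes scattered to the set lanes
    }
}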
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2633,90 +2778,6 @@ public abstract class FloatVector extends AbstractVector<Float> {
return res;
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
FloatVector fromByteArray(VectorSpecies<Float> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
FloatSpecies vsp = (FloatSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code float} (positive zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
FloatVector fromByteArray(VectorSpecies<Float> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Float> m) {
FloatSpecies vsp = (FloatSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
}
/**
* Loads a vector from an array of type {@code float[]}
* starting at an offset.
@ -2889,44 +2950,49 @@ public abstract class FloatVector extends AbstractVector<Float> {
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*4 < 0}
* or {@code offset+N*4 >= bb.limit()}
* or {@code offset+N*4 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
FloatVector fromByteBuffer(VectorSpecies<Float> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
FloatVector fromMemorySegment(VectorSpecies<Float> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
FloatSpecies vsp = (FloatSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
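A rough usage sketch of the unmasked load above, assuming a heap segment backed by a byte[] (which the specification above permits) and the byte-aligned float layout used elsewhere in this class; names and values are illustrative.

// Illustrative only; JDK 19 with --enable-preview --add-modules jdk.incubator.vector.
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import jdk.incubator.vector.*;

class FromSegmentSketch {
    public static void main(String[] args) {
        VectorSpecies<Float> sp = FloatVector.SPECIES_128;                 // 4 lanes
        ValueLayout.OfFloat F = ValueLayout.JAVA_FLOAT.withBitAlignment(8);
        MemorySegment ms = MemorySegment.ofArray(new byte[sp.vectorByteSize()]);
        for (int i = 0; i < sp.length(); i++) {
            ms.setAtIndex(F, i, i + 1.0f);                                 // 1.0, 2.0, 3.0, 4.0
        }
        FloatVector v = FloatVector.fromMemorySegment(sp, ms, 0L, ByteOrder.nativeOrder());
        System.out.println(v);                                             // [1.0, 2.0, 3.0, 4.0]
    }
}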
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code float} (positive zero).
@ -2937,13 +3003,11 @@ public abstract class FloatVector extends AbstractVector<Float> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* FloatBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asFloatBuffer();
* var slice = ms.asSlice(offset);
* float[] ar = new float[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_FLOAT.withBitAlignment(8), n);
* }
* }
* FloatVector r = FloatVector.fromArray(species, ar, 0);
@ -2957,33 +3021,36 @@ public abstract class FloatVector extends AbstractVector<Float> {
* the bytes of lane values.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*4 < 0}
* or {@code offset+N*4 >= bb.limit()}
* or {@code offset+N*4 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
FloatVector fromByteBuffer(VectorSpecies<Float> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Float> m) {
FloatVector fromMemorySegment(VectorSpecies<Float> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Float> m) {
FloatSpecies vsp = (FloatSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, FloatVector::memorySegmentGet);
}
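The masked load above also covers tail handling: when a full vector would run past the end of the segment, the lane-wise fallback with the per-lane bounds check is taken instead of the intrinsic path. A sketch of loading a 2-element tail, with illustrative names and values.

// Illustrative only; JDK 19 with --enable-preview --add-modules jdk.incubator.vector.
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import jdk.incubator.vector.*;

class MaskedSegmentLoadSketch {
    public static void main(String[] args) {
        VectorSpecies<Float> sp = FloatVector.SPECIES_128;                 // 4 lanes
        ValueLayout.OfFloat F = ValueLayout.JAVA_FLOAT.withBitAlignment(8);
        MemorySegment ms = MemorySegment.ofArray(new byte[6 * Float.BYTES]); // 4 full lanes + a 2-element tail
        for (int i = 0; i < 6; i++) {
            ms.setAtIndex(F, i, i + 1.0f);
        }
        int tail = 4;                                                      // element index where the tail begins
        VectorMask<Float> m = sp.indexInRange(tail, 6);                    // lanes {0, 1} set
        FloatVector v = FloatVector.fromMemorySegment(sp, ms, tail * 4L, ByteOrder.nativeOrder(), m);
        System.out.println(v);                                             // [5.0, 6.0, 0.0, 0.0]
    }
}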
// Memory store operations
@ -3013,7 +3080,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3154,67 +3221,40 @@ public abstract class FloatVector extends AbstractVector<Float> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Float> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
FloatSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Float> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Float> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
FloatSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
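A rough round-trip sketch of the stores above (a read-only segment would throw UnsupportedOperationException, as coded); names and values are illustrative.

// Illustrative only; JDK 19 with --enable-preview --add-modules jdk.incubator.vector.
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import jdk.incubator.vector.*;

class IntoSegmentSketch {
    public static void main(String[] args) {
        VectorSpecies<Float> sp = FloatVector.SPECIES_128;                 // 4 lanes
        ValueLayout.OfFloat F = ValueLayout.JAVA_FLOAT.withBitAlignment(8);
        FloatVector v = FloatVector.fromArray(sp, new float[]{1f, 2f, 3f, 4f}, 0);
        MemorySegment ms = MemorySegment.ofArray(new byte[sp.vectorByteSize()]);
        v.intoMemorySegment(ms, 0L, ByteOrder.nativeOrder());              // unmasked store of all lanes
        System.out.println(ms.getAtIndex(F, 2));                           // 3.0
        VectorMask<Float> m = VectorMask.fromValues(sp, true, true, false, false);
        FloatVector.zero(sp).intoMemorySegment(ms, 0L, ByteOrder.nativeOrder(), m);  // overwrites lanes 0-1 only
        System.out.println(ms.getAtIndex(F, 0) + " " + ms.getAtIndex(F, 3));         // 0.0 4.0
    }
}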
@ -3248,7 +3288,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3265,7 +3305,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3305,74 +3345,33 @@ public abstract class FloatVector extends AbstractVector<Float> {
@Override
abstract
FloatVector fromByteArray0(byte[] a, int offset);
FloatVector fromMemorySegment0(MemorySegment bb, long offset);
@ForceInline
final
FloatVector fromByteArray0Template(byte[] a, int offset) {
FloatVector fromMemorySegment0Template(MemorySegment ms, long offset) {
FloatSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
});
}
abstract
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
});
}
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
FloatSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, FloatVector::memorySegmentGet);
});
}
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
FloatVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getFloat(o + i * 4));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, FloatVector::memorySegmentGet);
});
}
@ -3391,7 +3390,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3408,7 +3407,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3447,71 +3446,33 @@ public abstract class FloatVector extends AbstractVector<Float> {
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
FloatSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
FloatSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, FloatVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Float> m);
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
FloatSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, FloatVector::memorySegmentSet);
});
}
@ -3528,6 +3489,16 @@ public abstract class FloatVector extends AbstractVector<Float> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
FloatSpecies vsp,
VectorMask<Float> m,
int scale,
long limit) {
((AbstractMask<Float>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
FloatSpecies vsp,
@ -3838,6 +3809,21 @@ public abstract class FloatVector extends AbstractVector<Float> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
FloatVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
FloatVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Float> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -3852,6 +3838,20 @@ public abstract class FloatVector extends AbstractVector<Float> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Float> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -3965,3 +3965,4 @@ public abstract class FloatVector extends AbstractVector<Float> {
public static final VectorSpecies<Float> SPECIES_PREFERRED
= (FloatSpecies) VectorSpecies.ofPreferred(float.class);
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Int128Vector extends IntVector {
(Int128Vector) v); // specialize
}
@Override
@ForceInline
public Int128Vector compress(VectorMask<Integer> m) {
return (Int128Vector)
super.compressTemplate(Int128Mask.class,
(Int128Mask) m); // specialize
}
@Override
@ForceInline
public Int128Vector expand(VectorMask<Integer> m) {
return (Int128Vector)
super.expandTemplate(Int128Mask.class,
(Int128Mask) m); // specialize
}
@Override
@ForceInline
public Int128Vector selectFrom(Vector<Integer> v) {
@ -653,6 +669,15 @@ final class Int128Vector extends IntVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Int128Mask compress() {
return (Int128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Int128Vector.class, Int128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -845,29 +870,15 @@ final class Int128Vector extends IntVector {
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
return super.fromMemorySegment0Template(Int128Mask.class, ms, offset, (Int128Mask) m); // specialize
}
@ForceInline
@ -895,22 +906,8 @@ final class Int128Vector extends IntVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
super.intoMemorySegment0Template(Int128Mask.class, ms, offset, (Int128Mask) m);
}
@ -919,3 +916,4 @@ final class Int128Vector extends IntVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Int256Vector extends IntVector {
(Int256Vector) v); // specialize
}
@Override
@ForceInline
public Int256Vector compress(VectorMask<Integer> m) {
return (Int256Vector)
super.compressTemplate(Int256Mask.class,
(Int256Mask) m); // specialize
}
@Override
@ForceInline
public Int256Vector expand(VectorMask<Integer> m) {
return (Int256Vector)
super.expandTemplate(Int256Mask.class,
(Int256Mask) m); // specialize
}
@Override
@ForceInline
public Int256Vector selectFrom(Vector<Integer> v) {
@ -661,6 +677,15 @@ final class Int256Vector extends IntVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Int256Mask compress() {
return (Int256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Int256Vector.class, Int256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -853,29 +878,15 @@ final class Int256Vector extends IntVector {
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
return super.fromMemorySegment0Template(Int256Mask.class, ms, offset, (Int256Mask) m); // specialize
}
@ForceInline
@ -903,22 +914,8 @@ final class Int256Vector extends IntVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
super.intoMemorySegment0Template(Int256Mask.class, ms, offset, (Int256Mask) m);
}
@ -927,3 +924,4 @@ final class Int256Vector extends IntVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Int512Vector extends IntVector {
(Int512Vector) v); // specialize
}
@Override
@ForceInline
public Int512Vector compress(VectorMask<Integer> m) {
return (Int512Vector)
super.compressTemplate(Int512Mask.class,
(Int512Mask) m); // specialize
}
@Override
@ForceInline
public Int512Vector expand(VectorMask<Integer> m) {
return (Int512Vector)
super.expandTemplate(Int512Mask.class,
(Int512Mask) m); // specialize
}
@Override
@ForceInline
public Int512Vector selectFrom(Vector<Integer> v) {
@ -677,6 +693,15 @@ final class Int512Vector extends IntVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Int512Mask compress() {
return (Int512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Int512Vector.class, Int512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -869,29 +894,15 @@ final class Int512Vector extends IntVector {
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
return super.fromMemorySegment0Template(Int512Mask.class, ms, offset, (Int512Mask) m); // specialize
}
@ForceInline
@ -919,22 +930,8 @@ final class Int512Vector extends IntVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
super.intoMemorySegment0Template(Int512Mask.class, ms, offset, (Int512Mask) m);
}
@ -943,3 +940,4 @@ final class Int512Vector extends IntVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Int64Vector extends IntVector {
(Int64Vector) v); // specialize
}
@Override
@ForceInline
public Int64Vector compress(VectorMask<Integer> m) {
return (Int64Vector)
super.compressTemplate(Int64Mask.class,
(Int64Mask) m); // specialize
}
@Override
@ForceInline
public Int64Vector expand(VectorMask<Integer> m) {
return (Int64Vector)
super.expandTemplate(Int64Mask.class,
(Int64Mask) m); // specialize
}
@Override
@ForceInline
public Int64Vector selectFrom(Vector<Integer> v) {
@ -649,6 +665,15 @@ final class Int64Vector extends IntVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Int64Mask compress() {
return (Int64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Int64Vector.class, Int64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -841,29 +866,15 @@ final class Int64Vector extends IntVector {
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
return super.fromMemorySegment0Template(Int64Mask.class, ms, offset, (Int64Mask) m); // specialize
}
@ForceInline
@ -891,22 +902,8 @@ final class Int64Vector extends IntVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
super.intoMemorySegment0Template(Int64Mask.class, ms, offset, (Int64Mask) m);
}
@ -915,3 +912,4 @@ final class Int64Vector extends IntVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class IntMaxVector extends IntVector {
(IntMaxVector) v); // specialize
}
@Override
@ForceInline
public IntMaxVector compress(VectorMask<Integer> m) {
return (IntMaxVector)
super.compressTemplate(IntMaxMask.class,
(IntMaxMask) m); // specialize
}
@Override
@ForceInline
public IntMaxVector expand(VectorMask<Integer> m) {
return (IntMaxVector)
super.expandTemplate(IntMaxMask.class,
(IntMaxMask) m); // specialize
}
@Override
@ForceInline
public IntMaxVector selectFrom(Vector<Integer> v) {
@ -647,6 +663,15 @@ final class IntMaxVector extends IntVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public IntMaxMask compress() {
return (IntMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
IntMaxVector.class, IntMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -850,29 +875,15 @@ final class IntMaxVector extends IntVector {
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
return super.fromByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
return super.fromByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m); // specialize
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
return super.fromMemorySegment0Template(IntMaxMask.class, ms, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@ -900,22 +911,8 @@ final class IntMaxVector extends IntVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
super.intoByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
super.intoByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
super.intoMemorySegment0Template(IntMaxMask.class, ms, offset, (IntMaxMask) m);
}
@ -924,3 +921,4 @@ final class IntMaxVector extends IntVector {
// ================================================
}

View file

@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class IntVector extends AbstractVector<Integer> {
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
static final ValueLayout.OfInt ELEMENT_LAYOUT = ValueLayout.JAVA_INT.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class IntVector extends AbstractVector<Integer> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
int apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
IntVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
int[] res = new int[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
IntVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Integer> m,
FLdLongOp f) {
//int[] vec = vec();
int[] res = new int[length()];
boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static int memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 4L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, int a);
}
@ -381,6 +422,40 @@ public abstract class IntVector extends AbstractVector<Integer> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, int a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
int[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Integer> m,
FStLongOp f) {
int[] vec = vec();
boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, int e) {
ms.set(ELEMENT_LAYOUT, o + i * 4L, e);
}
// Binary test
/*package-private*/
@ -431,6 +506,36 @@ public abstract class IntVector extends AbstractVector<Integer> {
return ((int)bits);
}
static IntVector expandHelper(Vector<Integer> v, VectorMask<Integer> m) {
VectorSpecies<Integer> vsp = m.vectorSpecies();
IntVector r = (IntVector) vsp.zero();
IntVector vi = (IntVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static IntVector compressHelper(Vector<Integer> v, VectorMask<Integer> m) {
VectorSpecies<Integer> vsp = m.vectorSpecies();
IntVector r = (IntVector) vsp.zero();
IntVector vi = (IntVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -620,6 +725,16 @@ public abstract class IntVector extends AbstractVector<Integer> {
v0.uOp(m, (i, a) -> (int) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Math.abs(a));
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Integer.bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Integer.numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Integer.numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Integer.reverse(a));
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> (int) Integer.reverseBytes(a));
default: return null;
}
}
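A rough sketch of the new lanewise unary operators whose scalar fallbacks appear above; it assumes the corresponding VectorOperators constants introduced by this change (BIT_COUNT, TRAILING_ZEROS_COUNT, REVERSE_BYTES), with illustrative values.

// Illustrative only; run with --add-modules jdk.incubator.vector.
import jdk.incubator.vector.*;

class IntUnaryOpsSketch {
    public static void main(String[] args) {
        VectorSpecies<Integer> sp = IntVector.SPECIES_128;                 // 4 lanes
        IntVector v = IntVector.fromArray(sp, new int[]{1, 2, 0x80, 0xFF00}, 0);
        System.out.println(v.lanewise(VectorOperators.BIT_COUNT));             // [1, 1, 1, 8]
        System.out.println(v.lanewise(VectorOperators.TRAILING_ZEROS_COUNT));  // [0, 1, 7, 8]
        System.out.println(v.lanewise(VectorOperators.REVERSE_BYTES));         // [16777216, 33554432, -2147483648, 16711680]
    }
}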
@ -760,6 +875,10 @@ public abstract class IntVector extends AbstractVector<Integer> {
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> Integer.compress(a, n));
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> Integer.expand(a, n));
default: return null;
}
}
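Similarly, a sketch of the new lanewise COMPRESS_BITS/EXPAND_BITS operators, which the fallback above maps onto Integer.compress/Integer.expand (both new in Java 19); the VectorOperators constant names are assumed from this change and the values are illustrative.

// Illustrative only; run with --add-modules jdk.incubator.vector.
import jdk.incubator.vector.*;

class IntBitCompressSketch {
    public static void main(String[] args) {
        VectorSpecies<Integer> sp = IntVector.SPECIES_128;
        IntVector v    = IntVector.broadcast(sp, 0b1010_1010);
        IntVector bits = IntVector.broadcast(sp, 0b0000_1111);             // per-lane bit mask
        IntVector c = v.lanewise(VectorOperators.COMPRESS_BITS, bits);
        System.out.println(c);   // Integer.compress(0b1010_1010, 0b0000_1111) == 0b1010, i.e. 10 in every lane
        IntVector e = c.lanewise(VectorOperators.EXPAND_BITS, IntVector.broadcast(sp, 0b1111_0000));
        System.out.println(e);   // Integer.expand(0b1010, 0b1111_0000) == 0b1010_0000, i.e. 160 in every lane
    }
}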
@ -1745,6 +1864,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
return lanewise(ABS);
}
// not (~)
/**
* Computes the bitwise logical complement ({@code ~})
@ -2371,6 +2491,45 @@ public abstract class IntVector extends AbstractVector<Integer> {
IntVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
IntVector compress(VectorMask<Integer> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Integer>>
IntVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (IntVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
int.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
IntVector expand(VectorMask<Integer> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Integer>>
IntVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (IntVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
int.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2776,90 +2935,6 @@ public abstract class IntVector extends AbstractVector<Integer> {
return res;
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
IntVector fromByteArray(VectorSpecies<Integer> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
IntSpecies vsp = (IntSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code int} (zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
IntVector fromByteArray(VectorSpecies<Integer> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Integer> m) {
IntSpecies vsp = (IntSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Integer>)m,
(wb_, o, i) -> wb_.getInt(o + i * 4));
}
/**
* Loads a vector from an array of type {@code int[]}
* starting at an offset.
@ -3032,44 +3107,49 @@ public abstract class IntVector extends AbstractVector<Integer> {
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*4 < 0}
* or {@code offset+N*4 >= bb.limit()}
* or {@code offset+N*4 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
IntVector fromByteBuffer(VectorSpecies<Integer> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
IntVector fromMemorySegment(VectorSpecies<Integer> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
IntSpecies vsp = (IntSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
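A usage sketch for the new segment-based load, illustration only and not part of the diff; it assumes JDK 19 with --add-modules jdk.incubator.vector and --enable-preview, since java.lang.foreign is a preview API in this release:

import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

public class SegmentLoadDemo {
    public static void main(String[] args) {
        VectorSpecies<Integer> sp = IntVector.SPECIES_128;
        // Heap segments must be backed by a byte[] for vector access.
        MemorySegment ms = MemorySegment.ofArray(new byte[64]);
        IntVector v = IntVector.fromMemorySegment(sp, ms, 16L, ByteOrder.nativeOrder());
        System.out.println(v);   // four int lanes composed from bytes 16..31
    }
}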
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code int} (zero).
@ -3080,13 +3160,11 @@ public abstract class IntVector extends AbstractVector<Integer> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* IntBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asIntBuffer();
* var slice = ms.asSlice(offset);
* int[] ar = new int[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_INT.withBitAlignment(8), n);
* }
* }
* IntVector r = IntVector.fromArray(species, ar, 0);
@ -3100,33 +3178,36 @@ public abstract class IntVector extends AbstractVector<Integer> {
* the bytes of lane values.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*4 < 0}
* or {@code offset+N*4 >= bb.limit()}
* or {@code offset+N*4 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
IntVector fromByteBuffer(VectorSpecies<Integer> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Integer> m) {
IntVector fromMemorySegment(VectorSpecies<Integer> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Integer> m) {
IntSpecies vsp = (IntSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Integer>)m,
(wb_, o, i) -> wb_.getInt(o + i * 4));
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, IntVector::memorySegmentGet);
}
// Memory store operations
@ -3156,7 +3237,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3297,67 +3378,40 @@ public abstract class IntVector extends AbstractVector<Integer> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Integer> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
IntSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Integer> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Integer> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
IntSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
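The corresponding masked store, which now rejects read-only segments with UnsupportedOperationException rather than the old ReadOnlyBufferException; illustration only, under the same assumptions as the load sketch above:

import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

public class SegmentStoreDemo {
    public static void main(String[] args) {
        VectorSpecies<Integer> sp = IntVector.SPECIES_128;
        MemorySegment ms = MemorySegment.ofArray(new byte[64]);
        IntVector v = IntVector.broadcast(sp, 7);
        VectorMask<Integer> evens = VectorMask.fromValues(sp, true, false, true, false);
        v.intoMemorySegment(ms, 0L, ByteOrder.nativeOrder(), evens);   // writes lanes 0 and 2 only
        // A read-only view refuses writes:
        // v.intoMemorySegment(ms.asReadOnly(), 0L, ByteOrder.nativeOrder())
        // throws UnsupportedOperationException.
    }
}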
@ -3391,7 +3445,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3408,7 +3462,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3448,74 +3502,33 @@ public abstract class IntVector extends AbstractVector<Integer> {
@Override
abstract
IntVector fromByteArray0(byte[] a, int offset);
    IntVector fromMemorySegment0(MemorySegment ms, long offset);
@ForceInline
final
IntVector fromByteArray0Template(byte[] a, int offset) {
IntVector fromMemorySegment0Template(MemorySegment ms, long offset) {
IntSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getInt(o + i * 4));
});
}
abstract
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m);
@ForceInline
final
<M extends VectorMask<Integer>>
IntVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
IntSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getInt(o + i * 4));
});
}
abstract
IntVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
IntVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
IntSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getInt(o + i * 4));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, IntVector::memorySegmentGet);
});
}
abstract
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m);
@ForceInline
final
<M extends VectorMask<Integer>>
IntVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
IntVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
IntSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getInt(o + i * 4));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, IntVector::memorySegmentGet);
});
}
@ -3534,7 +3547,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3551,7 +3564,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3590,71 +3603,33 @@ public abstract class IntVector extends AbstractVector<Integer> {
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
IntSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m);
@ForceInline
final
<M extends VectorMask<Integer>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
IntSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
IntSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, IntVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m);
@ForceInline
final
<M extends VectorMask<Integer>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
IntSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, IntVector::memorySegmentSet);
});
}
@ -3671,6 +3646,16 @@ public abstract class IntVector extends AbstractVector<Integer> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
IntSpecies vsp,
VectorMask<Integer> m,
int scale,
long limit) {
((AbstractMask<Integer>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
IntSpecies vsp,
@ -3981,6 +3966,21 @@ public abstract class IntVector extends AbstractVector<Integer> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
IntVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
IntVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Integer> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -3995,6 +3995,20 @@ public abstract class IntVector extends AbstractVector<Integer> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Integer> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -4108,3 +4122,4 @@ public abstract class IntVector extends AbstractVector<Integer> {
public static final VectorSpecies<Integer> SPECIES_PREFERRED
= (IntSpecies) VectorSpecies.ofPreferred(int.class);
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -464,6 +464,22 @@ final class Long128Vector extends LongVector {
(Long128Vector) v); // specialize
}
@Override
@ForceInline
public Long128Vector compress(VectorMask<Long> m) {
return (Long128Vector)
super.compressTemplate(Long128Mask.class,
(Long128Mask) m); // specialize
}
@Override
@ForceInline
public Long128Vector expand(VectorMask<Long> m) {
return (Long128Vector)
super.expandTemplate(Long128Mask.class,
(Long128Mask) m); // specialize
}
@Override
@ForceInline
public Long128Vector selectFrom(Vector<Long> v) {
@ -639,6 +655,15 @@ final class Long128Vector extends LongVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Long128Mask compress() {
return (Long128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Long128Vector.class, Long128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
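Mask compression packs the set lanes into the lowest lane positions while preserving trueCount(); a short sketch of the intended behavior, illustration only and not part of the diff:

import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

public class MaskCompressDemo {
    public static void main(String[] args) {
        VectorSpecies<Long> sp = LongVector.SPECIES_128;             // 2 long lanes
        VectorMask<Long> m = VectorMask.fromValues(sp, false, true);
        VectorMask<Long> c = m.compress();
        System.out.println(c.laneIsSet(0));                          // true: the set lane moved to lane 0
        System.out.println(c.trueCount() == m.trueCount());          // true: lane count is preserved
    }
}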
// Binary operations
@Override
@ -831,29 +856,15 @@ final class Long128Vector extends LongVector {
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
return super.fromMemorySegment0Template(Long128Mask.class, ms, offset, (Long128Mask) m); // specialize
}
@ForceInline
@ -881,22 +892,8 @@ final class Long128Vector extends LongVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
super.intoMemorySegment0Template(Long128Mask.class, ms, offset, (Long128Mask) m);
}
@ -905,3 +902,4 @@ final class Long128Vector extends LongVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -464,6 +464,22 @@ final class Long256Vector extends LongVector {
(Long256Vector) v); // specialize
}
@Override
@ForceInline
public Long256Vector compress(VectorMask<Long> m) {
return (Long256Vector)
super.compressTemplate(Long256Mask.class,
(Long256Mask) m); // specialize
}
@Override
@ForceInline
public Long256Vector expand(VectorMask<Long> m) {
return (Long256Vector)
super.expandTemplate(Long256Mask.class,
(Long256Mask) m); // specialize
}
@Override
@ForceInline
public Long256Vector selectFrom(Vector<Long> v) {
@ -643,6 +659,15 @@ final class Long256Vector extends LongVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Long256Mask compress() {
return (Long256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Long256Vector.class, Long256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -835,29 +860,15 @@ final class Long256Vector extends LongVector {
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
return super.fromMemorySegment0Template(Long256Mask.class, ms, offset, (Long256Mask) m); // specialize
}
@ForceInline
@ -885,22 +896,8 @@ final class Long256Vector extends LongVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
super.intoMemorySegment0Template(Long256Mask.class, ms, offset, (Long256Mask) m);
}
@ -909,3 +906,4 @@ final class Long256Vector extends LongVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -464,6 +464,22 @@ final class Long512Vector extends LongVector {
(Long512Vector) v); // specialize
}
@Override
@ForceInline
public Long512Vector compress(VectorMask<Long> m) {
return (Long512Vector)
super.compressTemplate(Long512Mask.class,
(Long512Mask) m); // specialize
}
@Override
@ForceInline
public Long512Vector expand(VectorMask<Long> m) {
return (Long512Vector)
super.expandTemplate(Long512Mask.class,
(Long512Mask) m); // specialize
}
@Override
@ForceInline
public Long512Vector selectFrom(Vector<Long> v) {
@ -651,6 +667,15 @@ final class Long512Vector extends LongVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Long512Mask compress() {
return (Long512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Long512Vector.class, Long512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -843,29 +868,15 @@ final class Long512Vector extends LongVector {
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
return super.fromMemorySegment0Template(Long512Mask.class, ms, offset, (Long512Mask) m); // specialize
}
@ForceInline
@ -893,22 +904,8 @@ final class Long512Vector extends LongVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
super.intoMemorySegment0Template(Long512Mask.class, ms, offset, (Long512Mask) m);
}
@ -917,3 +914,4 @@ final class Long512Vector extends LongVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -464,6 +464,22 @@ final class Long64Vector extends LongVector {
(Long64Vector) v); // specialize
}
@Override
@ForceInline
public Long64Vector compress(VectorMask<Long> m) {
return (Long64Vector)
super.compressTemplate(Long64Mask.class,
(Long64Mask) m); // specialize
}
@Override
@ForceInline
public Long64Vector expand(VectorMask<Long> m) {
return (Long64Vector)
super.expandTemplate(Long64Mask.class,
(Long64Mask) m); // specialize
}
@Override
@ForceInline
public Long64Vector selectFrom(Vector<Long> v) {
@ -637,6 +653,15 @@ final class Long64Vector extends LongVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Long64Mask compress() {
return (Long64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Long64Vector.class, Long64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -829,29 +854,15 @@ final class Long64Vector extends LongVector {
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
return super.fromMemorySegment0Template(Long64Mask.class, ms, offset, (Long64Mask) m); // specialize
}
@ForceInline
@ -879,22 +890,8 @@ final class Long64Vector extends LongVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
super.intoMemorySegment0Template(Long64Mask.class, ms, offset, (Long64Mask) m);
}
@ -903,3 +900,4 @@ final class Long64Vector extends LongVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -464,6 +464,22 @@ final class LongMaxVector extends LongVector {
(LongMaxVector) v); // specialize
}
@Override
@ForceInline
public LongMaxVector compress(VectorMask<Long> m) {
return (LongMaxVector)
super.compressTemplate(LongMaxMask.class,
(LongMaxMask) m); // specialize
}
@Override
@ForceInline
public LongMaxVector expand(VectorMask<Long> m) {
return (LongMaxVector)
super.expandTemplate(LongMaxMask.class,
(LongMaxMask) m); // specialize
}
@Override
@ForceInline
public LongMaxVector selectFrom(Vector<Long> v) {
@ -637,6 +653,15 @@ final class LongMaxVector extends LongVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public LongMaxMask compress() {
return (LongMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
LongMaxVector.class, LongMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -829,29 +854,15 @@ final class LongMaxVector extends LongVector {
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
return super.fromByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
return super.fromByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m); // specialize
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
return super.fromMemorySegment0Template(LongMaxMask.class, ms, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@ -879,22 +890,8 @@ final class LongMaxVector extends LongVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
super.intoByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
super.intoByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
super.intoMemorySegment0Template(LongMaxMask.class, ms, offset, (LongMaxMask) m);
}
@ -903,3 +900,4 @@ final class LongMaxVector extends LongVector {
// ================================================
}

View file

@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class LongVector extends AbstractVector<Long> {
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
static final ValueLayout.OfLong ELEMENT_LAYOUT = ValueLayout.JAVA_LONG.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class LongVector extends AbstractVector<Long> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
long apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
LongVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
        // no existing lane values are needed; each lane is read directly from the segment
long[] res = new long[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
LongVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Long> m,
FLdLongOp f) {
        // existing lane values are not needed; unset lanes default to zero
long[] res = new long[length()];
boolean[] mbits = ((AbstractMask<Long>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static long memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 8L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, long a);
}
@ -381,6 +422,40 @@ public abstract class LongVector extends AbstractVector<Long> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, long a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
long[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Long> m,
FStLongOp f) {
long[] vec = vec();
boolean[] mbits = ((AbstractMask<Long>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, long e) {
ms.set(ELEMENT_LAYOUT, o + i * 8L, e);
}
// Binary test
/*package-private*/
@ -431,6 +506,36 @@ public abstract class LongVector extends AbstractVector<Long> {
return ((long)bits);
}
static LongVector expandHelper(Vector<Long> v, VectorMask<Long> m) {
VectorSpecies<Long> vsp = m.vectorSpecies();
LongVector r = (LongVector) vsp.zero();
LongVector vi = (LongVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static LongVector compressHelper(Vector<Long> v, VectorMask<Long> m) {
VectorSpecies<Long> vsp = m.vectorSpecies();
LongVector r = (LongVector) vsp.zero();
LongVector vi = (LongVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -578,6 +683,16 @@ public abstract class LongVector extends AbstractVector<Long> {
v0.uOp(m, (i, a) -> (long) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Math.abs(a));
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Long.bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Long.numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Long.numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Long.reverse(a));
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> (long) Long.reverseBytes(a));
default: return null;
}
}
@ -718,6 +833,10 @@ public abstract class LongVector extends AbstractVector<Long> {
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> Long.compress(a, n));
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> Long.expand(a, n));
default: return null;
}
}
@ -1658,6 +1777,7 @@ public abstract class LongVector extends AbstractVector<Long> {
return lanewise(ABS);
}
// not (~)
/**
* Computes the bitwise logical complement ({@code ~})
@ -2237,6 +2357,45 @@ public abstract class LongVector extends AbstractVector<Long> {
LongVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
LongVector compress(VectorMask<Long> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Long>>
LongVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (LongVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
long.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
LongVector expand(VectorMask<Long> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Long>>
LongVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (LongVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
long.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2637,90 +2796,6 @@ public abstract class LongVector extends AbstractVector<Long> {
return res;
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
LongVector fromByteArray(VectorSpecies<Long> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
LongSpecies vsp = (LongSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code long} (zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
LongVector fromByteArray(VectorSpecies<Long> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Long> m) {
LongSpecies vsp = (LongSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Long>)m,
(wb_, o, i) -> wb_.getLong(o + i * 8));
}
/**
* Loads a vector from an array of type {@code long[]}
* starting at an offset.
@ -2911,44 +2986,49 @@ public abstract class LongVector extends AbstractVector<Long> {
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*8 < 0}
* or {@code offset+N*8 >= bb.limit()}
* or {@code offset+N*8 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
LongVector fromByteBuffer(VectorSpecies<Long> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
LongVector fromMemorySegment(VectorSpecies<Long> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
LongSpecies vsp = (LongSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code long} (zero).
@ -2959,13 +3039,11 @@ public abstract class LongVector extends AbstractVector<Long> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* LongBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asLongBuffer();
* var slice = ms.asSlice(offset);
* long[] ar = new long[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_LONG.withBitAlignment(8), n);
* }
* }
* LongVector r = LongVector.fromArray(species, ar, 0);
@ -2979,33 +3057,36 @@ public abstract class LongVector extends AbstractVector<Long> {
* the bytes of lane values.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*8 < 0}
* or {@code offset+N*8 >= bb.limit()}
* or {@code offset+N*8 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
LongVector fromByteBuffer(VectorSpecies<Long> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Long> m) {
LongVector fromMemorySegment(VectorSpecies<Long> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Long> m) {
LongSpecies vsp = (LongSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Long>)m,
(wb_, o, i) -> wb_.getLong(o + i * 8));
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, LongVector::memorySegmentGet);
}
// Memory store operations
@ -3035,7 +3116,7 @@ public abstract class LongVector extends AbstractVector<Long> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3195,67 +3276,40 @@ public abstract class LongVector extends AbstractVector<Long> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Long> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
LongSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Long> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Long> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
LongSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
@ -3289,7 +3343,7 @@ public abstract class LongVector extends AbstractVector<Long> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3306,7 +3360,7 @@ public abstract class LongVector extends AbstractVector<Long> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3364,74 +3418,33 @@ public abstract class LongVector extends AbstractVector<Long> {
@Override
abstract
LongVector fromByteArray0(byte[] a, int offset);
    LongVector fromMemorySegment0(MemorySegment ms, long offset);
@ForceInline
final
LongVector fromByteArray0Template(byte[] a, int offset) {
LongVector fromMemorySegment0Template(MemorySegment ms, long offset) {
LongSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getLong(o + i * 8));
});
}
abstract
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m);
@ForceInline
final
<M extends VectorMask<Long>>
LongVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
LongSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getLong(o + i * 8));
});
}
abstract
LongVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
LongVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
LongSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getLong(o + i * 8));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, LongVector::memorySegmentGet);
});
}
abstract
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m);
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m);
@ForceInline
final
<M extends VectorMask<Long>>
LongVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
LongVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
LongSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getLong(o + i * 8));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, LongVector::memorySegmentGet);
});
}
@ -3450,7 +3463,7 @@ public abstract class LongVector extends AbstractVector<Long> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3467,7 +3480,7 @@ public abstract class LongVector extends AbstractVector<Long> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3525,71 +3538,33 @@ public abstract class LongVector extends AbstractVector<Long> {
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
LongSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.putLong(o + i * 8, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m);
@ForceInline
final
<M extends VectorMask<Long>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
LongSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putLong(o + i * 8, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
LongSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.putLong(o + i * 8, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, LongVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m);
    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m);
@ForceInline
final
<M extends VectorMask<Long>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
LongSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putLong(o + i * 8, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, LongVector::memorySegmentSet);
});
}
@ -3606,6 +3581,16 @@ public abstract class LongVector extends AbstractVector<Long> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
LongSpecies vsp,
VectorMask<Long> m,
int scale,
long limit) {
((AbstractMask<Long>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
LongSpecies vsp,
@ -3907,6 +3892,21 @@ public abstract class LongVector extends AbstractVector<Long> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
LongVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
LongVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Long> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -3921,6 +3921,20 @@ public abstract class LongVector extends AbstractVector<Long> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Long> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -4034,3 +4048,4 @@ public abstract class LongVector extends AbstractVector<Long> {
public static final VectorSpecies<Long> SPECIES_PREFERRED
= (LongSpecies) VectorSpecies.ofPreferred(long.class);
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Short128Vector extends ShortVector {
(Short128Vector) v); // specialize
}
@Override
@ForceInline
public Short128Vector compress(VectorMask<Short> m) {
return (Short128Vector)
super.compressTemplate(Short128Mask.class,
(Short128Mask) m); // specialize
}
@Override
@ForceInline
public Short128Vector expand(VectorMask<Short> m) {
return (Short128Vector)
super.expandTemplate(Short128Mask.class,
(Short128Mask) m); // specialize
}
@Override
@ForceInline
public Short128Vector selectFrom(Vector<Short> v) {
@ -661,6 +677,15 @@ final class Short128Vector extends ShortVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Short128Mask compress() {
return (Short128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Short128Vector.class, Short128Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -860,29 +885,15 @@ final class Short128Vector extends ShortVector {
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
return super.fromMemorySegment0Template(Short128Mask.class, ms, offset, (Short128Mask) m); // specialize
}
@ForceInline
@ -904,22 +915,8 @@ final class Short128Vector extends ShortVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
super.intoMemorySegment0Template(Short128Mask.class, ms, offset, (Short128Mask) m);
}
@ForceInline
@ -934,3 +931,4 @@ final class Short128Vector extends ShortVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Short256Vector extends ShortVector {
(Short256Vector) v); // specialize
}
@Override
@ForceInline
public Short256Vector compress(VectorMask<Short> m) {
return (Short256Vector)
super.compressTemplate(Short256Mask.class,
(Short256Mask) m); // specialize
}
@Override
@ForceInline
public Short256Vector expand(VectorMask<Short> m) {
return (Short256Vector)
super.expandTemplate(Short256Mask.class,
(Short256Mask) m); // specialize
}
@Override
@ForceInline
public Short256Vector selectFrom(Vector<Short> v) {
@ -677,6 +693,15 @@ final class Short256Vector extends ShortVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Short256Mask compress() {
return (Short256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Short256Vector.class, Short256Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -876,29 +901,15 @@ final class Short256Vector extends ShortVector {
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
return super.fromMemorySegment0Template(Short256Mask.class, ms, offset, (Short256Mask) m); // specialize
}
@ForceInline
@ -920,22 +931,8 @@ final class Short256Vector extends ShortVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
super.intoMemorySegment0Template(Short256Mask.class, ms, offset, (Short256Mask) m);
}
@ForceInline
@ -950,3 +947,4 @@ final class Short256Vector extends ShortVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Short512Vector extends ShortVector {
(Short512Vector) v); // specialize
}
@Override
@ForceInline
public Short512Vector compress(VectorMask<Short> m) {
return (Short512Vector)
super.compressTemplate(Short512Mask.class,
(Short512Mask) m); // specialize
}
@Override
@ForceInline
public Short512Vector expand(VectorMask<Short> m) {
return (Short512Vector)
super.expandTemplate(Short512Mask.class,
(Short512Mask) m); // specialize
}
@Override
@ForceInline
public Short512Vector selectFrom(Vector<Short> v) {
@ -709,6 +725,15 @@ final class Short512Vector extends ShortVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Short512Mask compress() {
return (Short512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Short512Vector.class, Short512Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -908,29 +933,15 @@ final class Short512Vector extends ShortVector {
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
return super.fromMemorySegment0Template(Short512Mask.class, ms, offset, (Short512Mask) m); // specialize
}
@ForceInline
@ -952,22 +963,8 @@ final class Short512Vector extends ShortVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
super.intoMemorySegment0Template(Short512Mask.class, ms, offset, (Short512Mask) m);
}
@ForceInline
@ -982,3 +979,4 @@ final class Short512Vector extends ShortVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class Short64Vector extends ShortVector {
(Short64Vector) v); // specialize
}
@Override
@ForceInline
public Short64Vector compress(VectorMask<Short> m) {
return (Short64Vector)
super.compressTemplate(Short64Mask.class,
(Short64Mask) m); // specialize
}
@Override
@ForceInline
public Short64Vector expand(VectorMask<Short> m) {
return (Short64Vector)
super.expandTemplate(Short64Mask.class,
(Short64Mask) m); // specialize
}
@Override
@ForceInline
public Short64Vector selectFrom(Vector<Short> v) {
@ -653,6 +669,15 @@ final class Short64Vector extends ShortVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public Short64Mask compress() {
return (Short64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
Short64Vector.class, Short64Mask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -852,29 +877,15 @@ final class Short64Vector extends ShortVector {
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
return super.fromMemorySegment0Template(Short64Mask.class, ms, offset, (Short64Mask) m); // specialize
}
@ForceInline
@ -896,22 +907,8 @@ final class Short64Vector extends ShortVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
super.intoMemorySegment0Template(Short64Mask.class, ms, offset, (Short64Mask) m);
}
@ForceInline
@ -926,3 +923,4 @@ final class Short64Vector extends ShortVector {
// ================================================
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -474,6 +474,22 @@ final class ShortMaxVector extends ShortVector {
(ShortMaxVector) v); // specialize
}
@Override
@ForceInline
public ShortMaxVector compress(VectorMask<Short> m) {
return (ShortMaxVector)
super.compressTemplate(ShortMaxMask.class,
(ShortMaxMask) m); // specialize
}
@Override
@ForceInline
public ShortMaxVector expand(VectorMask<Short> m) {
return (ShortMaxVector)
super.expandTemplate(ShortMaxMask.class,
(ShortMaxMask) m); // specialize
}
@Override
@ForceInline
public ShortMaxVector selectFrom(Vector<Short> v) {
@ -647,6 +663,15 @@ final class ShortMaxVector extends ShortVector {
return xor(maskAll(true));
}
@Override
@ForceInline
public ShortMaxMask compress() {
return (ShortMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
ShortMaxVector.class, ShortMaxMask.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -846,29 +871,15 @@ final class ShortMaxVector extends ShortVector {
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
return super.fromByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
return super.fromByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m); // specialize
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
return super.fromMemorySegment0Template(ShortMaxMask.class, ms, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@ -890,22 +901,8 @@ final class ShortMaxVector extends ShortVector {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
super.intoByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
super.intoByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
super.intoMemorySegment0Template(ShortMaxMask.class, ms, offset, (ShortMaxMask) m);
}
@ForceInline
@ -920,3 +917,4 @@ final class ShortMaxVector extends ShortVector {
// ================================================
}

View file

@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -57,6 +57,8 @@ public abstract class ShortVector extends AbstractVector<Short> {
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -351,6 +353,45 @@ public abstract class ShortVector extends AbstractVector<Short> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
short apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
ShortVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
short[] res = new short[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
ShortVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Short> m,
FLdLongOp f) {
//short[] vec = vec();
short[] res = new short[length()];
boolean[] mbits = ((AbstractMask<Short>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static short memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * 2L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, short a);
}
@ -381,6 +422,40 @@ public abstract class ShortVector extends AbstractVector<Short> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, short a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
short[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<Short> m,
FStLongOp f) {
short[] vec = vec();
boolean[] mbits = ((AbstractMask<Short>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, short e) {
ms.set(ELEMENT_LAYOUT, o + i * 2L, e);
}
// Binary test
/*package-private*/
@ -431,6 +506,36 @@ public abstract class ShortVector extends AbstractVector<Short> {
return ((short)bits);
}
static ShortVector expandHelper(Vector<Short> v, VectorMask<Short> m) {
VectorSpecies<Short> vsp = m.vectorSpecies();
ShortVector r = (ShortVector) vsp.zero();
ShortVector vi = (ShortVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static ShortVector compressHelper(Vector<Short> v, VectorMask<Short> m) {
VectorSpecies<Short> vsp = m.vectorSpecies();
ShortVector r = (ShortVector) vsp.zero();
ShortVector vi = (ShortVector) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -620,6 +725,16 @@ public abstract class ShortVector extends AbstractVector<Short> {
v0.uOp(m, (i, a) -> (short) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> (short) Math.abs(a));
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (short) bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (short) numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> (short) numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> reverse(a));
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> (short) Short.reverseBytes(a));
default: return null;
}
}
@ -1746,6 +1861,26 @@ public abstract class ShortVector extends AbstractVector<Short> {
return lanewise(ABS);
}
static int bitCount(short a) {
return Integer.bitCount((int)a & 0xFFFF);
}
static int numberOfTrailingZeros(short a) {
return a != 0 ? Integer.numberOfTrailingZeros(a) : 16;
}
static int numberOfLeadingZeros(short a) {
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0;
}
static short reverse(short a) {
if (a == 0 || a == -1) return a;
short b = rotateLeft(a, 8);
b = (short) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
b = (short) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
b = (short) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
return b;
}
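The reverse helper above swaps the two bytes with an 8-bit rotate, then exchanges adjacent bits, bit pairs, and nibbles with three masked shift passes; together the four steps reverse all 16 bits. A quick editorial sanity check (a sketch only; the helper is package-private, so this would run inside the class):
    short a = (short) 0x1234;
    short viaHelper  = reverse(a);                                    // helper defined above
    short viaInteger = (short) (Integer.reverse(a & 0xFFFF) >>> 16);  // reference result
    assert viaHelper == viaInteger && viaHelper == (short) 0x2C48;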
// not (~)
/**
* Computes the bitwise logical complement ({@code ~})
@ -2372,6 +2507,45 @@ public abstract class ShortVector extends AbstractVector<Short> {
ShortVector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
ShortVector compress(VectorMask<Short> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Short>>
ShortVector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (ShortVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
short.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
ShortVector expand(VectorMask<Short> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<Short>>
ShortVector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return (ShortVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
short.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -2784,90 +2958,6 @@ public abstract class ShortVector extends AbstractVector<Short> {
return res;
}
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
ShortVector fromByteArray(VectorSpecies<Short> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
ShortSpecies vsp = (ShortSpecies) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code short} (zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
ShortVector fromByteArray(VectorSpecies<Short> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<Short> m) {
ShortSpecies vsp = (ShortSpecies) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
(wb_, o, i) -> wb_.getShort(o + i * 2));
}
/**
* Loads a vector from an array of type {@code short[]}
* starting at an offset.
@ -3167,44 +3257,49 @@ public abstract class ShortVector extends AbstractVector<Short> {
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*2 < 0}
* or {@code offset+N*2 >= bb.limit()}
* or {@code offset+N*2 >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
ShortVector fromByteBuffer(VectorSpecies<Short> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
ShortVector fromMemorySegment(VectorSpecies<Short> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
ShortSpecies vsp = (ShortSpecies) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code short} (zero).
@ -3215,13 +3310,11 @@ public abstract class ShortVector extends AbstractVector<Short> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* ShortBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asShortBuffer();
* var slice = ms.asSlice(offset);
* short[] ar = new short[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_SHORT.withBitAlignment(8), n);
* }
* }
* ShortVector r = ShortVector.fromArray(species, ar, 0);
@ -3235,33 +3328,36 @@ public abstract class ShortVector extends AbstractVector<Short> {
* the bytes of lane values.
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*2 < 0}
* or {@code offset+N*2 >= bb.limit()}
* or {@code offset+N*2 >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
ShortVector fromByteBuffer(VectorSpecies<Short> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Short> m) {
ShortVector fromMemorySegment(VectorSpecies<Short> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Short> m) {
ShortSpecies vsp = (ShortSpecies) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
(wb_, o, i) -> wb_.getShort(o + i * 2));
checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, ShortVector::memorySegmentGet);
}
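A short usage sketch of the two segment-based factories above (editorial illustration, not part of this patch; assumes the JDK 19 incubator vector module and the preview FFM API, with `count` shorts held in a caller-allocated native segment):
    try (MemorySession session = MemorySession.openConfined()) {
        int count = 100;
        MemorySegment seg = MemorySegment.allocateNative((long) count * Short.BYTES, session);
        VectorSpecies<Short> SP = ShortVector.SPECIES_PREFERRED;
        int i = 0;
        for (; i < SP.loopBound(count); i += SP.length()) {
            ShortVector v = ShortVector.fromMemorySegment(SP, seg, (long) i * Short.BYTES,
                                                          ByteOrder.nativeOrder());
            v.add((short) 1).intoMemorySegment(seg, (long) i * Short.BYTES, ByteOrder.nativeOrder());
        }
        // Masked tail: unset lanes are zero-filled on load and skipped on store.
        VectorMask<Short> m = SP.indexInRange(i, count);
        ShortVector tail = ShortVector.fromMemorySegment(SP, seg, (long) i * Short.BYTES,
                                                         ByteOrder.nativeOrder(), m);
        tail.add((short) 1).intoMemorySegment(seg, (long) i * Short.BYTES, ByteOrder.nativeOrder(), m);
    }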
// Memory store operations
@ -3291,7 +3387,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -3437,7 +3533,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
}
@ -3567,67 +3663,40 @@ public abstract class ShortVector extends AbstractVector<Short> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<Short> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
ShortSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<Short> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<Short> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
ShortSpecies vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
@ -3661,7 +3730,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3678,7 +3747,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -3694,7 +3763,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, charArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> (short) arr_[off_ + i]));
}
@ -3711,79 +3780,38 @@ public abstract class ShortVector extends AbstractVector<Short> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, charArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> (short) arr_[off_ + i]));
}
@Override
abstract
ShortVector fromByteArray0(byte[] a, int offset);
ShortVector fromMemorySegment0(MemorySegment bb, long offset);
@ForceInline
final
ShortVector fromByteArray0Template(byte[] a, int offset) {
ShortVector fromMemorySegment0Template(MemorySegment ms, long offset) {
ShortSpecies vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getShort(o + i * 2));
});
}
abstract
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m);
@ForceInline
final
<M extends VectorMask<Short>>
ShortVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
ShortSpecies vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getShort(o + i * 2));
});
}
abstract
ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
ShortSpecies vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.getShort(o + i * 2));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, ShortVector::memorySegmentGet);
});
}
abstract
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m);
@ForceInline
final
<M extends VectorMask<Short>>
ShortVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
ShortVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
ShortSpecies vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.getShort(o + i * 2));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentGet);
});
}
@ -3802,7 +3830,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -3819,77 +3847,39 @@ public abstract class ShortVector extends AbstractVector<Short> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
ShortSpecies vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m);
@ForceInline
final
<M extends VectorMask<Short>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
ShortSpecies vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
ShortSpecies vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, ShortVector::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Short> m);
@ForceInline
final
<M extends VectorMask<Short>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
ShortSpecies vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentSet);
});
}
@ -3907,7 +3897,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
a, charArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
}
@ -3923,6 +3913,16 @@ public abstract class ShortVector extends AbstractVector<Short> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
ShortSpecies vsp,
VectorMask<Short> m,
int scale,
long limit) {
((AbstractMask<Short>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
ShortSpecies vsp,
@ -4250,6 +4250,21 @@ public abstract class ShortVector extends AbstractVector<Short> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
ShortVector ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
ShortVector ldLongOp(MemorySegment memory, long offset,
VectorMask<Short> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -4264,6 +4279,20 @@ public abstract class ShortVector extends AbstractVector<Short> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<Short> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -4377,3 +4406,4 @@ public abstract class ShortVector extends AbstractVector<Short> {
public static final VectorSpecies<Short> SPECIES_PREFERRED
= (ShortSpecies) VectorSpecies.ofPreferred(short.class);
}

View file

@ -24,7 +24,8 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import java.util.Arrays;
@ -763,11 +764,11 @@ import java.util.Arrays;
* first vector lane value occupies the first position in memory, and so on,
* up to the length of the vector. Further, the memory order of stored
* vector lanes corresponds to increasing index values in a Java array or
* in a {@link java.nio.ByteBuffer}.
* in a {@link java.lang.foreign.MemorySegment}.
*
* <p> Byte order for lane storage is chosen such that the stored
* vector values can be read or written as single primitive values,
* within the array or buffer that holds the vector, producing the
* within the array or segment that holds the vector, producing the
* same values as the lane-wise values within the vector.
* This fact is independent of the convenient fiction that lane values
* inside of vectors are stored in little-endian order.
@ -1039,6 +1040,12 @@ import java.util.Arrays;
* can encode a mathematical permutation as well as many other
* patterns of data movement.
*
* <li>The {@link #compress(VectorMask)} and {@link #expand(VectorMask)}
* methods, which select up to {@code VLENGTH} lanes from an
* input vector, and assemble them in lane order. The selection of lanes
* is controlled by a {@code VectorMask}, whose set lanes map source lanes
* to destination lanes by compression or expansion in lane order.
*
* </ul>
* <p> Some vector operations are not lane-wise, but rather move data
* across lane boundaries. Such operations are typically rare in SIMD
@ -2689,6 +2696,46 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
*/
public abstract Vector<E> rearrange(VectorShuffle<E> s, Vector<E> v);
/**
* Compresses the lane elements of this vector selecting lanes
* under the control of a specific mask.
*
* This is a cross-lane operation that compresses the lane
* elements of this vector as selected by the specified mask.
*
* For each lane {@code N} of the mask, if the mask at
* lane {@code N} is set, the element at lane {@code N}
* of the input vector is selected and stored into the output
* vector contiguously starting from lane {@code 0}.
* All the upper remaining lanes, if any, of the output
* vector are set to zero.
*
* @param m the mask controlling the compression
* @return the compressed lane elements of this vector
* @since 19
*/
public abstract Vector<E> compress(VectorMask<E> m);
/**
* Expands the lane elements of this vector
* under the control of a specific mask.
*
* This is a cross-lane operation that expands the contiguous lane
* elements of this vector into lanes of an output vector
* as selected by the specified mask.
*
* For each lane {@code N} of the mask, if the mask at
* lane {@code N} is set, the next contiguous element of the input vector
* starting from lane {@code 0} is selected and stored into the output
* vector at lane {@code N}.
* All the remaining lanes, if any, of the output vector are set to zero.
*
* @param m the mask controlling the expansion
* @return the expanded lane elements of this vector
* @since 19
*/
public abstract Vector<E> expand(VectorMask<E> m);
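A concrete four-lane illustration of the two cross-lane operations above (editorial sketch; any species behaves the same way):
    VectorSpecies<Integer> SP = IntVector.SPECIES_128;                        // 4 int lanes
    IntVector v = IntVector.fromArray(SP, new int[] {10, 20, 30, 40}, 0);
    VectorMask<Integer> m = VectorMask.fromValues(SP, true, false, true, false);
    IntVector c = v.compress(m);   // [10, 30, 0, 0]  -- set-lane elements packed toward lane 0
    IntVector e = v.expand(m);     // [10, 0, 20, 0]  -- consecutive source elements scattered to the set lanes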
/**
* Using index values stored in the lanes of this vector,
* assemble values stored in second vector {@code v}.
@ -2854,9 +2901,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
* implementation costs.
*
* <p> The method behaves as if this vector is stored into a byte
* buffer or array using little-endian byte ordering and then the
* desired vector is loaded from the same byte buffer or array
* using the same ordering.
* array using little-endian byte ordering and then the desired vector is loaded from the same byte
* array using the same ordering.
*
* <p> The following pseudocode illustrates the behavior:
* <pre>{@code
@ -2865,15 +2911,15 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
* int M = (domSize > ranSize ? domSize / ranSize : ranSize / domSize);
* assert Math.abs(part) < M;
* assert (part == 0) || (part > 0) == (domSize > ranSize);
* byte[] ra = new byte[Math.max(domSize, ranSize)];
* MemorySegment ms = MemorySegment.ofArray(new byte[Math.max(domSize, ranSize)]);
* if (domSize > ranSize) { // expansion
* this.intoByteArray(ra, 0, ByteOrder.native());
* this.intoMemorySegment(ms, 0, ByteOrder.native());
* int origin = part * ranSize;
* return species.fromByteArray(ra, origin, ByteOrder.native());
* return species.fromMemorySegment(ms, origin, ByteOrder.native());
* } else { // contraction or size-invariant
* int origin = (-part) * domSize;
* this.intoByteArray(ra, origin, ByteOrder.native());
* return species.fromByteArray(ra, 0, ByteOrder.native());
* this.intoMemorySegment(ms, origin, ByteOrder.native());
* return species.fromMemorySegment(ms, 0, ByteOrder.native());
* }
* }</pre>
*
@ -2910,8 +2956,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
*
* @return a {@code ByteVector} with the same shape and information content
* @see Vector#reinterpretShape(VectorSpecies,int)
* @see IntVector#intoByteArray(byte[], int, ByteOrder)
* @see FloatVector#intoByteArray(byte[], int, ByteOrder)
* @see IntVector#intoMemorySegment(java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
* @see FloatVector#intoMemorySegment(java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
* @see VectorSpecies#withLanes(Class)
*/
public abstract ByteVector reinterpretAsBytes();
@ -3319,8 +3365,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
//Array stores
/**
* Stores this vector into a byte array starting at an offset
* using explicit byte order.
* Stores this vector into a {@linkplain MemorySegment memory segment}
* starting at an offset using explicit byte order.
* <p>
* Bytes are extracted from primitive lane elements according
* to the specified byte ordering.
@ -3328,88 +3374,33 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it calls
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
* intoByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = maskAll(true);
* intoByteBuffer(bb, offset, bo, m);
* }</pre>
*
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
public abstract void intoByteArray(byte[] a, int offset,
ByteOrder bo);
/**
* Stores this vector into a byte array starting at an offset
* using explicit byte order and a mask.
* <p>
* Bytes are extracted from primitive lane elements according
* to the specified byte ordering.
* The lanes are stored according to their
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it calls
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
* intoByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* intoByteBuffer(bb, offset, bo, m);
* }</pre>
*
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
public abstract void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<E> m);
/**
* Stores this vector into a byte buffer starting at an offset
* using explicit byte order.
* <p>
* Bytes are extracted from primitive lane elements according
* to the specified byte ordering.
* The lanes are stored according to their
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it calls
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
* intoByteBuffer()} as follows:
* {@link #intoMemorySegment(MemorySegment,long,ByteOrder,VectorMask)
* intoMemorySegment()} as follows:
* <pre>{@code
* var m = maskAll(true);
* intoByteBuffer(bb, offset, bo, m);
* intoMemorySegment(ms, offset, bo, m);
* }</pre>
*
* @param bb the byte buffer
* @param offset the offset into the array
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > bb.limit()}
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
* for any lane {@code N} in the vector
* @throws java.nio.ReadOnlyBufferException
* if the byte buffer is read-only
* @throws UnsupportedOperationException
* if the memory segment is read-only
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
public abstract void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo);
public abstract void intoMemorySegment(MemorySegment ms, long offset, ByteOrder bo);
/**
* Stores this vector into a byte buffer starting at an offset
* using explicit byte order and a mask.
* Stores this vector into a {@linkplain MemorySegment memory segment}
* starting at an offset using explicit byte order and a mask.
* <p>
* Bytes are extracted from primitive lane elements according
* to the specified byte ordering.
@ -3417,28 +3408,18 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* The following pseudocode illustrates the behavior, where
* the primitive element type is not of {@code byte},
* {@code EBuffer} is the primitive buffer type, {@code ETYPE} is the
* {@code JAVA_E} is the layout of the primitive element type, {@code ETYPE} is the
* primitive element type, and {@code EVector} is the primitive
* vector type for this vector:
* <pre>{@code
* EBuffer eb = bb.duplicate()
* .position(offset)
* .order(bo).asEBuffer();
* ETYPE[] a = this.toArray();
* var slice = ms.asSlice(offset);
* for (int n = 0; n < a.length; n++) {
* if (m.laneIsSet(n)) {
* eb.put(n, a[n]);
* slice.setAtIndex(ValueLayout.JAVA_E.withBitAlignment(8), n, a[n]);
* }
* }
* }</pre>
* When the primitive element type is of {@code byte} the primitive
* byte buffer is obtained as follows, where operation on the buffer
* remains the same as in the prior pseudocode:
* <pre>{@code
* ByteBuffer eb = bb.duplicate()
* .position(offset);
* }</pre>
*
* @implNote
* This operation is likely to be more efficient if
@ -3451,20 +3432,25 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
* {@code byte}, the byte order argument is
* ignored.
*
* @param bb the byte buffer
* @param offset the offset into the array
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > bb.limit()}
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws java.nio.ReadOnlyBufferException
* if the byte buffer is read-only
* @throws UnsupportedOperationException
* if the memory segment is read-only
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
public abstract void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo, VectorMask<E> m);
public abstract void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo, VectorMask<E> m);
/**
* Returns a packed array containing all the lane values.

View file

@ -54,6 +54,16 @@ import java.util.Objects;
}
}
@ForceInline
static long checkFromIndexSize(long ix, long vlen, long length) {
switch (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK) {
case 0: return ix; // no range check
case 1: return Objects.checkFromIndexSize(ix, vlen, length);
case 2: return Objects.checkIndex(ix, length - (vlen - 1));
default: throw new InternalError();
}
}
@ForceInline
static IntVector checkIndex(IntVector vix, int length) {
switch (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK) {
@ -92,9 +102,30 @@ import java.util.Objects;
if (index >= 0) {
return index - (index % size);
} else {
return index - Math.floorMod(index, Math.abs(size));
return index - Math.floorMod(index, size);
}
}
// If the index is not already a multiple of size,
// round it down to the next smaller multiple of size.
// It is an error if size is less than zero.
@ForceInline
static long roundDown(long index, int size) {
if ((size & (size - 1)) == 0) {
// Size is zero or a power of two, so we got this.
return index & ~(size - 1);
} else {
return roundDownNPOT(index, size);
}
}
private static long roundDownNPOT(long index, int size) {
if (index >= 0) {
return index - (index % size);
} else {
return index - Math.floorMod(index, size);
}
}
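For illustration (editorial note), both paths round toward negative infinity and leave exact multiples unchanged:
    assert roundDown(10L, 4) ==  8L;    // power-of-two path: 10 & ~3
    assert roundDown(-1L, 4) == -4L;    // negative indexes still round downward, not toward zero
    assert roundDown(10L, 6) ==  6L;    // non-power-of-two path: 10 - (10 % 6)
    assert roundDown(-7L, 6) == -12L;   // -7 - floorMod(-7, 6) = -7 - 5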
@ForceInline
static int wrapToRange(int index, int size) {
if ((size & (size - 1)) == 0) {

View file

@ -210,7 +210,7 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
bits, (long) offset + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
bits, offset, vsp,
(c, idx, s)
-> s.opm(n -> c[idx + n]));
-> s.opm(n -> c[((int) idx) + n]));
}
/**
@ -471,6 +471,39 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
*/
public abstract VectorMask<E> indexInRange(int offset, int limit);
/**
* Removes lanes numbered {@code N} from this mask where the
* adjusted index {@code N+offset} is not in the range
* {@code [0..limit-1]}.
*
* <p> In all cases the series of set and unset lanes is assigned
* as if by using infinite precision or {@code VLENGTH-}saturating
* additions or subtractions, without overflow or wrap-around.
*
* @apiNote
*
* This method performs a SIMD emulation of the check performed by
* {@link Objects#checkIndex(long,long)}, on the index numbers in
* the range {@code [offset..offset+VLENGTH-1]}. If an exception
* is desired, the resulting mask can be compared with the
* original mask; if they are not equal, then at least one lane
* was out of range, and exception processing can be performed.
*
* <p> A mask which is a series of {@code N} set lanes followed by
* a series of unset lanes can be obtained by calling
* {@code allTrue.indexInRange(0, N)}, where {@code allTrue} is a
* mask of all true bits. A mask of {@code N1} unset lanes
* followed by {@code N2} set lanes can be obtained by calling
* {@code allTrue.indexInRange(-N1, N2)}.
*
* @param offset the starting index
* @param limit the upper-bound (exclusive) of index range
* @return the original mask, with out-of-range lanes unset
* @see VectorSpecies#indexInRange(long, long)
* @since 19
*/
public abstract VectorMask<E> indexInRange(long offset, long limit);
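A minimal sketch of the partial-mask idiom described above, assuming the usual jdk.incubator.vector imports; the species and lane count are illustrative:

VectorSpecies<Integer> SP = IntVector.SPECIES_128;            // VLENGTH == 4
VectorMask<Integer> allTrue  = SP.maskAll(true);
VectorMask<Integer> firstTwo = allTrue.indexInRange(0L, 2L);  // T T F F
VectorMask<Integer> lastTwo  = allTrue.indexInRange(-2L, 2L); // F F T T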
/**
* Returns a vector representation of this mask, the
* lane bits of which are set or unset in correspondence
@ -621,6 +654,18 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
return Objects.hash(vectorSpecies(), Arrays.hashCode(toArray()));
}
/**
* Compresses set lanes from this mask.
*
* Returns a mask which is a series of {@code N} set lanes
* followed by a series of unset lanes, where {@code N} is
* the true count of this mask.
*
* @return the compressed mask of this mask
* @since 19
*/
public abstract VectorMask<E> compress();
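A minimal sketch of mask compression (the lane pattern is illustrative):

VectorSpecies<Integer> SP = IntVector.SPECIES_128;                            // 4 lanes
VectorMask<Integer> m = VectorMask.fromValues(SP, false, true, false, true);
VectorMask<Integer> c = m.compress();   // T T F F: trueCount() set lanes packed at the low end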
// ==== JROSE NAME CHANGES ====
// TYPE CHANGED

View file

@ -452,6 +452,26 @@ public abstract class VectorOperators {
public static final Unary ABS = unary("ABS", "abs", VectorSupport.VECTOR_OP_ABS, VO_ALL);
/** Produce {@code -a}. */
public static final Unary NEG = unary("NEG", "-a", VectorSupport.VECTOR_OP_NEG, VO_ALL|VO_SPECIAL);
/** Produce {@code bitCount(a)}
* @since 19
*/
public static final Unary BIT_COUNT = unary("BIT_COUNT", "bitCount", VectorSupport.VECTOR_OP_BIT_COUNT, VO_NOFP);
/** Produce {@code numberOfTrailingZeros(a)}
* @since 19
*/
public static final Unary TRAILING_ZEROS_COUNT = unary("TRAILING_ZEROS_COUNT", "numberOfTrailingZeros", VectorSupport.VECTOR_OP_TZ_COUNT, VO_NOFP);
/** Produce {@code numberOfLeadingZeros(a)}
* @since 19
*/
public static final Unary LEADING_ZEROS_COUNT = unary("LEADING_ZEROS_COUNT", "numberOfLeadingZeros", VectorSupport.VECTOR_OP_LZ_COUNT, VO_NOFP);
/** Produce {@code reverse(a)}
* @since 19
*/
public static final Unary REVERSE = unary("REVERSE", "reverse", VectorSupport.VECTOR_OP_REVERSE, VO_NOFP);
/** Produce {@code reverseBytes(a)}
* @since 19
*/
public static final Unary REVERSE_BYTES = unary("REVERSE_BYTES", "reverseBytes", VectorSupport.VECTOR_OP_REVERSE_BYTES, VO_NOFP);
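A minimal sketch of the new unary operators through lanewise, with an illustrative species and input; each lane follows the corresponding Integer method:

IntVector v     = IntVector.broadcast(IntVector.SPECIES_128, 0x00F0);
IntVector bits  = v.lanewise(VectorOperators.BIT_COUNT);             // 4 per lane
IntVector lead  = v.lanewise(VectorOperators.LEADING_ZEROS_COUNT);   // 24 per lane
IntVector trail = v.lanewise(VectorOperators.TRAILING_ZEROS_COUNT);  // 4 per lane
IntVector rev   = v.lanewise(VectorOperators.REVERSE_BYTES);         // 0xF0000000 per lane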
/** Produce {@code sin(a)}. Floating only.
* Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
@ -556,6 +576,14 @@ public abstract class VectorOperators {
public static final /*bitwise*/ Binary ROL = binary("ROL", "rotateLeft", VectorSupport.VECTOR_OP_LROTATE, VO_SHIFT);
/** Produce {@code rotateRight(a,n)}. Integral only. */
public static final /*bitwise*/ Binary ROR = binary("ROR", "rotateRight", VectorSupport.VECTOR_OP_RROTATE, VO_SHIFT);
/** Produce {@code compress(a,n)}. Integral, {@code int} and {@code long}, only.
* @since 19
*/
public static final /*bitwise*/ Binary COMPRESS_BITS = binary("COMPRESS_BITS", "compressBits", VectorSupport.VECTOR_OP_COMPRESS_BITS, VO_NOFP);
/** Produce {@code expand(a,n)}. Integral, {@code int} and {@code long}, only.
* @since 19
*/
public static final /*bitwise*/ Binary EXPAND_BITS = binary("EXPAND_BITS", "expandBits", VectorSupport.VECTOR_OP_EXPAND_BITS, VO_NOFP);
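A similarly hedged sketch for the new bitwise pair, which mirrors Integer.compress and Integer.expand lane by lane (values illustrative):

IntVector v    = IntVector.broadcast(IntVector.SPECIES_128, 0b1011);
IntVector mask = IntVector.broadcast(IntVector.SPECIES_128, 0b0110);
IntVector comp = v.lanewise(VectorOperators.COMPRESS_BITS, mask);    // Integer.compress(0b1011, 0b0110) == 0b01
IntVector expd = comp.lanewise(VectorOperators.EXPAND_BITS, mask);   // Integer.expand(0b01, 0b0110) == 0b0010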
/** Produce {@code atan2(a,b)}. Floating only.
* Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above

View file

@ -24,6 +24,8 @@
*/
package jdk.incubator.vector;
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import java.util.function.IntUnaryOperator;
@ -149,11 +151,37 @@ public interface VectorSpecies<E> {
* @return the largest multiple of the vector length not greater
* than the given length
* @throws IllegalArgumentException if the {@code length} is
negative and the result would overflow to a positive value
* negative and the result would overflow to a positive value
* @see Math#floorMod(int, int)
*/
int loopBound(int length);
/**
* Loop control function which returns the largest multiple of
* {@code VLENGTH} that is less than or equal to the given
* {@code length} value.
* Here, {@code VLENGTH} is the result of {@code this.length()},
* and {@code length} is interpreted as a number of lanes.
* The resulting value {@code R} satisfies this inequality:
* <pre>{@code R <= length < R+VLENGTH}
* </pre>
* <p> Specifically, this method computes
* {@code length - floorMod(length, VLENGTH)}, where
* {@link Math#floorMod(long,int) floorMod} computes a remainder
* value by rounding its quotient toward negative infinity.
* As long as {@code VLENGTH} is a power of two, then the result
* is also equal to {@code length & ~(VLENGTH - 1)}.
*
* @param length the input length
* @return the largest multiple of the vector length not greater
* than the given length
* @throws IllegalArgumentException if the {@code length} is
* negative and the result would overflow to a positive value
* @see Math#floorMod(long, int)
* @since 19
*/
long loopBound(long length);
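A minimal sketch of the strip-mining loop this method supports, assuming a byte[]-backed heap segment, the usual jdk.incubator.vector and java.lang.foreign imports, and illustrative sizes; the tail mask uses the indexInRange(long, long) overload declared further below:

VectorSpecies<Float> SP = FloatVector.SPECIES_PREFERRED;
MemorySegment ms = MemorySegment.ofArray(new byte[4012]);   // heap segments must be byte[]-backed
long count = ms.byteSize() / Float.BYTES;                   // number of float elements
long upper = SP.loopBound(count);                           // largest multiple of VLENGTH <= count
long i = 0;
for (; i < upper; i += SP.length()) {
    FloatVector v = FloatVector.fromMemorySegment(SP, ms, i * Float.BYTES, ByteOrder.nativeOrder());
    v.add(1.0f).intoMemorySegment(ms, i * Float.BYTES, ByteOrder.nativeOrder());
}
VectorMask<Float> tail = SP.indexInRange(i, count);         // masks off the out-of-range lanes
FloatVector v = FloatVector.fromMemorySegment(SP, ms, i * Float.BYTES, ByteOrder.nativeOrder(), tail);
v.add(1.0f).intoMemorySegment(ms, i * Float.BYTES, ByteOrder.nativeOrder(), tail);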
/**
* Returns a mask of this species where only
* the lanes at index N such that the adjusted index
@ -171,6 +199,24 @@ public interface VectorSpecies<E> {
*/
VectorMask<E> indexInRange(int offset, int limit);
/**
* Returns a mask of this species where only
* the lanes at index N such that the adjusted index
* {@code N+offset} is in the range {@code [0..limit-1]}
* are set.
*
* <p>
* This method returns the value of the expression
* {@code maskAll(true).indexInRange(offset, limit)}
*
* @param offset the starting index
* @param limit the upper-bound (exclusive) of index range
* @return a mask with out-of-range lanes unset
* @see VectorMask#indexInRange(long, long)
* @since 19
*/
VectorMask<E> indexInRange(long offset, long limit);
/**
* Checks that this species has the given element type,
* and returns this species unchanged.
@ -433,31 +479,31 @@ public interface VectorSpecies<E> {
// Defined when ETYPE is known.
/**
* Loads a vector of this species from a byte array starting
* at an offset.
* Loads a vector of this species from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* Equivalent to
* {@code IntVector.fromByteArray(this,a,offset,bo)}
* or an equivalent {@code fromByteArray} method,
* {@code IntVector.fromMemorySegment(this,ms,offset,bo)},
* on the vector type corresponding to
* this species.
*
* @param a a byte array
* @param offset the index of the first byte to load
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector of the given species filled from the byte array
* @return a vector of the given species filled from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
* for any lane {@code N} in the vector
* @see IntVector#fromByteArray(VectorSpecies,byte[],int,ByteOrder)
* @see FloatVector#fromByteArray(VectorSpecies,byte[],int,ByteOrder)
* @see IntVector#fromMemorySegment(VectorSpecies, java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
* @see FloatVector#fromMemorySegment(VectorSpecies, java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
* @since 19
*/
Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo);
Vector<E> fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo);
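A minimal sketch of the species-level load, again assuming a byte[]-backed heap segment (sizes illustrative):

VectorSpecies<Integer> SP = IntVector.SPECIES_256;          // 8 int lanes, 32 bytes
MemorySegment seg = MemorySegment.ofArray(new byte[32]);
Vector<Integer> v = SP.fromMemorySegment(seg, 0, ByteOrder.nativeOrder());
// same result as IntVector.fromMemorySegment(SP, seg, 0, ByteOrder.nativeOrder())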
/**
* Returns a mask of this species

View file

@ -24,14 +24,14 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.ReadOnlyBufferException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
@ -61,6 +61,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
#end[FP]
static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withBitAlignment(8);
@ForceInline
static int opCode(Operator op) {
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
@ -355,6 +357,45 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
return vectorFactory(res);
}
/*package-private*/
interface FLdLongOp {
$type$ apply(MemorySegment memory, long offset, int i);
}
/*package-private*/
@ForceInline
final
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
//dummy; no vec = vec();
$type$[] res = new $type$[length()];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(memory, offset, i);
}
return vectorFactory(res);
}
/*package-private*/
@ForceInline
final
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
VectorMask<$Boxtype$> m,
FLdLongOp f) {
//$type$[] vec = vec();
$type$[] res = new $type$[length()];
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
for (int i = 0; i < res.length; i++) {
if (mbits[i]) {
res[i] = f.apply(memory, offset, i);
}
}
return vectorFactory(res);
}
static $type$ memorySegmentGet(MemorySegment ms, long o, int i) {
return ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L);
}
interface FStOp<M> {
void apply(M memory, int offset, int i, $type$ a);
}
@ -385,6 +426,40 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
}
}
interface FStLongOp {
void apply(MemorySegment memory, long offset, int i, $type$ a);
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
FStLongOp f) {
$type$[] vec = vec();
for (int i = 0; i < vec.length; i++) {
f.apply(memory, offset, i, vec[i]);
}
}
/*package-private*/
@ForceInline
final
void stLongOp(MemorySegment memory, long offset,
VectorMask<$Boxtype$> m,
FStLongOp f) {
$type$[] vec = vec();
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
for (int i = 0; i < vec.length; i++) {
if (mbits[i]) {
f.apply(memory, offset, i, vec[i]);
}
}
}
static void memorySegmentSet(MemorySegment ms, long o, int i, $type$ e) {
ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, e);
}
// Binary test
/*package-private*/
@ -445,6 +520,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
}
static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
$abstractvectortype$ r = ($abstractvectortype$) vsp.zero();
$abstractvectortype$ vi = ($abstractvectortype$) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(i, vi.lane(j++));
}
}
return r;
}
static $abstractvectortype$ compressHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
$abstractvectortype$ r = ($abstractvectortype$) vsp.zero();
$abstractvectortype$ vi = ($abstractvectortype$) v;
if (m.allTrue()) {
return vi;
}
for (int i = 0, j = 0; i < vsp.length(); i++) {
if (m.laneIsSet(i)) {
r = r.withLane(j++, vi.lane(i));
}
}
return r;
}
// Static factories (other than memory operations)
// Note: A surprising behavior in javadoc
@ -646,6 +751,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
v0.uOp(m, (i, a) -> ($type$) -a);
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
#if[!FP]
#if[intOrLong]
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
#else[intOrLong]
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> reverse(a));
#end[intOrLong]
#if[BITWISE]
#if[byte]
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> a);
#else[byte]
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
#end[byte]
#end[BITWISE]
#end[!FP]
#if[FP]
case VECTOR_OP_SIN: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
@ -839,6 +974,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
#if[intOrLong]
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.compress(a, n));
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.expand(a, n));
#end[intOrLong]
#end[BITWISE]
#if[FP]
case VECTOR_OP_OR: return (v0, v1, vm) ->
@ -1987,6 +2128,56 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
return lanewise(ABS);
}
#if[!FP]
#if[!intOrLong]
static int bitCount($type$ a) {
#if[short]
return Integer.bitCount((int)a & 0xFFFF);
#else[short]
return Integer.bitCount((int)a & 0xFF);
#end[short]
}
#end[!intOrLong]
#end[!FP]
#if[!FP]
#if[!intOrLong]
static int numberOfTrailingZeros($type$ a) {
#if[short]
return a != 0 ? Integer.numberOfTrailingZeros(a) : 16;
#else[short]
return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
#end[short]
}
#end[!intOrLong]
#end[!FP]
#if[!FP]
#if[!intOrLong]
static int numberOfLeadingZeros($type$ a) {
#if[short]
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0;
#else[short]
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
#end[short]
}
static $type$ reverse($type$ a) {
if (a == 0 || a == -1) return a;
#if[short]
$type$ b = rotateLeft(a, 8);
b = ($type$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
b = ($type$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
b = ($type$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
#else[short]
$type$ b = rotateLeft(a, 4);
b = ($type$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
b = ($type$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
#end[short]
return b;
}
#end[!intOrLong]
#end[!FP]
#if[BITWISE]
// not (~)
/**
@ -2695,6 +2886,45 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
$Type$Vector::toShuffle0);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
$Type$Vector compress(VectorMask<$Boxtype$> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<$Boxtype$>>
$Type$Vector compressTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
$type$.class, length(), this, m,
(v1, m1) -> compressHelper(v1, m1));
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
public abstract
$Type$Vector expand(VectorMask<$Boxtype$> m);
/*package-private*/
@ForceInline
final
<M extends AbstractMask<$Boxtype$>>
$Type$Vector expandTemplate(Class<M> masktype, M m) {
m.check(masktype, this);
return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
$type$.class, length(), this, m,
(v1, m1) -> expandHelper(v1, m1));
}
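A minimal sketch of the cross-lane behavior these templates back, using the concrete IntVector specialization and illustrative lane values:

VectorSpecies<Integer> SP = IntVector.SPECIES_128;                        // 4 lanes
IntVector v = IntVector.fromArray(SP, new int[] {10, 20, 30, 40}, 0);
VectorMask<Integer> m = VectorMask.fromValues(SP, true, false, true, false);
IntVector c = v.compress(m);   // [10, 30, 0, 0]: selected lanes packed low, the rest zero
IntVector e = c.expand(m);     // [10, 0, 30, 0]: packed lanes scattered back to the set positions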
/**
* {@inheritDoc} <!--workaround-->
*/
@ -3302,90 +3532,6 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
}
#end[double]
/**
* Loads a vector from a byte array starting at an offset.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
*/
@ForceInline
public static
$abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
$Type$Species vsp = ($Type$Species) species;
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
}
/**
* Loads a vector from a byte array starting at an offset
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code $type$} ({#if[FP]?positive }zero).
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* <pre>{@code
* var bb = ByteBuffer.wrap(a);
* return fromByteBuffer(species, bb, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param a the byte array
* @param offset the offset into the array
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte array
* @throws IndexOutOfBoundsException
* if {@code offset+N*ESIZE < 0}
* or {@code offset+(N+1)*ESIZE > a.length}
* for any lane {@code N} in the vector
* where the mask is set
*/
@ForceInline
public static
$abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
byte[] a, int offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
$Type$Species vsp = ($Type$Species) species;
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
ByteBuffer wb = wrapper(a, bo);
return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
}
/**
* Loads a vector from an array of type {@code $type$[]}
* starting at an offset.
@ -3917,44 +4063,49 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
#end[byte]
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer.
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment.
* Bytes are composed into primitive lane elements according
* to the specified byte order.
* The vector is arranged into lanes according to
* <a href="Vector.html#lane-order">memory ordering</a>.
* <p>
* This method behaves as if it returns the result of calling
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
* fromByteBuffer()} as follows:
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
* fromMemorySegment()} as follows:
* <pre>{@code
* var m = species.maskAll(true);
* return fromByteBuffer(species, bb, offset, bo, m);
* return fromMemorySegment(species, ms, offset, bo, m);
* }</pre>
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*$sizeInBytes$ < 0}
* or {@code offset+N*$sizeInBytes$ >= bb.limit()}
* or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
* for any lane {@code N} in the vector
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
$abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
ByteBuffer bb, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
$abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
MemorySegment ms, long offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
$Type$Species vsp = ($Type$Species) species;
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
}
/**
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
* starting at an offset into the byte buffer
* Loads a vector from a {@linkplain MemorySegment memory segment}
* starting at an offset into the memory segment
* and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code $type$} ({#if[FP]?positive }zero).
@ -3965,15 +4116,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
* <p>
* The following pseudocode illustrates the behavior:
* <pre>{@code
* $Type$Buffer eb = bb.duplicate()
* .position(offset){#if[byte]?;}
#if[!byte]
* .order(bo).as$Type$Buffer();
#end[!byte]
* var slice = ms.asSlice(offset);
* $type$[] ar = new $type$[species.length()];
* for (int n = 0; n < ar.length; n++) {
* if (m.laneIsSet(n)) {
* ar[n] = eb.get(n);
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_$TYPE$.withBitAlignment(8), n);
* }
* }
* $abstractvectortype$ r = $abstractvectortype$.fromArray(species, ar, 0);
@ -3991,33 +4138,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
#end[!byte]
*
* @param species species of desired vector
* @param bb the byte buffer
* @param offset the offset into the byte buffer
* @param ms the memory segment
* @param offset the offset into the memory segment
* @param bo the intended byte order
* @param m the mask controlling lane selection
* @return a vector loaded from a byte buffer
* @return a vector loaded from the memory segment
* @throws IndexOutOfBoundsException
* if {@code offset+N*$sizeInBytes$ < 0}
* or {@code offset+N*$sizeInBytes$ >= bb.limit()}
* or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
* for any lane {@code N} in the vector
* where the mask is set
* @throws IllegalArgumentException if the memory segment is a heap segment that is
* not backed by a {@code byte[]} array.
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
*/
@ForceInline
public static
$abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
$abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
$Type$Species vsp = ($Type$Species) species;
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
}
// FIXME: optimize
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
ByteBuffer wb = wrapper(bb, bo);
return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, ms.byteSize());
return vsp.ldLongOp(ms, offset, m, $abstractvectortype$::memorySegmentGet);
}
// Memory store operations
@ -4047,7 +4197,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -4264,7 +4414,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
this,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
}
@ -4423,7 +4573,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
normalized,
a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
}
@ -4562,67 +4712,40 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo) {
offset = checkFromIndexSize(offset, byteSize(), a.length);
maybeSwap(bo).intoByteArray0(a, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
if (m.allTrue()) {
intoByteArray(a, offset, bo);
} else {
$Type$Species vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
maybeSwap(bo).intoByteArray0(a, offset, m);
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo) {
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset);
}
/**
* {@inheritDoc} <!--workaround-->
* @since 19
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo) {
if (ScopedMemoryAccess.isReadOnly(bb)) {
throw new ReadOnlyBufferException();
}
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
void intoMemorySegment(MemorySegment ms, long offset,
ByteOrder bo,
VectorMask<$Boxtype$> m) {
if (m.allTrue()) {
intoByteBuffer(bb, offset, bo);
intoMemorySegment(ms, offset, bo);
} else {
if (bb.isReadOnly()) {
throw new ReadOnlyBufferException();
if (ms.isReadOnly()) {
throw new UnsupportedOperationException("Attempt to write a read-only segment");
}
$Type$Species vsp = vspecies();
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, ms.byteSize());
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
}
}
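A minimal sketch of a masked store through the concrete IntVector specialization, assuming a byte[]-backed heap segment and illustrative values; the heap-backing and read-only restrictions match the checks above:

byte[] backing = new byte[16];
MemorySegment seg = MemorySegment.ofArray(backing);
IntVector v = IntVector.broadcast(IntVector.SPECIES_128, 7);
VectorMask<Integer> m = IntVector.SPECIES_128.indexInRange(0, 3);   // lanes 0..2 only
v.intoMemorySegment(seg, 0, ByteOrder.nativeOrder(), m);
// backing now holds three ints of value 7; lane 3 is left untouched.
// A read-only segment would throw UnsupportedOperationException instead.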
@ -4656,7 +4779,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -4673,7 +4796,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, arrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> arr_[off_ + i]));
}
@ -4750,7 +4873,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, charArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> (short) arr_[off_ + i]));
}
@ -4767,7 +4890,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, charArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> (short) arr_[off_ + i]));
}
#end[short]
@ -4784,7 +4907,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, booleanArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> s.ldOp(arr, off,
(arr, off, s) -> s.ldOp(arr, (int) off,
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
}
@ -4801,79 +4924,38 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, booleanArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
}
#end[byte]
@Override
abstract
$abstractvectortype$ fromByteArray0(byte[] a, int offset);
$abstractvectortype$ fromMemorySegment0(MemorySegment bb, long offset);
@ForceInline
final
$abstractvectortype$ fromByteArray0Template(byte[] a, int offset) {
$abstractvectortype$ fromMemorySegment0Template(MemorySegment ms, long offset) {
$Type$Species vsp = vspecies();
return VectorSupport.load(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
a, offset, vsp,
(arr, off, s) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
});
}
abstract
$abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m);
@ForceInline
final
<M extends VectorMask<$Boxtype$>>
$abstractvectortype$ fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
$Type$Species vsp = vspecies();
m.check(vsp);
return VectorSupport.loadMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset), m,
a, offset, vsp,
(arr, off, s, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
});
}
abstract
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset);
@ForceInline
final
$abstractvectortype$ fromByteBuffer0Template(ByteBuffer bb, int offset) {
$Type$Species vsp = vspecies();
return ScopedMemoryAccess.loadFromByteBuffer(
return ScopedMemoryAccess.loadFromMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
bb, offset, vsp,
(buf, off, s) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
(AbstractMemorySegmentImpl) ms, offset, vsp,
(msp, off, s) -> {
return s.ldLongOp((MemorySegment) msp, off, $abstractvectortype$::memorySegmentGet);
});
}
abstract
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m);
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m);
@ForceInline
final
<M extends VectorMask<$Boxtype$>>
$abstractvectortype$ fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
$abstractvectortype$ fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
$Type$Species vsp = vspecies();
m.check(vsp);
return ScopedMemoryAccess.loadFromByteBufferMasked(
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
bb, offset, m, vsp,
(buf, off, s, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
return s.ldOp(wb, off, vm,
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
(msp, off, s, vm) -> {
return s.ldLongOp((MemorySegment) msp, off, vm, $abstractvectortype$::memorySegmentGet);
});
}
@ -4892,7 +4974,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
a, arrayAddress(a, offset),
this, a, offset,
(arr, off, v)
-> v.stOp(arr, off,
-> v.stOp(arr, (int) off,
(arr_, off_, i, e) -> arr_[off_+i] = e));
}
@ -4909,7 +4991,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
a, arrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = e));
}
@ -4990,76 +5072,38 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
a, booleanArrayAddress(a, offset),
normalized, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
}
#end[byte]
abstract
void intoByteArray0(byte[] a, int offset);
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
void intoMemorySegment0(MemorySegment ms, long offset) {
$Type$Species vsp = vspecies();
VectorSupport.store(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, a, offset,
(arr, off, v) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off,
(tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
});
}
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m);
@ForceInline
final
<M extends VectorMask<$Boxtype$>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
$Type$Species vsp = vspecies();
m.check(vsp);
VectorSupport.storeMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
a, byteArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm) -> {
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
});
}
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
$Type$Species vsp = vspecies();
ScopedMemoryAccess.storeIntoByteBuffer(
ScopedMemoryAccess.storeIntoMemorySegment(
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
this, bb, offset,
(buf, off, v) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off,
(wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
this,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v) -> {
v.stLongOp((MemorySegment) msp, off, $abstractvectortype$::memorySegmentSet);
});
}
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m);
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<$Boxtype$> m);
@ForceInline
final
<M extends VectorMask<$Boxtype$>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
$Type$Species vsp = vspecies();
m.check(vsp);
ScopedMemoryAccess.storeIntoByteBufferMasked(
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
this, m, bb, offset,
(buf, off, v, vm) -> {
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
v.stOp(wb, off, vm,
(wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
this, m,
(AbstractMemorySegmentImpl) ms, offset,
(msp, off, v, vm) -> {
v.stLongOp((MemorySegment) msp, off, vm, $abstractvectortype$::memorySegmentSet);
});
}
@ -5078,7 +5122,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
a, charArrayAddress(a, offset),
this, m, a, offset,
(arr, off, v, vm)
-> v.stOp(arr, off, vm,
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
}
#end[short]
@ -5095,6 +5139,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
private static
void checkMaskFromIndexSize(long offset,
$Type$Species vsp,
VectorMask<$Boxtype$> m,
int scale,
long limit) {
((AbstractMask<$Boxtype$>)m)
.checkIndexByLane(offset, limit, vsp.iota(), scale);
}
@ForceInline
private void conditionalStoreNYI(int offset,
$Type$Species vsp,
@ -5463,6 +5517,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
return dummyVector().ldOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
VectorMask<$Boxtype$> m,
FLdLongOp f) {
return dummyVector().ldLongOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
<M> void stOp(M memory, int offset, FStOp<M> f) {
@ -5477,6 +5546,20 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
dummyVector().stOp(memory, offset, m, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
dummyVector().stLongOp(memory, offset, f);
}
/*package-private*/
@ForceInline
void stLongOp(MemorySegment memory, long offset,
AbstractMask<$Boxtype$> m,
FStLongOp f) {
dummyVector().stLongOp(memory, offset, m, f);
}
// N.B. Make sure these constant vectors and
// masks load up correctly into registers.
//
@ -5590,3 +5673,4 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
= ($Type$Species) VectorSpecies.ofPreferred($type$.class);
}

View file

@ -24,7 +24,7 @@
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
@ -480,6 +480,22 @@ final class $vectortype$ extends $abstractvectortype$ {
($vectortype$) v); // specialize
}
@Override
@ForceInline
public $vectortype$ compress(VectorMask<$Boxtype$> m) {
return ($vectortype$)
super.compressTemplate($masktype$.class,
($masktype$) m); // specialize
}
@Override
@ForceInline
public $vectortype$ expand(VectorMask<$Boxtype$> m) {
return ($vectortype$)
super.expandTemplate($masktype$.class,
($masktype$) m); // specialize
}
@Override
@ForceInline
public $vectortype$ selectFrom(Vector<$Boxtype$> v) {
@ -920,6 +936,15 @@ final class $vectortype$ extends $abstractvectortype$ {
return xor(maskAll(true));
}
@Override
@ForceInline
public $masktype$ compress() {
return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
$vectortype$.class, $masktype$.class, ETYPE, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
}
// Binary operations
@Override
@ -1159,29 +1184,15 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
@Override
final
$abstractvectortype$ fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset) {
return super.fromMemorySegment0Template(ms, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
return super.fromByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
return super.fromByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m); // specialize
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m) {
return super.fromMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m); // specialize
}
@ForceInline
@ -1219,22 +1230,8 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
}
@ForceInline
@Override
final
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
super.intoByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m);
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m) {
super.intoMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m);
}
#if[short]
@ -1251,3 +1248,4 @@ final class $vectortype$ extends $abstractvectortype$ {
// ================================================
}

View file

@ -224,6 +224,8 @@ public class AMD64 extends Architecture {
RDTSCP,
RDPID,
FSRM,
GFNI,
AVX512_BITALG,
}
private final EnumSet<CPUFeature> features;

View file

@ -1769,6 +1769,10 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"],
["index", "__ sve_index(z6, __ B, r5, 2);", "index\tz6.b, w5, #2"],
["index", "__ sve_index(z6, __ H, r5, 3);", "index\tz6.h, w5, #3"],
["index", "__ sve_index(z6, __ S, r5, 4);", "index\tz6.s, w5, #4"],
["index", "__ sve_index(z7, __ D, r5, 5);", "index\tz7.d, x5, #5"],
["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
@ -1811,7 +1815,12 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["uzp2", "__ sve_uzp2(p0, __ D, p0, p1);", "uzp2\tp0.d, p0.d, p1.d"],
["punpklo", "__ sve_punpklo(p1, p0);", "punpklo\tp1.h, p0.b"],
["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"],
["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"],
["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
# SVE2 instructions
["histcnt", "__ sve_histcnt(z16, __ S, p0, z16, z16);", "histcnt\tz16.s, p0/z, z16.s, z16.s"],
["histcnt", "__ sve_histcnt(z17, __ D, p0, z17, z17);", "histcnt\tz17.d, p0/z, z17.d, z17.d"],
])
print "\n// FloatImmediateOp"
@ -1855,6 +1864,7 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["and", "ZPZ", "m", "dn"],
["asr", "ZPZ", "m", "dn"],
["bic", "ZPZ", "m", "dn"],
["clz", "ZPZ", "m"],
["cnt", "ZPZ", "m"],
["eor", "ZPZ", "m", "dn"],
["lsl", "ZPZ", "m", "dn"],
@ -1863,6 +1873,8 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["neg", "ZPZ", "m"],
["not", "ZPZ", "m"],
["orr", "ZPZ", "m", "dn"],
["rbit", "ZPZ", "m"],
["revb", "ZPZ", "m"],
["smax", "ZPZ", "m", "dn"],
["smin", "ZPZ", "m", "dn"],
["sub", "ZPZ", "m", "dn"],

View file

@ -913,6 +913,10 @@
__ sve_lasta(v0, __ B, p0, z15); // lasta b0, p0, z15.b
__ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b
__ sve_index(z6, __ S, 1, 1); // index z6.s, #1, #1
__ sve_index(z6, __ B, r5, 2); // index z6.b, w5, #2
__ sve_index(z6, __ H, r5, 3); // index z6.h, w5, #3
__ sve_index(z6, __ S, r5, 4); // index z6.s, w5, #4
__ sve_index(z7, __ D, r5, 5); // index z7.d, x5, #5
__ sve_cpy(z7, __ H, p3, r5); // cpy z7.h, p3/m, w5
__ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s
__ sve_ld1w_gather(z15, p0, r5, z16); // ld1w {z15.s}, p0/z, [x5, z16.s, uxtw #2]
@ -955,7 +959,11 @@
__ sve_uzp2(p0, __ D, p0, p1); // uzp2 p0.d, p0.d, p1.d
__ sve_punpklo(p1, p0); // punpklo p1.h, p0.b
__ sve_punpkhi(p1, p0); // punpkhi p1.h, p0.b
__ sve_compact(z16, __ S, z16, p1); // compact z16.s, p1, z16.s
__ sve_compact(z16, __ D, z16, p1); // compact z16.d, p1, z16.d
__ sve_ext(z17, z16, 63); // ext z17.b, z17.b, z16.b, #63
__ sve_histcnt(z16, __ S, p0, z16, z16); // histcnt z16.s, p0/z, z16.s, z16.s
__ sve_histcnt(z17, __ D, p0, z17, z17); // histcnt z17.d, p0/z, z17.d, z17.d
// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
@ -1144,57 +1152,60 @@
__ sve_and(z22, __ D, p5, z20); // and z22.d, p5/m, z22.d, z20.d
__ sve_asr(z28, __ S, p2, z13); // asr z28.s, p2/m, z28.s, z13.s
__ sve_bic(z7, __ H, p5, z28); // bic z7.h, p5/m, z7.h, z28.h
__ sve_cnt(z11, __ S, p3, z11); // cnt z11.s, p3/m, z11.s
__ sve_eor(z1, __ S, p6, z8); // eor z1.s, p6/m, z1.s, z8.s
__ sve_lsl(z13, __ S, p4, z17); // lsl z13.s, p4/m, z13.s, z17.s
__ sve_lsr(z4, __ H, p0, z3); // lsr z4.h, p0/m, z4.h, z3.h
__ sve_mul(z7, __ S, p3, z14); // mul z7.s, p3/m, z7.s, z14.s
__ sve_neg(z4, __ B, p3, z29); // neg z4.b, p3/m, z29.b
__ sve_not(z0, __ D, p2, z21); // not z0.d, p2/m, z21.d
__ sve_orr(z3, __ S, p0, z9); // orr z3.s, p0/m, z3.s, z9.s
__ sve_smax(z28, __ B, p2, z24); // smax z28.b, p2/m, z28.b, z24.b
__ sve_smin(z19, __ D, p1, z23); // smin z19.d, p1/m, z19.d, z23.d
__ sve_sub(z13, __ D, p5, z10); // sub z13.d, p5/m, z13.d, z10.d
__ sve_fabs(z12, __ D, p4, z30); // fabs z12.d, p4/m, z30.d
__ sve_fadd(z14, __ D, p0, z29); // fadd z14.d, p0/m, z14.d, z29.d
__ sve_fdiv(z21, __ D, p5, z7); // fdiv z21.d, p5/m, z21.d, z7.d
__ sve_fmax(z2, __ D, p0, z26); // fmax z2.d, p0/m, z2.d, z26.d
__ sve_fmin(z9, __ D, p4, z17); // fmin z9.d, p4/m, z9.d, z17.d
__ sve_fmul(z0, __ D, p1, z2); // fmul z0.d, p1/m, z0.d, z2.d
__ sve_fneg(z14, __ D, p1, z11); // fneg z14.d, p1/m, z11.d
__ sve_frintm(z14, __ S, p4, z29); // frintm z14.s, p4/m, z29.s
__ sve_frintn(z3, __ S, p0, z22); // frintn z3.s, p0/m, z22.s
__ sve_frintp(z3, __ S, p6, z27); // frintp z3.s, p6/m, z27.s
__ sve_fsqrt(z19, __ D, p5, z7); // fsqrt z19.d, p5/m, z7.d
__ sve_fsub(z21, __ S, p3, z5); // fsub z21.s, p3/m, z21.s, z5.s
__ sve_fmad(z25, __ D, p1, z21, z17); // fmad z25.d, p1/m, z21.d, z17.d
__ sve_fmla(z0, __ S, p0, z9, z19); // fmla z0.s, p0/m, z9.s, z19.s
__ sve_fmls(z7, __ D, p3, z14, z17); // fmls z7.d, p3/m, z14.d, z17.d
__ sve_fmsb(z11, __ D, p3, z24, z17); // fmsb z11.d, p3/m, z24.d, z17.d
__ sve_fnmad(z17, __ D, p2, z15, z14); // fnmad z17.d, p2/m, z15.d, z14.d
__ sve_fnmsb(z22, __ S, p7, z22, z7); // fnmsb z22.s, p7/m, z22.s, z7.s
__ sve_fnmla(z5, __ S, p7, z27, z10); // fnmla z5.s, p7/m, z27.s, z10.s
__ sve_fnmls(z14, __ S, p6, z21, z20); // fnmls z14.s, p6/m, z21.s, z20.s
__ sve_mla(z3, __ D, p5, z25, z5); // mla z3.d, p5/m, z25.d, z5.d
__ sve_mls(z29, __ H, p4, z17, z1); // mls z29.h, p4/m, z17.h, z1.h
__ sve_and(z14, z29, z13); // and z14.d, z29.d, z13.d
__ sve_eor(z17, z2, z30); // eor z17.d, z2.d, z30.d
__ sve_orr(z22, z21, z29); // orr z22.d, z21.d, z29.d
__ sve_bic(z8, z2, z0); // bic z8.d, z2.d, z0.d
__ sve_uzp1(z23, __ S, z22, z0); // uzp1 z23.s, z22.s, z0.s
__ sve_uzp2(z25, __ H, z26, z23); // uzp2 z25.h, z26.h, z23.h
__ sve_bext(z21, __ B, z21, z1); // bext z21.b, z21.b, z1.b
__ sve_clz(z11, __ S, p3, z11); // clz z11.s, p3/m, z11.s
__ sve_cnt(z1, __ S, p6, z8); // cnt z1.s, p6/m, z8.s
__ sve_eor(z13, __ S, p4, z17); // eor z13.s, p4/m, z13.s, z17.s
__ sve_lsl(z4, __ H, p0, z3); // lsl z4.h, p0/m, z4.h, z3.h
__ sve_lsr(z7, __ S, p3, z14); // lsr z7.s, p3/m, z7.s, z14.s
__ sve_mul(z4, __ B, p3, z29); // mul z4.b, p3/m, z4.b, z29.b
__ sve_neg(z0, __ D, p2, z21); // neg z0.d, p2/m, z21.d
__ sve_not(z3, __ S, p0, z9); // not z3.s, p0/m, z9.s
__ sve_orr(z28, __ B, p2, z24); // orr z28.b, p2/m, z28.b, z24.b
__ sve_rbit(z19, __ D, p1, z23); // rbit z19.d, p1/m, z23.d
__ sve_revb(z13, __ D, p5, z10); // revb z13.d, p5/m, z10.d
__ sve_smax(z12, __ S, p4, z30); // smax z12.s, p4/m, z12.s, z30.s
__ sve_smin(z14, __ S, p0, z29); // smin z14.s, p0/m, z14.s, z29.s
__ sve_sub(z21, __ S, p5, z7); // sub z21.s, p5/m, z21.s, z7.s
__ sve_fabs(z2, __ D, p0, z26); // fabs z2.d, p0/m, z26.d
__ sve_fadd(z9, __ D, p4, z17); // fadd z9.d, p4/m, z9.d, z17.d
__ sve_fdiv(z0, __ D, p1, z2); // fdiv z0.d, p1/m, z0.d, z2.d
__ sve_fmax(z14, __ D, p1, z11); // fmax z14.d, p1/m, z14.d, z11.d
__ sve_fmin(z14, __ S, p4, z29); // fmin z14.s, p4/m, z14.s, z29.s
__ sve_fmul(z3, __ S, p0, z22); // fmul z3.s, p0/m, z3.s, z22.s
__ sve_fneg(z3, __ S, p6, z27); // fneg z3.s, p6/m, z27.s
__ sve_frintm(z19, __ D, p5, z7); // frintm z19.d, p5/m, z7.d
__ sve_frintn(z21, __ S, p3, z5); // frintn z21.s, p3/m, z5.s
__ sve_frintp(z25, __ D, p1, z21); // frintp z25.d, p1/m, z21.d
__ sve_fsqrt(z17, __ S, p0, z3); // fsqrt z17.s, p0/m, z3.s
__ sve_fsub(z19, __ S, p3, z7); // fsub z19.s, p3/m, z19.s, z7.s
__ sve_fmad(z14, __ S, p4, z17, z11); // fmad z14.s, p4/m, z17.s, z11.s
__ sve_fmla(z24, __ S, p4, z30, z17); // fmla z24.s, p4/m, z30.s, z17.s
__ sve_fmls(z15, __ D, p3, z26, z22); // fmls z15.d, p3/m, z26.d, z22.d
__ sve_fmsb(z22, __ D, p2, z8, z5); // fmsb z22.d, p2/m, z8.d, z5.d
__ sve_fnmad(z27, __ D, p2, z0, z14); // fnmad z27.d, p2/m, z0.d, z14.d
__ sve_fnmsb(z21, __ D, p5, z0, z3); // fnmsb z21.d, p5/m, z0.d, z3.d
__ sve_fnmla(z25, __ D, p1, z25, z29); // fnmla z25.d, p1/m, z25.d, z29.d
__ sve_fnmls(z17, __ D, p0, z12, z14); // fnmls z17.d, p0/m, z12.d, z14.d
__ sve_mla(z13, __ D, p0, z17, z2); // mla z13.d, p0/m, z17.d, z2.d
__ sve_mls(z20, __ H, p5, z21, z29); // mls z20.h, p5/m, z21.h, z29.h
__ sve_and(z8, z2, z0); // and z8.d, z2.d, z0.d
__ sve_eor(z23, z22, z0); // eor z23.d, z22.d, z0.d
__ sve_orr(z25, z26, z23); // orr z25.d, z26.d, z23.d
__ sve_bic(z21, z21, z1); // bic z21.d, z21.d, z1.d
__ sve_uzp1(z10, __ S, z19, z11); // uzp1 z10.s, z19.s, z11.s
__ sve_uzp2(z23, __ D, z23, z8); // uzp2 z23.d, z23.d, z8.d
__ sve_bext(z17, __ S, z19, z19); // bext z17.s, z19.s, z19.s
// SVEReductionOp
__ sve_andv(v10, __ S, p5, z11); // andv s10, p5, z11.s
__ sve_orv(v23, __ D, p6, z8); // orv d23, p6, z8.d
__ sve_eorv(v17, __ S, p5, z19); // eorv s17, p5, z19.s
__ sve_smaxv(v4, __ D, p5, z13); // smaxv d4, p5, z13.d
__ sve_sminv(v22, __ D, p7, z30); // sminv d22, p7, z30.d
__ sve_fminv(v17, __ S, p4, z14); // fminv s17, p4, z14.s
__ sve_fmaxv(v12, __ S, p7, z20); // fmaxv s12, p7, z20.s
__ sve_fadda(v1, __ S, p3, z13); // fadda s1, p3, s1, z13.s
__ sve_uaddv(v7, __ S, p2, z11); // uaddv d7, p2, z11.s
__ sve_andv(v4, __ D, p5, z13); // andv d4, p5, z13.d
__ sve_orv(v22, __ D, p7, z30); // orv d22, p7, z30.d
__ sve_eorv(v17, __ H, p4, z14); // eorv h17, p4, z14.h
__ sve_smaxv(v12, __ B, p7, z20); // smaxv b12, p7, z20.b
__ sve_sminv(v1, __ B, p3, z13); // sminv b1, p3, z13.b
__ sve_fminv(v7, __ D, p2, z11); // fminv d7, p2, z11.d
__ sve_fmaxv(v4, __ S, p6, z15); // fmaxv s4, p6, z15.s
__ sve_fadda(v3, __ D, p7, z0); // fadda d3, p7, d3, z0.d
__ sve_uaddv(v5, __ D, p5, z30); // uaddv d5, p5, z30.d
__ bind(forth);
@ -1213,30 +1224,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140003e5, 0x94000000,
0x97ffffd4, 0x940003e2, 0x3400000a, 0x34fffa2a,
0x34007bea, 0x35000008, 0x35fff9c8, 0x35007b88,
0xb400000b, 0xb4fff96b, 0xb4007b2b, 0xb500001d,
0xb5fff91d, 0xb5007add, 0x10000013, 0x10fff8b3,
0x10007a73, 0x90000013, 0x36300016, 0x3637f836,
0x363079f6, 0x3758000c, 0x375ff7cc, 0x3758798c,
0x14000000, 0x17ffffd7, 0x140003f0, 0x94000000,
0x97ffffd4, 0x940003ed, 0x3400000a, 0x34fffa2a,
0x34007d4a, 0x35000008, 0x35fff9c8, 0x35007ce8,
0xb400000b, 0xb4fff96b, 0xb4007c8b, 0xb500001d,
0xb5fff91d, 0xb5007c3d, 0x10000013, 0x10fff8b3,
0x10007bd3, 0x90000013, 0x36300016, 0x3637f836,
0x36307b56, 0x3758000c, 0x375ff7cc, 0x37587aec,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54007760, 0x54000001, 0x54fff541, 0x54007701,
0x54000002, 0x54fff4e2, 0x540076a2, 0x54000002,
0x54fff482, 0x54007642, 0x54000003, 0x54fff423,
0x540075e3, 0x54000003, 0x54fff3c3, 0x54007583,
0x54000004, 0x54fff364, 0x54007524, 0x54000005,
0x54fff305, 0x540074c5, 0x54000006, 0x54fff2a6,
0x54007466, 0x54000007, 0x54fff247, 0x54007407,
0x54000008, 0x54fff1e8, 0x540073a8, 0x54000009,
0x54fff189, 0x54007349, 0x5400000a, 0x54fff12a,
0x540072ea, 0x5400000b, 0x54fff0cb, 0x5400728b,
0x5400000c, 0x54fff06c, 0x5400722c, 0x5400000d,
0x54fff00d, 0x540071cd, 0x5400000e, 0x54ffefae,
0x5400716e, 0x5400000f, 0x54ffef4f, 0x5400710f,
0x540078c0, 0x54000001, 0x54fff541, 0x54007861,
0x54000002, 0x54fff4e2, 0x54007802, 0x54000002,
0x54fff482, 0x540077a2, 0x54000003, 0x54fff423,
0x54007743, 0x54000003, 0x54fff3c3, 0x540076e3,
0x54000004, 0x54fff364, 0x54007684, 0x54000005,
0x54fff305, 0x54007625, 0x54000006, 0x54fff2a6,
0x540075c6, 0x54000007, 0x54fff247, 0x54007567,
0x54000008, 0x54fff1e8, 0x54007508, 0x54000009,
0x54fff189, 0x540074a9, 0x5400000a, 0x54fff12a,
0x5400744a, 0x5400000b, 0x54fff0cb, 0x540073eb,
0x5400000c, 0x54fff06c, 0x5400738c, 0x5400000d,
0x54fff00d, 0x5400732d, 0x5400000e, 0x54ffefae,
0x540072ce, 0x5400000f, 0x54ffef4f, 0x5400726f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
@ -1401,7 +1412,8 @@
0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85,
0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801,
0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601,
0x052281e0, 0x05238601, 0x04a14026, 0x0568aca7,
0x052281e0, 0x05238601, 0x04a14026, 0x042244a6,
0x046344a6, 0x04a444a6, 0x04e544a7, 0x0568aca7,
0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af,
0xe5b080af, 0x25034440, 0x254054c4, 0x25034640,
0x25415a05, 0x25834440, 0x25c54489, 0x250b5d3a,
@ -1412,7 +1424,8 @@
0x25d8e104, 0x25d8e184, 0x2518e407, 0x05214800,
0x05614800, 0x05a14800, 0x05e14800, 0x05214c00,
0x05614c00, 0x05a14c00, 0x05e14c00, 0x05304001,
0x05314001, 0x05271e11, 0x1e601000, 0x1e603000,
0x05314001, 0x05a18610, 0x05e18610, 0x05271e11,
0x45b0c210, 0x45f1c231, 0x1e601000, 0x1e603000,
0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000,
0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000,
0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000,
@ -1450,18 +1463,19 @@
0x25a1de96, 0x05808874, 0x05423bb1, 0x050030e4,
0x04680102, 0x04be0638, 0x658103c4, 0x65800993,
0x65910707, 0x04d6a53b, 0x04c00e17, 0x04da1696,
0x049089bc, 0x045b1787, 0x049aad6b, 0x04991901,
0x0493922d, 0x04518064, 0x04900dc7, 0x0417afa4,
0x04deaaa0, 0x04980123, 0x04080b1c, 0x04ca06f3,
0x04c1154d, 0x04dcb3cc, 0x65c083ae, 0x65cd94f5,
0x65c68342, 0x65c79229, 0x65c28440, 0x04dda56e,
0x6582b3ae, 0x6580a2c3, 0x6581bb63, 0x65cdb4f3,
0x65818cb5, 0x65f186b9, 0x65b30120, 0x65f12dc7,
0x65f1af0b, 0x65eec9f1, 0x65a7fed6, 0x65aa5f65,
0x65b47aae, 0x04c55723, 0x0441723d, 0x042d33ae,
0x04be3051, 0x047d32b6, 0x04e03048, 0x05a06ad7,
0x05776f59, 0x4501b2b5, 0x049a356a, 0x04d83917,
0x04993671, 0x04c835a4, 0x04ca3fd6, 0x658731d1,
0x65863e8c, 0x65982da1, 0x04812967,
0x049089bc, 0x045b1787, 0x0499ad6b, 0x049ab901,
0x0499122d, 0x04538064, 0x04918dc7, 0x04100fa4,
0x04d7aaa0, 0x049ea123, 0x04180b1c, 0x05e786f3,
0x05e4954d, 0x048813cc, 0x048a03ae, 0x048114f5,
0x04dca342, 0x65c09229, 0x65cd8440, 0x65c6856e,
0x658793ae, 0x658282c3, 0x049dbb63, 0x65c2b4f3,
0x6580acb5, 0x65c1a6b9, 0x658da071, 0x65818cf3,
0x65ab922e, 0x65b113d8, 0x65f62f4f, 0x65e5a916,
0x65eec81b, 0x65e3f415, 0x65fd4739, 0x65ee6191,
0x04c2422d, 0x045d76b4, 0x04203048, 0x04a032d7,
0x04773359, 0x04e132b5, 0x05ab6a6a, 0x05e86ef7,
0x4593b271, 0x04da35a4, 0x04d83fd6, 0x045931d1,
0x04083e8c, 0x040a2da1, 0x65c72967, 0x658639e4,
0x65d83c03, 0x04c137c5,
};
// END Generated code -- do not edit

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
* Copyright (C) 2021, 2022, THL A29 Limited, a Tencent company. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,11 +23,13 @@
package compiler.vectorapi;
import java.lang.foreign.MemorySegment;
import jdk.incubator.vector.*;
import java.nio.ByteOrder;
/*
* @test
* @enablePreview
* @bug 8262998
* @summary Vector API intrinsics should not modify IR when bailing out
* @modules jdk.incubator.vector
@ -40,13 +42,15 @@ public class TestIntrinsicBailOut {
static final VectorSpecies<Double> SPECIES256 = DoubleVector.SPECIES_256;
static byte[] a = new byte[512];
static byte[] r = new byte[512];
static MemorySegment msa = MemorySegment.ofArray(a);
static MemorySegment msr = MemorySegment.ofArray(r);
static void test() {
DoubleVector av = DoubleVector.fromByteArray(SPECIES256, a, 0, ByteOrder.BIG_ENDIAN);
av.intoByteArray(r, 0, ByteOrder.BIG_ENDIAN);
DoubleVector av = DoubleVector.fromMemorySegment(SPECIES256, msa, 0, ByteOrder.BIG_ENDIAN);
av.intoMemorySegment(msr, 0, ByteOrder.BIG_ENDIAN);
DoubleVector bv = DoubleVector.fromByteArray(SPECIES256, a, 32, ByteOrder.LITTLE_ENDIAN);
bv.intoByteArray(r, 32, ByteOrder.LITTLE_ENDIAN);
DoubleVector bv = DoubleVector.fromMemorySegment(SPECIES256, msa, 32, ByteOrder.LITTLE_ENDIAN);
bv.intoMemorySegment(msr, 32, ByteOrder.LITTLE_ENDIAN);
}
public static void main(String[] args) {
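The hunk above migrates TestIntrinsicBailOut from the byte[]-based accessors (fromByteArray/intoByteArray) to the MemorySegment-based ones introduced by this change. A minimal standalone sketch of the same round trip, assuming JDK 19 with --add-modules=jdk.incubator.vector and --enable-preview (the class name is hypothetical, not part of the patch):

// Sketch only: mirrors the load/store pattern of the test above.
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;

public class MemorySegmentRoundTrip {
    static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_256;

    public static void main(String[] args) {
        byte[] src = new byte[512];
        byte[] dst = new byte[512];
        // Heap segments wrapping the backing arrays, as in the test.
        MemorySegment msSrc = MemorySegment.ofArray(src);
        MemorySegment msDst = MemorySegment.ofArray(dst);
        // Load 256 bits of doubles from offset 0 and store them back unchanged.
        DoubleVector v = DoubleVector.fromMemorySegment(SPECIES, msSrc, 0, ByteOrder.BIG_ENDIAN);
        v.intoMemorySegment(msDst, 0, ByteOrder.BIG_ENDIAN);
    }
}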

View file

@ -38,42 +38,42 @@ public class TestVectorErgonomics {
public static void main(String[] args) throws Throwable {
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version")
"-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorReboxing=true");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
"-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorAggressiveReboxing=true");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:-EnableVectorReboxing", "-Xlog:compilation", "-version")
"-XX:-EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorReboxing=false")
.shouldContain("EnableVectorAggressiveReboxing=false");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:-EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
"-XX:-EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorAggressiveReboxing=false");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:-EnableVectorSupport", "-Xlog:compilation", "-version")
"-XX:-EnableVectorSupport", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorSupport=false")
.shouldContain("EnableVectorReboxing=false")
.shouldContain("EnableVectorAggressiveReboxing=false");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:-EnableVectorSupport", "-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version")
"-XX:-EnableVectorSupport", "-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorSupport=false")
.shouldContain("EnableVectorReboxing=false")
.shouldContain("EnableVectorAggressiveReboxing=false");
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
"-XX:-EnableVectorSupport", "-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
"-XX:-EnableVectorSupport", "-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
.shouldHaveExitValue(0)
.shouldContain("EnableVectorSupport=false")
.shouldContain("EnableVectorReboxing=false")

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Rado Smogura. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@ -26,6 +26,7 @@
/*
* @test
* @enablePreview
* @summary Test if memory ordering is preserved
*
* @run main/othervm -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+AbortVMOnCompilationFailure
@ -36,8 +37,8 @@
package compiler.vectorapi;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.lang.foreign.MemorySegment;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;
@ -53,13 +54,13 @@ public class VectorMemoryAlias {
public static int test() {
byte arr[] = new byte[256];
final var bb = ByteBuffer.wrap(arr);
final var ms = MemorySegment.ofArray(arr);
final var ones = ByteVector.broadcast(SPECIES, 1);
var res = ByteVector.zero(SPECIES);
int result = 0;
result += arr[2];
res.add(ones).intoByteBuffer(bb, 0, ByteOrder.nativeOrder());
res.add(ones).intoMemorySegment(ms, 0L, ByteOrder.nativeOrder());
result += arr[2];
return result;
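VectorMemoryAlias now writes through a MemorySegment view of the same backing array instead of a ByteBuffer, so the compiler must still treat the segment store and the plain array read as aliasing. A minimal sketch of that pattern, assuming a 64-bit byte species (class name hypothetical):

// Sketch only: store through a heap segment, read back through the array it wraps.
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;

public class SegmentAliasSketch {
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_64;

    public static void main(String[] args) {
        byte[] arr = new byte[256];
        MemorySegment ms = MemorySegment.ofArray(arr);
        ByteVector ones = ByteVector.broadcast(SPECIES, 1);
        // Store 1s through the segment view; the first lane covers arr[2].
        ByteVector.zero(SPECIES).add(ones).intoMemorySegment(ms, 0L, ByteOrder.nativeOrder());
        System.out.println(arr[2]);  // expected: 1
    }
}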

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -20,30 +20,28 @@
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import jdk.incubator.vector.*;
import jdk.internal.vm.annotation.ForceInline;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.List;
import java.util.function.IntFunction;
import java.util.function.IntUnaryOperator;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import java.lang.foreign.MemorySegment;
import jdk.incubator.vector.*;
import jdk.internal.vm.annotation.ForceInline;
/*
* @test id=Z
* @bug 8260473
* @enablePreview
* @requires vm.gc.Z
* @modules jdk.incubator.vector
* @modules java.base/jdk.internal.vm.annotation
* @run testng/othervm -XX:CompileCommand=compileonly,jdk/incubator/vector/ByteVector.fromByteBuffer
* @run testng/othervm -XX:CompileCommand=compileonly,jdk/incubator/vector/ByteVector.fromMemorySegment
* -XX:-TieredCompilation -XX:CICompilerCount=1 -XX:+UseZGC -Xbatch -Xmx256m VectorRebracket128Test
*/
@ -124,8 +122,10 @@ public class VectorRebracket128Test {
@ForceInline
static <E,F>
void testVectorRebracket(VectorSpecies<E> a, VectorSpecies<F> b, byte[] input, byte[] output) {
Vector<E> av = a.fromByteArray(input, 0, ByteOrder.nativeOrder());
void testVectorRebracket(VectorSpecies<E> a, VectorSpecies<F> b,
byte[] input, byte[] output,
MemorySegment msInput, MemorySegment msOutput) {
Vector<E> av = a.fromMemorySegment(msInput, 0, ByteOrder.nativeOrder());
int block;
assert(input.length == output.length);
@ -139,7 +139,7 @@ public class VectorRebracket128Test {
int part = 0;
Vector<F> bv = av.reinterpretShape(b, part);
bv.intoByteArray(output, 0, ByteOrder.nativeOrder());
bv.intoMemorySegment(msOutput, 0, ByteOrder.nativeOrder());
// in-place copy, no resize
expected = input;
origin = 0;
@ -152,10 +152,12 @@ public class VectorRebracket128Test {
static void testRebracket128(IntFunction<byte[]> fa) {
byte[] barr = fa.apply(128/Byte.SIZE);
byte[] bout = new byte[barr.length];
MemorySegment msin = MemorySegment.ofArray(barr);
MemorySegment msout = MemorySegment.ofArray(bout);
for (int i = 0; i < NUM_ITER; i++) {
testVectorRebracket(bspec128, bspec128, barr, bout);
testVectorRebracket(bspec128, sspec128, barr, bout);
testVectorRebracket(bspec128, ispec128, barr, bout);
testVectorRebracket(bspec128, bspec128, barr, bout, msin, msout);
testVectorRebracket(bspec128, sspec128, barr, bout, msin, msout);
testVectorRebracket(bspec128, ispec128, barr, bout, msin, msout);
}
}
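VectorRebracket128Test now passes pre-built MemorySegments into testVectorRebracket and uses them for the load and store around reinterpretShape. A minimal sketch of that rebracketing round trip (class name hypothetical; species choices are illustrative):

// Sketch only: load bytes from a segment, reinterpret the 128-bit payload as ints,
// store it back through another segment without changing any bits.
import java.lang.foreign.MemorySegment;
import java.nio.ByteOrder;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.Vector;

public class RebracketSketch {
    public static void main(String[] args) {
        byte[] input = new byte[16];
        byte[] output = new byte[16];
        for (int i = 0; i < input.length; i++) input[i] = (byte) i;
        MemorySegment msIn = MemorySegment.ofArray(input);
        MemorySegment msOut = MemorySegment.ofArray(output);

        Vector<Byte> av = ByteVector.SPECIES_128.fromMemorySegment(msIn, 0, ByteOrder.nativeOrder());
        // Same 128 bits viewed as 4 ints; part 0 covers the whole payload for equal sizes.
        Vector<Integer> bv = av.reinterpretShape(IntVector.SPECIES_128, 0);
        bv.intoMemorySegment(msOut, 0, ByteOrder.nativeOrder());
        // output now equals input byte-for-byte.
    }
}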

Some files were not shown because too many files have changed in this diff.