mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-23 04:24:49 +02:00
8153310: AArch64: JEP 254: Implement byte_array_inflate
Reviewed-by: roland
This commit is contained in:
parent
66208f1fca
commit
cdcd378bd6
4 changed files with 159 additions and 13 deletions
|
@ -14930,6 +14930,40 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
|
||||||
|
// Fast char[] -> byte[] compression.
//
// Matches StrCompressedCopy and hands the work to
// MacroAssembler::char_array_compress.  The operands use the fixed-register
// operand classes (iRegP_R2 / iRegP_R1 / iRegI_R3 inputs, iRegI_R0 result,
// vRegD_V0..vRegD_V3 vector temporaries).  The three inputs are consumed and
// clobbered (USE_KILL) and the condition flags are killed.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}

  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}

  ins_pipe( pipe_slow );
%}
|
||||||
|
|
||||||
|
// Fast byte[] -> char[] inflation.
//
// Matches StrInflatedCopy and hands the work to
// MacroAssembler::byte_array_inflate.  The node produces no value (the
// match target is the Universe dummy operand).  src, dst and len are
// consumed and clobbered (USE_KILL); three vector temporaries, one scalar
// temporary and the condition flags are reserved/killed.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // The debug text lists every temporary named in the TEMP effects above;
  // byte_array_inflate writes all four of them.
  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
|
||||||
|
|
||||||
// encode char[] to byte[] in ISO_8859_1
|
// encode char[] to byte[] in ISO_8859_1
|
||||||
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||||
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
|
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
|
||||||
|
|
|
@ -2347,6 +2347,24 @@ public:
|
||||||
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
|
f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AdvSIMD ZIP/UZP/TRN
|
||||||
|
#define INSN(NAME, opcode) \
|
||||||
|
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
|
||||||
|
starti; \
|
||||||
|
f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0b001110, 15, 10); \
|
||||||
|
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); \
|
||||||
|
f(T & 1, 30), f(T >> 1, 23, 22); \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSN(uzp1, 0b001);
|
||||||
|
INSN(trn1, 0b010);
|
||||||
|
INSN(zip1, 0b011);
|
||||||
|
INSN(uzp2, 0b101);
|
||||||
|
INSN(trn2, 0b110);
|
||||||
|
INSN(zip2, 0b111);
|
||||||
|
|
||||||
|
#undef INSN
|
||||||
|
|
||||||
// CRC32 instructions
|
// CRC32 instructions
|
||||||
#define INSN(NAME, c, sf, sz) \
|
#define INSN(NAME, c, sf, sz) \
|
||||||
void NAME(Register Rd, Register Rn, Register Rm) { \
|
void NAME(Register Rd, Register Rn, Register Rm) { \
|
||||||
|
|
|
@ -4680,7 +4680,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// encode char[] to byte[] in ISO_8859_1
|
// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and
|
||||||
|
// java/lang/StringUTF16.compress.
|
||||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||||
Register len, Register result,
|
Register len, Register result,
|
||||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||||
|
@ -4743,6 +4744,90 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||||
|
|
||||||
BIND(DONE);
|
BIND(DONE);
|
||||||
sub(result, result, len); // Return index where we stopped
|
sub(result, result, len); // Return index where we stopped
|
||||||
|
// Return len == 0 if we processed all
|
||||||
|
// characters
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Inflate byte[] array to char[].
|
||||||
|
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
|
||||||
|
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
|
||||||
|
Register tmp4) {
|
||||||
|
Label big, done;
|
||||||
|
|
||||||
|
assert_different_registers(src, dst, len, tmp4, rscratch1);
|
||||||
|
|
||||||
|
fmovd(vtmp1 , zr);
|
||||||
|
lsrw(rscratch1, len, 3);
|
||||||
|
|
||||||
|
cbnzw(rscratch1, big);
|
||||||
|
|
||||||
|
// Short string: less than 8 bytes.
|
||||||
|
{
|
||||||
|
Label loop, around, tiny;
|
||||||
|
|
||||||
|
subsw(len, len, 4);
|
||||||
|
andw(len, len, 3);
|
||||||
|
br(LO, tiny);
|
||||||
|
|
||||||
|
// Use SIMD to do 4 bytes.
|
||||||
|
ldrs(vtmp2, post(src, 4));
|
||||||
|
zip1(vtmp3, T8B, vtmp2, vtmp1);
|
||||||
|
strd(vtmp3, post(dst, 8));
|
||||||
|
|
||||||
|
cbzw(len, done);
|
||||||
|
|
||||||
|
// Do the remaining bytes by steam.
|
||||||
|
bind(loop);
|
||||||
|
ldrb(tmp4, post(src, 1));
|
||||||
|
strh(tmp4, post(dst, 2));
|
||||||
|
subw(len, len, 1);
|
||||||
|
|
||||||
|
bind(tiny);
|
||||||
|
cbnz(len, loop);
|
||||||
|
|
||||||
|
bind(around);
|
||||||
|
b(done);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unpack the bytes 8 at a time.
|
||||||
|
bind(big);
|
||||||
|
andw(len, len, 7);
|
||||||
|
|
||||||
|
{
|
||||||
|
Label loop, around;
|
||||||
|
|
||||||
|
bind(loop);
|
||||||
|
ldrd(vtmp2, post(src, 8));
|
||||||
|
sub(rscratch1, rscratch1, 1);
|
||||||
|
zip1(vtmp3, T16B, vtmp2, vtmp1);
|
||||||
|
st1(vtmp3, T8H, post(dst, 16));
|
||||||
|
cbnz(rscratch1, loop);
|
||||||
|
|
||||||
|
bind(around);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the tail of up to 8 bytes.
|
||||||
|
sub(src, src, 8);
|
||||||
|
add(src, src, len, ext::uxtw, 0);
|
||||||
|
ldrd(vtmp2, Address(src));
|
||||||
|
sub(dst, dst, 16);
|
||||||
|
add(dst, dst, len, ext::uxtw, 1);
|
||||||
|
zip1(vtmp3, T16B, vtmp2, vtmp1);
|
||||||
|
st1(vtmp3, T8H, Address(dst));
|
||||||
|
|
||||||
|
bind(done);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compress char[] array to byte[].
|
||||||
|
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
|
||||||
|
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
|
||||||
|
FloatRegister tmp3Reg, FloatRegister tmp4Reg,
|
||||||
|
Register result) {
|
||||||
|
encode_iso_array(src, dst, len, result,
|
||||||
|
tmp1Reg, tmp2Reg, tmp3Reg, tmp4Reg);
|
||||||
|
cmp(len, zr);
|
||||||
|
csel(result, result, zr, EQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
// get_thread() can be called anywhere inside generated code so we
|
// get_thread() can be called anywhere inside generated code so we
|
||||||
|
|
|
@ -1184,6 +1184,15 @@ public:
|
||||||
Register result, Register cnt1,
|
Register result, Register cnt1,
|
||||||
int elem_size, bool is_string);
|
int elem_size, bool is_string);
|
||||||
|
|
||||||
|
// Emit code inflating `len` bytes at `src` to 16-bit elements at `dst`.
// vtmp1..vtmp3 are vector temporaries and tmp4 is a scalar temporary.
void byte_array_inflate(Register src,
                        Register dst,
                        Register len,
                        FloatRegister vtmp1,
                        FloatRegister vtmp2,
                        FloatRegister vtmp3,
                        Register tmp4);
|
||||||
|
|
||||||
|
// Emit code compressing the char[] data at `src` into bytes at `dst`,
// leaving the outcome in `result`.  tmp1Reg..tmp4Reg are vector temporaries.
void char_array_compress(Register src,
                         Register dst,
                         Register len,
                         FloatRegister tmp1Reg,
                         FloatRegister tmp2Reg,
                         FloatRegister tmp3Reg,
                         FloatRegister tmp4Reg,
                         Register result);
|
||||||
|
|
||||||
void encode_iso_array(Register src, Register dst,
|
void encode_iso_array(Register src, Register dst,
|
||||||
Register len, Register result,
|
Register len, Register result,
|
||||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue