mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8327964: Simplify BigInteger.implMultiplyToLen intrinsic
Reviewed-by: mdoerr, amitkumar, kvn, fyang
This commit is contained in:
parent
08face8c4c
commit
ed81a478e1
17 changed files with 64 additions and 135 deletions
|
@ -3879,7 +3879,7 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
|
||||||
* r2: y
|
* r2: y
|
||||||
* r3: ylen
|
* r3: ylen
|
||||||
* r4: z
|
* r4: z
|
||||||
* r5: zlen
|
* r5: tmp0
|
||||||
* r10: tmp1
|
* r10: tmp1
|
||||||
* r11: tmp2
|
* r11: tmp2
|
||||||
* r12: tmp3
|
* r12: tmp3
|
||||||
|
@ -3890,11 +3890,11 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
||||||
Register z, Register zlen,
|
Register z, Register tmp0,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||||
Register tmp5, Register tmp6, Register product_hi) {
|
Register tmp5, Register tmp6, Register product_hi) {
|
||||||
|
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, product_hi);
|
||||||
|
|
||||||
const Register idx = tmp1;
|
const Register idx = tmp1;
|
||||||
const Register kdx = tmp2;
|
const Register kdx = tmp2;
|
||||||
|
@ -3903,7 +3903,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
const Register y_idx = tmp4;
|
const Register y_idx = tmp4;
|
||||||
const Register carry = tmp5;
|
const Register carry = tmp5;
|
||||||
const Register product = xlen;
|
const Register product = xlen;
|
||||||
const Register x_xstart = zlen; // reuse register
|
const Register x_xstart = tmp0;
|
||||||
|
|
||||||
// First Loop.
|
// First Loop.
|
||||||
//
|
//
|
||||||
|
@ -3919,9 +3919,9 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
// z[xstart] = (int)carry;
|
// z[xstart] = (int)carry;
|
||||||
//
|
//
|
||||||
|
|
||||||
movw(idx, ylen); // idx = ylen;
|
movw(idx, ylen); // idx = ylen;
|
||||||
movw(kdx, zlen); // kdx = xlen+ylen;
|
addw(kdx, xlen, ylen); // kdx = xlen+ylen;
|
||||||
mov(carry, zr); // carry = 0;
|
mov(carry, zr); // carry = 0;
|
||||||
|
|
||||||
Label L_done;
|
Label L_done;
|
||||||
|
|
||||||
|
|
|
@ -1510,7 +1510,7 @@ public:
|
||||||
void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);
|
void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);
|
||||||
public:
|
public:
|
||||||
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
|
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
|
||||||
Register zlen, Register tmp1, Register tmp2, Register tmp3,
|
Register tmp0, Register tmp1, Register tmp2, Register tmp3,
|
||||||
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
|
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
|
||||||
void mul_add(Register out, Register in, Register offs, Register len, Register k);
|
void mul_add(Register out, Register in, Register offs, Register len, Register k);
|
||||||
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
|
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
|
||||||
|
|
|
@ -4645,7 +4645,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
* c_rarg2 - y address
|
* c_rarg2 - y address
|
||||||
* c_rarg3 - y length
|
* c_rarg3 - y length
|
||||||
* c_rarg4 - z address
|
* c_rarg4 - z address
|
||||||
* c_rarg5 - z length
|
|
||||||
*/
|
*/
|
||||||
address generate_multiplyToLen() {
|
address generate_multiplyToLen() {
|
||||||
__ align(CodeEntryAlignment);
|
__ align(CodeEntryAlignment);
|
||||||
|
@ -4657,8 +4656,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register y = r2;
|
const Register y = r2;
|
||||||
const Register ylen = r3;
|
const Register ylen = r3;
|
||||||
const Register z = r4;
|
const Register z = r4;
|
||||||
const Register zlen = r5;
|
|
||||||
|
|
||||||
|
const Register tmp0 = r5;
|
||||||
const Register tmp1 = r10;
|
const Register tmp1 = r10;
|
||||||
const Register tmp2 = r11;
|
const Register tmp2 = r11;
|
||||||
const Register tmp3 = r12;
|
const Register tmp3 = r12;
|
||||||
|
@ -4669,7 +4668,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
|
|
||||||
BLOCK_COMMENT("Entry:");
|
BLOCK_COMMENT("Entry:");
|
||||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
||||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||||
__ ret(lr);
|
__ ret(lr);
|
||||||
|
|
||||||
|
@ -4687,10 +4686,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register x = r0;
|
const Register x = r0;
|
||||||
const Register xlen = r1;
|
const Register xlen = r1;
|
||||||
const Register z = r2;
|
const Register z = r2;
|
||||||
const Register zlen = r3;
|
|
||||||
const Register y = r4; // == x
|
const Register y = r4; // == x
|
||||||
const Register ylen = r5; // == xlen
|
const Register ylen = r5; // == xlen
|
||||||
|
|
||||||
|
const Register tmp0 = r3;
|
||||||
const Register tmp1 = r10;
|
const Register tmp1 = r10;
|
||||||
const Register tmp2 = r11;
|
const Register tmp2 = r11;
|
||||||
const Register tmp3 = r12;
|
const Register tmp3 = r12;
|
||||||
|
@ -4705,7 +4704,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
__ push(spilled_regs, sp);
|
__ push(spilled_regs, sp);
|
||||||
__ mov(y, x);
|
__ mov(y, x);
|
||||||
__ mov(ylen, xlen);
|
__ mov(ylen, xlen);
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
||||||
__ pop(spilled_regs, sp);
|
__ pop(spilled_regs, sp);
|
||||||
__ leave();
|
__ leave();
|
||||||
__ ret(lr);
|
__ ret(lr);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2023 SAP SE. All rights reserved.
|
* Copyright (c) 2012, 2023 SAP SE. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
|
@ -3898,7 +3898,7 @@ void MacroAssembler::muladd(Register out, Register in,
|
||||||
|
|
||||||
void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
||||||
Register y, Register ylen,
|
Register y, Register ylen,
|
||||||
Register z, Register zlen,
|
Register z,
|
||||||
Register tmp1, Register tmp2,
|
Register tmp1, Register tmp2,
|
||||||
Register tmp3, Register tmp4,
|
Register tmp3, Register tmp4,
|
||||||
Register tmp5, Register tmp6,
|
Register tmp5, Register tmp6,
|
||||||
|
@ -3909,11 +3909,11 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
||||||
|
|
||||||
ShortBranchVerifier sbv(this);
|
ShortBranchVerifier sbv(this);
|
||||||
|
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen,
|
assert_different_registers(x, xlen, y, ylen, z,
|
||||||
tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen,
|
assert_different_registers(x, xlen, y, ylen, z,
|
||||||
tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
|
tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen,
|
assert_different_registers(x, xlen, y, ylen, z,
|
||||||
tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);
|
tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);
|
||||||
|
|
||||||
const Register idx = tmp1;
|
const Register idx = tmp1;
|
||||||
|
@ -3941,7 +3941,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
||||||
// z[xstart] = (int)carry;
|
// z[xstart] = (int)carry;
|
||||||
|
|
||||||
mr_if_needed(idx, ylen); // idx = ylen
|
mr_if_needed(idx, ylen); // idx = ylen
|
||||||
mr_if_needed(kdx, zlen); // kdx = xlen + ylen
|
add(kdx, xlen, ylen); // kdx = xlen + ylen
|
||||||
li(carry, 0); // carry = 0
|
li(carry, 0); // carry = 0
|
||||||
|
|
||||||
Label L_done;
|
Label L_done;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2023 SAP SE. All rights reserved.
|
* Copyright (c) 2012, 2023 SAP SE. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
|
@ -784,7 +784,7 @@ class MacroAssembler: public Assembler {
|
||||||
Register tmp1, Register tmp2, Register carry);
|
Register tmp1, Register tmp2, Register carry);
|
||||||
void multiply_to_len(Register x, Register xlen,
|
void multiply_to_len(Register x, Register xlen,
|
||||||
Register y, Register ylen,
|
Register y, Register ylen,
|
||||||
Register z, Register zlen,
|
Register z,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
|
||||||
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
|
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
|
||||||
Register tmp11, Register tmp12, Register tmp13);
|
Register tmp11, Register tmp12, Register tmp13);
|
||||||
|
|
|
@ -3204,7 +3204,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
// R5 - y address
|
// R5 - y address
|
||||||
// R6 - y length
|
// R6 - y length
|
||||||
// R7 - z address
|
// R7 - z address
|
||||||
// R8 - z length
|
|
||||||
//
|
//
|
||||||
address generate_multiplyToLen() {
|
address generate_multiplyToLen() {
|
||||||
|
|
||||||
|
@ -3217,7 +3216,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register y = R5;
|
const Register y = R5;
|
||||||
const Register ylen = R6;
|
const Register ylen = R6;
|
||||||
const Register z = R7;
|
const Register z = R7;
|
||||||
const Register zlen = R8;
|
|
||||||
|
|
||||||
const Register tmp1 = R2; // TOC not used.
|
const Register tmp1 = R2; // TOC not used.
|
||||||
const Register tmp2 = R9;
|
const Register tmp2 = R9;
|
||||||
|
@ -3240,7 +3238,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
// C2 does not respect int to long conversion for stub calls.
|
// C2 does not respect int to long conversion for stub calls.
|
||||||
__ clrldi(xlen, xlen, 32);
|
__ clrldi(xlen, xlen, 32);
|
||||||
__ clrldi(ylen, ylen, 32);
|
__ clrldi(ylen, ylen, 32);
|
||||||
__ clrldi(zlen, zlen, 32);
|
|
||||||
|
|
||||||
// Save non-volatile regs (frameless).
|
// Save non-volatile regs (frameless).
|
||||||
int current_offs = 8;
|
int current_offs = 8;
|
||||||
|
@ -3253,7 +3250,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
__ std(R30, -current_offs, R1_SP); current_offs += 8;
|
__ std(R30, -current_offs, R1_SP); current_offs += 8;
|
||||||
__ std(R31, -current_offs, R1_SP);
|
__ std(R31, -current_offs, R1_SP);
|
||||||
|
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5,
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5,
|
||||||
tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13);
|
tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13);
|
||||||
|
|
||||||
// Restore non-volatile regs.
|
// Restore non-volatile regs.
|
||||||
|
|
|
@ -4120,7 +4120,7 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
|
||||||
* x12: y
|
* x12: y
|
||||||
* x13: ylen
|
* x13: ylen
|
||||||
* x14: z
|
* x14: z
|
||||||
* x15: zlen
|
* x15: tmp0
|
||||||
* x16: tmp1
|
* x16: tmp1
|
||||||
* x17: tmp2
|
* x17: tmp2
|
||||||
* x7: tmp3
|
* x7: tmp3
|
||||||
|
@ -4130,10 +4130,10 @@ void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
|
||||||
* x31: tmp7
|
* x31: tmp7
|
||||||
*/
|
*/
|
||||||
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
||||||
Register z, Register zlen,
|
Register z, Register tmp0,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||||
Register tmp5, Register tmp6, Register product_hi) {
|
Register tmp5, Register tmp6, Register product_hi) {
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
||||||
|
|
||||||
const Register idx = tmp1;
|
const Register idx = tmp1;
|
||||||
const Register kdx = tmp2;
|
const Register kdx = tmp2;
|
||||||
|
@ -4142,11 +4142,11 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
const Register y_idx = tmp4;
|
const Register y_idx = tmp4;
|
||||||
const Register carry = tmp5;
|
const Register carry = tmp5;
|
||||||
const Register product = xlen;
|
const Register product = xlen;
|
||||||
const Register x_xstart = zlen; // reuse register
|
const Register x_xstart = tmp0;
|
||||||
|
|
||||||
mv(idx, ylen); // idx = ylen;
|
mv(idx, ylen); // idx = ylen;
|
||||||
mv(kdx, zlen); // kdx = xlen+ylen;
|
addw(kdx, xlen, ylen); // kdx = xlen+ylen;
|
||||||
mv(carry, zr); // carry = 0;
|
mv(carry, zr); // carry = 0;
|
||||||
|
|
||||||
Label L_multiply_64_x_64_loop, L_done;
|
Label L_multiply_64_x_64_loop, L_done;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||||
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
@ -1287,7 +1287,7 @@ public:
|
||||||
Register tmp, Register tmp3, Register tmp4,
|
Register tmp, Register tmp3, Register tmp4,
|
||||||
Register tmp6, Register product_hi);
|
Register tmp6, Register product_hi);
|
||||||
void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
||||||
Register z, Register zlen,
|
Register z, Register tmp0,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
||||||
Register tmp5, Register tmp6, Register product_hi);
|
Register tmp5, Register tmp6, Register product_hi);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2840,7 +2840,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
* c_rarg2 - y address
|
* c_rarg2 - y address
|
||||||
* c_rarg3 - y length
|
* c_rarg3 - y length
|
||||||
* c_rarg4 - z address
|
* c_rarg4 - z address
|
||||||
* c_rarg5 - z length
|
|
||||||
*/
|
*/
|
||||||
address generate_multiplyToLen()
|
address generate_multiplyToLen()
|
||||||
{
|
{
|
||||||
|
@ -2853,8 +2852,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register y = x12;
|
const Register y = x12;
|
||||||
const Register ylen = x13;
|
const Register ylen = x13;
|
||||||
const Register z = x14;
|
const Register z = x14;
|
||||||
const Register zlen = x15;
|
|
||||||
|
|
||||||
|
const Register tmp0 = x15;
|
||||||
const Register tmp1 = x16;
|
const Register tmp1 = x16;
|
||||||
const Register tmp2 = x17;
|
const Register tmp2 = x17;
|
||||||
const Register tmp3 = x7;
|
const Register tmp3 = x7;
|
||||||
|
@ -2865,7 +2864,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
|
|
||||||
BLOCK_COMMENT("Entry:");
|
BLOCK_COMMENT("Entry:");
|
||||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
||||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||||
__ ret();
|
__ ret();
|
||||||
|
|
||||||
|
@ -2881,10 +2880,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register x = x10;
|
const Register x = x10;
|
||||||
const Register xlen = x11;
|
const Register xlen = x11;
|
||||||
const Register z = x12;
|
const Register z = x12;
|
||||||
const Register zlen = x13;
|
|
||||||
const Register y = x14; // == x
|
const Register y = x14; // == x
|
||||||
const Register ylen = x15; // == xlen
|
const Register ylen = x15; // == xlen
|
||||||
|
|
||||||
|
const Register tmp0 = x13; // zlen, unused
|
||||||
const Register tmp1 = x16;
|
const Register tmp1 = x16;
|
||||||
const Register tmp2 = x17;
|
const Register tmp2 = x17;
|
||||||
const Register tmp3 = x7;
|
const Register tmp3 = x7;
|
||||||
|
@ -2897,7 +2896,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
__ enter();
|
__ enter();
|
||||||
__ mv(y, x);
|
__ mv(y, x);
|
||||||
__ mv(ylen, xlen);
|
__ mv(ylen, xlen);
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
||||||
__ leave();
|
__ leave();
|
||||||
__ ret();
|
__ ret();
|
||||||
|
|
||||||
|
|
|
@ -5281,9 +5281,6 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
||||||
|
|
||||||
z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
|
z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
|
||||||
|
|
||||||
// In openJdk, we store the argument as 32-bit value to slot.
|
|
||||||
Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian.
|
|
||||||
|
|
||||||
const Register idx = tmp1;
|
const Register idx = tmp1;
|
||||||
const Register kdx = tmp2;
|
const Register kdx = tmp2;
|
||||||
const Register xstart = tmp3;
|
const Register xstart = tmp3;
|
||||||
|
@ -5308,7 +5305,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
|
||||||
//
|
//
|
||||||
|
|
||||||
lgr_if_needed(idx, ylen); // idx = ylen
|
lgr_if_needed(idx, ylen); // idx = ylen
|
||||||
z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
|
z_agrk(kdx, xlen, ylen); // kdx = xlen + ylen
|
||||||
clear_reg(carry); // carry = 0
|
clear_reg(carry); // carry = 0
|
||||||
|
|
||||||
Label L_done;
|
Label L_done;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
|
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
|
@ -2981,7 +2981,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
// Z_ARG3 - y address
|
// Z_ARG3 - y address
|
||||||
// Z_ARG4 - y length
|
// Z_ARG4 - y length
|
||||||
// Z_ARG5 - z address
|
// Z_ARG5 - z address
|
||||||
// 160[Z_SP] - z length
|
|
||||||
address generate_multiplyToLen() {
|
address generate_multiplyToLen() {
|
||||||
__ align(CodeEntryAlignment);
|
__ align(CodeEntryAlignment);
|
||||||
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
|
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
|
||||||
|
@ -2993,8 +2992,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||||
const Register y = Z_ARG3;
|
const Register y = Z_ARG3;
|
||||||
const Register ylen = Z_ARG4;
|
const Register ylen = Z_ARG4;
|
||||||
const Register z = Z_ARG5;
|
const Register z = Z_ARG5;
|
||||||
// zlen is passed on the stack:
|
|
||||||
// Address zlen(Z_SP, _z_abi(remaining_cargs));
|
|
||||||
|
|
||||||
// Next registers will be saved on stack in multiply_to_len().
|
// Next registers will be saved on stack in multiply_to_len().
|
||||||
const Register tmp1 = Z_tmp_1;
|
const Register tmp1 = Z_tmp_1;
|
||||||
|
|
|
@ -6983,7 +6983,7 @@ void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z,
|
||||||
* rsi: y
|
* rsi: y
|
||||||
* rcx: ylen
|
* rcx: ylen
|
||||||
* r8: z
|
* r8: z
|
||||||
* r11: zlen
|
* r11: tmp0
|
||||||
* r12: tmp1
|
* r12: tmp1
|
||||||
* r13: tmp2
|
* r13: tmp2
|
||||||
* r14: tmp3
|
* r14: tmp3
|
||||||
|
@ -6991,11 +6991,12 @@ void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z,
|
||||||
* rbx: tmp5
|
* rbx: tmp5
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
|
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register tmp0,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
|
||||||
ShortBranchVerifier sbv(this);
|
ShortBranchVerifier sbv(this);
|
||||||
assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx);
|
assert_different_registers(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, rdx);
|
||||||
|
|
||||||
|
push(tmp0);
|
||||||
push(tmp1);
|
push(tmp1);
|
||||||
push(tmp2);
|
push(tmp2);
|
||||||
push(tmp3);
|
push(tmp3);
|
||||||
|
@ -7003,7 +7004,6 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
push(tmp5);
|
push(tmp5);
|
||||||
|
|
||||||
push(xlen);
|
push(xlen);
|
||||||
push(zlen);
|
|
||||||
|
|
||||||
const Register idx = tmp1;
|
const Register idx = tmp1;
|
||||||
const Register kdx = tmp2;
|
const Register kdx = tmp2;
|
||||||
|
@ -7012,7 +7012,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
const Register y_idx = tmp4;
|
const Register y_idx = tmp4;
|
||||||
const Register carry = tmp5;
|
const Register carry = tmp5;
|
||||||
const Register product = xlen;
|
const Register product = xlen;
|
||||||
const Register x_xstart = zlen; // reuse register
|
const Register x_xstart = tmp0;
|
||||||
|
|
||||||
// First Loop.
|
// First Loop.
|
||||||
//
|
//
|
||||||
|
@ -7028,9 +7028,9 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
// z[xstart] = (int)carry;
|
// z[xstart] = (int)carry;
|
||||||
//
|
//
|
||||||
|
|
||||||
movl(idx, ylen); // idx = ylen;
|
movl(idx, ylen); // idx = ylen;
|
||||||
movl(kdx, zlen); // kdx = xlen+ylen;
|
lea(kdx, Address(xlen, ylen)); // kdx = xlen+ylen;
|
||||||
xorq(carry, carry); // carry = 0;
|
xorq(carry, carry); // carry = 0;
|
||||||
|
|
||||||
Label L_done;
|
Label L_done;
|
||||||
|
|
||||||
|
@ -7134,7 +7134,6 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
|
|
||||||
bind(L_done);
|
bind(L_done);
|
||||||
|
|
||||||
pop(zlen);
|
|
||||||
pop(xlen);
|
pop(xlen);
|
||||||
|
|
||||||
pop(tmp5);
|
pop(tmp5);
|
||||||
|
@ -7142,6 +7141,7 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
||||||
pop(tmp3);
|
pop(tmp3);
|
||||||
pop(tmp2);
|
pop(tmp2);
|
||||||
pop(tmp1);
|
pop(tmp1);
|
||||||
|
pop(tmp0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
|
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
|
||||||
|
|
|
@ -2018,7 +2018,7 @@ public:
|
||||||
Register yz_idx, Register idx, Register jdx,
|
Register yz_idx, Register idx, Register jdx,
|
||||||
Register carry, Register product,
|
Register carry, Register product,
|
||||||
Register carry2);
|
Register carry2);
|
||||||
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
|
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register tmp0,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
||||||
void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
|
void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
|
||||||
Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
|
Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
|
||||||
|
|
|
@ -3044,10 +3044,8 @@ address StubGenerator::generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
|
||||||
* c_rarg3 - y length
|
* c_rarg3 - y length
|
||||||
* not Win64
|
* not Win64
|
||||||
* c_rarg4 - z address
|
* c_rarg4 - z address
|
||||||
* c_rarg5 - z length
|
|
||||||
* Win64
|
* Win64
|
||||||
* rsp+40 - z address
|
* rsp+40 - z address
|
||||||
* rsp+48 - z length
|
|
||||||
*/
|
*/
|
||||||
address StubGenerator::generate_multiplyToLen() {
|
address StubGenerator::generate_multiplyToLen() {
|
||||||
__ align(CodeEntryAlignment);
|
__ align(CodeEntryAlignment);
|
||||||
|
@ -3061,9 +3059,9 @@ address StubGenerator::generate_multiplyToLen() {
|
||||||
const Register y = rsi;
|
const Register y = rsi;
|
||||||
const Register ylen = rcx;
|
const Register ylen = rcx;
|
||||||
const Register z = r8;
|
const Register z = r8;
|
||||||
const Register zlen = r11;
|
|
||||||
|
|
||||||
// Next registers will be saved on stack in multiply_to_len().
|
// Next registers will be saved on stack in multiply_to_len().
|
||||||
|
const Register tmp0 = r11;
|
||||||
const Register tmp1 = r12;
|
const Register tmp1 = r12;
|
||||||
const Register tmp2 = r13;
|
const Register tmp2 = r13;
|
||||||
const Register tmp3 = r14;
|
const Register tmp3 = r14;
|
||||||
|
@ -3073,21 +3071,17 @@ address StubGenerator::generate_multiplyToLen() {
|
||||||
BLOCK_COMMENT("Entry:");
|
BLOCK_COMMENT("Entry:");
|
||||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
|
||||||
#ifndef _WIN64
|
|
||||||
__ movptr(zlen, r9); // Save r9 in r11 - zlen
|
|
||||||
#endif
|
|
||||||
setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
|
setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
|
||||||
// ylen => rcx, z => r8, zlen => r11
|
// ylen => rcx, z => r8
|
||||||
// r9 and r10 may be used to save non-volatile registers
|
// r9 and r10 may be used to save non-volatile registers
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
// last 2 arguments (#4, #5) are on stack on Win64
|
// last argument (#4) is on stack on Win64
|
||||||
__ movptr(z, Address(rsp, 6 * wordSize));
|
__ movptr(z, Address(rsp, 6 * wordSize));
|
||||||
__ movptr(zlen, Address(rsp, 7 * wordSize));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__ movptr(xlen, rsi);
|
__ movptr(xlen, rsi);
|
||||||
__ movptr(y, rdx);
|
__ movptr(y, rdx);
|
||||||
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
|
__ multiply_to_len(x, xlen, y, ylen, z, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||||
|
|
||||||
restore_arg_regs();
|
restore_arg_regs();
|
||||||
|
|
||||||
|
|
|
@ -5978,71 +5978,17 @@ bool LibraryCallKit::inline_multiplyToLen() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the original stack and the reexecute bit for the interpreter to reexecute
|
Node* x_start = array_element_address(x, intcon(0), x_elem);
|
||||||
// the bytecode that invokes BigInteger.multiplyToLen() if deoptimization happens
|
Node* y_start = array_element_address(y, intcon(0), y_elem);
|
||||||
// on the return from z array allocation in runtime.
|
// 'x_start' points to x array + scaled xlen
|
||||||
{ PreserveReexecuteState preexecs(this);
|
// 'y_start' points to y array + scaled ylen
|
||||||
jvms()->set_should_reexecute(true);
|
|
||||||
|
|
||||||
Node* x_start = array_element_address(x, intcon(0), x_elem);
|
Node* z_start = array_element_address(z, intcon(0), T_INT);
|
||||||
Node* y_start = array_element_address(y, intcon(0), y_elem);
|
|
||||||
// 'x_start' points to x array + scaled xlen
|
|
||||||
// 'y_start' points to y array + scaled ylen
|
|
||||||
|
|
||||||
// Allocate the result array
|
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||||
Node* zlen = _gvn.transform(new AddINode(xlen, ylen));
|
OptoRuntime::multiplyToLen_Type(),
|
||||||
ciKlass* klass = ciTypeArrayKlass::make(T_INT);
|
stubAddr, stubName, TypePtr::BOTTOM,
|
||||||
Node* klass_node = makecon(TypeKlassPtr::make(klass));
|
x_start, xlen, y_start, ylen, z_start);
|
||||||
|
|
||||||
IdealKit ideal(this);
|
|
||||||
|
|
||||||
#define __ ideal.
|
|
||||||
Node* one = __ ConI(1);
|
|
||||||
Node* zero = __ ConI(0);
|
|
||||||
IdealVariable need_alloc(ideal), z_alloc(ideal); __ declarations_done();
|
|
||||||
__ set(need_alloc, zero);
|
|
||||||
__ set(z_alloc, z);
|
|
||||||
__ if_then(z, BoolTest::eq, null()); {
|
|
||||||
__ increment (need_alloc, one);
|
|
||||||
} __ else_(); {
|
|
||||||
// Update graphKit memory and control from IdealKit.
|
|
||||||
sync_kit(ideal);
|
|
||||||
Node* cast = new CastPPNode(control(), z, TypePtr::NOTNULL);
|
|
||||||
_gvn.set_type(cast, cast->bottom_type());
|
|
||||||
C->record_for_igvn(cast);
|
|
||||||
|
|
||||||
Node* zlen_arg = load_array_length(cast);
|
|
||||||
// Update IdealKit memory and control from graphKit.
|
|
||||||
__ sync_kit(this);
|
|
||||||
__ if_then(zlen_arg, BoolTest::lt, zlen); {
|
|
||||||
__ increment (need_alloc, one);
|
|
||||||
} __ end_if();
|
|
||||||
} __ end_if();
|
|
||||||
|
|
||||||
__ if_then(__ value(need_alloc), BoolTest::ne, zero); {
|
|
||||||
// Update graphKit memory and control from IdealKit.
|
|
||||||
sync_kit(ideal);
|
|
||||||
Node * narr = new_array(klass_node, zlen, 1);
|
|
||||||
// Update IdealKit memory and control from graphKit.
|
|
||||||
__ sync_kit(this);
|
|
||||||
__ set(z_alloc, narr);
|
|
||||||
} __ end_if();
|
|
||||||
|
|
||||||
sync_kit(ideal);
|
|
||||||
z = __ value(z_alloc);
|
|
||||||
// Can't use TypeAryPtr::INTS which uses Bottom offset.
|
|
||||||
_gvn.set_type(z, TypeOopPtr::make_from_klass(klass));
|
|
||||||
// Final sync IdealKit and GraphKit.
|
|
||||||
final_sync(ideal);
|
|
||||||
#undef __
|
|
||||||
|
|
||||||
Node* z_start = array_element_address(z, intcon(0), T_INT);
|
|
||||||
|
|
||||||
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
|
|
||||||
OptoRuntime::multiplyToLen_Type(),
|
|
||||||
stubAddr, stubName, TypePtr::BOTTOM,
|
|
||||||
x_start, xlen, y_start, ylen, z_start, zlen);
|
|
||||||
} // original reexecute is set back here
|
|
||||||
|
|
||||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
||||||
set_result(z);
|
set_result(z);
|
||||||
|
|
|
@ -1150,7 +1150,7 @@ const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type(bool is_sha3) {
|
||||||
|
|
||||||
const TypeFunc* OptoRuntime::multiplyToLen_Type() {
|
const TypeFunc* OptoRuntime::multiplyToLen_Type() {
|
||||||
// create input type (domain)
|
// create input type (domain)
|
||||||
int num_args = 6;
|
int num_args = 5;
|
||||||
int argcnt = num_args;
|
int argcnt = num_args;
|
||||||
const Type** fields = TypeTuple::fields(argcnt);
|
const Type** fields = TypeTuple::fields(argcnt);
|
||||||
int argp = TypeFunc::Parms;
|
int argp = TypeFunc::Parms;
|
||||||
|
@ -1159,7 +1159,6 @@ const TypeFunc* OptoRuntime::multiplyToLen_Type() {
|
||||||
fields[argp++] = TypePtr::NOTNULL; // y
|
fields[argp++] = TypePtr::NOTNULL; // y
|
||||||
fields[argp++] = TypeInt::INT; // ylen
|
fields[argp++] = TypeInt::INT; // ylen
|
||||||
fields[argp++] = TypePtr::NOTNULL; // z
|
fields[argp++] = TypePtr::NOTNULL; // z
|
||||||
fields[argp++] = TypeInt::INT; // zlen
|
|
||||||
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||||
|
|
||||||
|
|
|
@ -1831,6 +1831,10 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||||
private static int[] multiplyToLen(int[] x, int xlen, int[] y, int ylen, int[] z) {
|
private static int[] multiplyToLen(int[] x, int xlen, int[] y, int ylen, int[] z) {
|
||||||
multiplyToLenCheck(x, xlen);
|
multiplyToLenCheck(x, xlen);
|
||||||
multiplyToLenCheck(y, ylen);
|
multiplyToLenCheck(y, ylen);
|
||||||
|
|
||||||
|
if (z == null || z.length < (xlen + ylen))
|
||||||
|
z = new int[xlen + ylen];
|
||||||
|
|
||||||
return implMultiplyToLen(x, xlen, y, ylen, z);
|
return implMultiplyToLen(x, xlen, y, ylen, z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1839,9 +1843,6 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||||
int xstart = xlen - 1;
|
int xstart = xlen - 1;
|
||||||
int ystart = ylen - 1;
|
int ystart = ylen - 1;
|
||||||
|
|
||||||
if (z == null || z.length < (xlen+ ylen))
|
|
||||||
z = new int[xlen+ylen];
|
|
||||||
|
|
||||||
long carry = 0;
|
long carry = 0;
|
||||||
for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) {
|
for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) {
|
||||||
long product = (y[j] & LONG_MASK) *
|
long product = (y[j] & LONG_MASK) *
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue