8329538: Accelerate P256 on x86_64 using Montgomery intrinsic

Reviewed-by: ihse, ascarpino, sviswanathan
2025-08-27 14:54:52 +02:00 · 2024-05-22 16:27:27 +00:00 · 2024-05-22 16:27:27 +00:00 · afed7d0b05
commit afed7d0b05
parent 9ca90ccd6b
36 changed files with 2252 additions and 315 deletions
--- a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
+++ b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -778,7 +778,7 @@ public class FieldGen {
        result.appendLine("}");
        result.appendLine("@Override");
-        result.appendLine("protected void mult(long[] a, long[] b, long[] r) {");
+        result.appendLine("protected int mult(long[] a, long[] b, long[] r) {");
        result.incrIndent();
        for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
            result.appendIndent();
@ -804,6 +804,9 @@ public class FieldGen {
            }
        }
        result.append(");\n");
        result.appendIndent();
        result.append("return 0;");
        result.appendLine();
        result.decrIndent();
        result.appendLine("}");
@ -833,7 +836,7 @@ public class FieldGen {
        //      }
        //  }
        result.appendLine("@Override");
-        result.appendLine("protected void square(long[] a, long[] r) {");
+        result.appendLine("protected int square(long[] a, long[] r) {");
        result.incrIndent();
        for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
            result.appendIndent();
@ -874,6 +877,9 @@ public class FieldGen {
            }
        }
        result.append(");\n");
        result.appendIndent();
        result.append("return 0;");
        result.appendLine();
        result.decrIndent();
        result.appendLine("}");
--- a/make/test/BuildMicrobenchmark.gmk
+++ b/make/test/BuildMicrobenchmark.gmk
@ -109,6 +109,8 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
        --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
        --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
        --add-exports java.base/sun.security.util=ALL-UNNAMED \
        --add-exports java.base/sun.security.util.math=ALL-UNNAMED \
        --add-exports java.base/sun.security.util.math.intpoly=ALL-UNNAMED \
        --enable-preview \
        -XDsuppressNotes \
        -processor org.openjdk.jmh.generators.BenchmarkProcessor, \
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@ -1549,6 +1549,8 @@ public:
      Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
    }
  }
  using Assembler::evpsrlq;
  void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
@ -1570,6 +1572,7 @@ public:
      Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
    }
  }
  using Assembler::evpsraq;
  void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
    if (!is_varshift) {
      Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@ -4255,6 +4255,11 @@ void StubGenerator::generate_compiler_stubs() {
    StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
  }
  if (UseIntPolyIntrinsics) {
    StubRoutines::_intpoly_montgomeryMult_P256 = generate_intpoly_montgomeryMult_P256();
    StubRoutines::_intpoly_assign = generate_intpoly_assign();
  }
  if (UseMD5Intrinsics) {
    StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
    StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@ -483,6 +483,9 @@ class StubGenerator: public StubCodeGenerator {
                               const XMMRegister P2L, const XMMRegister P2H,
                               const XMMRegister YTMP1, const Register rscratch);
  address generate_intpoly_montgomeryMult_P256();
  address generate_intpoly_assign();
  // BASE64 stubs
  address base64_shuffle_addr();
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
@ -0,0 +1,376 @@
 /*
 * Copyright (c) 2024, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
 #include "precompiled.hpp"
 #include "macroAssembler_x86.hpp"
 #include "stubGenerator_x86_64.hpp"
 #define __ _masm->
 // P-256 field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1, split into five 52-bit
 // limbs (least-significant limb first) and zero-padded to eight quadwords so that
 // it can be loaded as a single 64-byte-aligned 512-bit vector.
 // The table is never written, so it is declared const and kept private to this file.
 ATTRIBUTE_ALIGNED(64) static const uint64_t MODULUS_P256[] = {
  0x000fffffffffffffULL, 0x00000fffffffffffULL,
  0x0000000000000000ULL, 0x0000001000000000ULL,
  0x0000ffffffff0000ULL, 0x0000000000000000ULL,
  0x0000000000000000ULL, 0x0000000000000000ULL
 };
 // Returns the address of MODULUS_P256 (used below to build an ExternalAddress operand).
 static address modulus_p256() {
  return (address)MODULUS_P256;
 }
 // Per-lane AND mask: the first four quadwords keep only the low 52 bits, the last
 // four quadwords are all ones (those lanes pass through an AND unchanged).
 // The table is never written, so it is declared const and kept private to this file.
 ATTRIBUTE_ALIGNED(64) static const uint64_t P256_MASK52[] = {
  0x000fffffffffffffULL, 0x000fffffffffffffULL,
  0x000fffffffffffffULL, 0x000fffffffffffffULL,
  0xffffffffffffffffULL, 0xffffffffffffffffULL,
  0xffffffffffffffffULL, 0xffffffffffffffffULL,
 };
 // Returns the address of P256_MASK52 (used below to build an ExternalAddress operand).
 static address p256_mask52() {
  return (address)P256_MASK52;
 }
 // Quadword permutation indices {1, 2, 3, 4, 5, 6, 7, 0}: destination lane i selects
 // source lane i + 1, i.e. every element moves one lane towards lane 0 (with lane 0
 // wrapping around to lane 7). Used as the index operand of evpermq.
 // The table is never written, so it is declared const and kept private to this file.
 ATTRIBUTE_ALIGNED(64) static const uint64_t SHIFT1R[] = {
  0x0000000000000001ULL, 0x0000000000000002ULL,
  0x0000000000000003ULL, 0x0000000000000004ULL,
  0x0000000000000005ULL, 0x0000000000000006ULL,
  0x0000000000000007ULL, 0x0000000000000000ULL,
 };
 // Returns the address of SHIFT1R (used below to build an ExternalAddress operand).
 static address shift_1R() {
  return (address)SHIFT1R;
 }
 // Quadword permutation indices {7, 0, 1, 2, 3, 4, 5, 6}: destination lane i selects
 // source lane i - 1, i.e. every element moves one lane away from lane 0 (with lane 7
 // wrapping around to lane 0). Used as the index operand of evpermq.
 // The table is never written, so it is declared const and kept private to this file.
 ATTRIBUTE_ALIGNED(64) static const uint64_t SHIFT1L[] = {
  0x0000000000000007ULL, 0x0000000000000000ULL,
  0x0000000000000001ULL, 0x0000000000000002ULL,
  0x0000000000000003ULL, 0x0000000000000004ULL,
  0x0000000000000005ULL, 0x0000000000000006ULL,
 };
 // Returns the address of SHIFT1L (used below to build an ExternalAddress operand).
 static address shift_1L() {
  return (address)SHIFT1L;
 }
 /**
 * Unrolled Word-by-Word Montgomery Multiplication
 * r = a * b * 2^-260 (mod P)
 *
 * Reference [1]: Shay Gueron and Vlad Krasnov
 *    "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes"
 *    See Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication for a Montgomery
 *    Friendly modulus p". Note: Step 6. Skipped; Instead use numAdds to reuse existing overflow
 *    logic.
 *
 * Pseudocode:
 *
 *                                                     +--+--+--+--+--+--+--+--+
 *   M = load(*modulus_p256)                           | 0| 0| 0|m5|m4|m3|m2|m1|
 *                                                     +--+--+--+--+--+--+--+--+
 *   A = load(*aLimbs)                                 | 0| 0| 0|a5|a4|a3|a2|a1|
 *                                                     +--+--+--+--+--+--+--+--+
 *   Acc1 = 0                                          | 0| 0| 0| 0| 0| 0| 0| 0|
 *                                                     +--+--+--+--+--+--+--+--+
 *      ---- for i = 0 to 4
 *                                                     +--+--+--+--+--+--+--+--+
 *          Acc2 = 0                                   | 0| 0| 0| 0| 0| 0| 0| 0|
 *                                                     +--+--+--+--+--+--+--+--+
 *          B = replicate(bLimbs[i])                   |bi|bi|bi|bi|bi|bi|bi|bi|
 *                                                     +--+--+--+--+--+--+--+--+
 *                                                     +--+--+--+--+--+--+--+--+
 *                                               Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1|
 *                                                    *| 0| 0| 0|a5|a4|a3|a2|a1|
 *          Acc1 += A *  B                             |bi|bi|bi|bi|bi|bi|bi|bi|
 *                                                     +--+--+--+--+--+--+--+--+
 *                                               Acc2+=| 0| 0| 0| 0| 0| 0| 0| 0|
 *                                                   *h| 0| 0| 0|a5|a4|a3|a2|a1|
 *          Acc2 += A *h B                             |bi|bi|bi|bi|bi|bi|bi|bi|
 *                                                     +--+--+--+--+--+--+--+--+
 *          N = replicate(Acc1[0])                     |n0|n0|n0|n0|n0|n0|n0|n0|
 *                                                     +--+--+--+--+--+--+--+--+
 *                                                     +--+--+--+--+--+--+--+--+
 *                                               Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1|
 *                                                    *| 0| 0| 0|m5|m4|m3|m2|m1|
 *          Acc1 += M *  N                             |n0|n0|n0|n0|n0|n0|n0|n0| Note: 52 low bits of Acc1[0] == 0 due to Montgomery!
 *                                                     +--+--+--+--+--+--+--+--+
 *                                               Acc2+=| 0| 0| 0|d5|d4|d3|d2|d1|
 *                                                   *h| 0| 0| 0|m5|m4|m3|m2|m1|
 *          Acc2 += M *h N                             |n0|n0|n0|n0|n0|n0|n0|n0|
 *                                                     +--+--+--+--+--+--+--+--+
 *          if (i == 4) break;
 *          // Combine high/low partial sums Acc1 + Acc2
 *                                                     +--+--+--+--+--+--+--+--+
 *          carry = Acc1[0] >> 52                      | 0| 0| 0| 0| 0| 0| 0|c1|
 *                                                     +--+--+--+--+--+--+--+--+
 *          Acc2[0] += carry
 *                                                     +--+--+--+--+--+--+--+--+
 *          Acc1 = Acc1 shift one q element>>          | 0| 0| 0| 0|c5|c4|c3|c2|
 *                                                     +--+--+--+--+--+--+--+--+
 *          Acc1 = Acc1 + Acc2
 *      ---- done
 *   // Last Carry round: Combine high/low partial sums Acc1<high_bits> + Acc1 + Acc2
 *   carry = Acc1 >> 52
 *   Acc1 = Acc1 shift one q element >>
 *   Acc1  = mask52(Acc1)
 *   Acc2  += carry
 *   Acc1 = Acc1 + Acc2
 *   output to rLimbs
 */
 // Emits the unrolled word-by-word Montgomery multiplication described in the
 // pseudocode above. Operands are arrays of five 64-bit elements, one limb each.
 //   aLimbs, bLimbs - registers holding the addresses of the two input limb arrays
 //   rLimbs         - register holding the address of the result limb array
 //   tmp            - general purpose scratch register; overwritten
 //   _masm          - assembler that receives the generated instructions
 // The emitted code overwrites xmm0-xmm2, xmm10-xmm13, xmm20-xmm23 and the opmask
 // registers k1 and k2.
 void montgomeryMultiply(const Register aLimbs, const Register bLimbs, const Register rLimbs, const Register tmp, MacroAssembler* _masm) {
  // tmp serves both as the source for the opmask constants and as the scratch
  // register handed to the ExternalAddress loads.
  Register t0 = tmp;
  Register rscratch = tmp;
  // Inputs
  XMMRegister A = xmm0;
  XMMRegister B = xmm1;
  XMMRegister T = xmm2;
  // Intermediates
  XMMRegister Acc1 = xmm10;
  XMMRegister Acc2 = xmm11;
  XMMRegister N    = xmm12;
  XMMRegister carry = xmm13;
  // Constants
  XMMRegister modulus = xmm20;
  XMMRegister shift1L = xmm21;
  XMMRegister shift1R = xmm22;
  XMMRegister mask52  = xmm23;
  // Opmasks: limb0 selects only lane 0 (0x1), allLimbs selects lanes 0..4 (0x1f)
  KRegister limb0    = k1;
  KRegister allLimbs = k2;
  __ mov64(t0, 0x1);
  __ kmovql(limb0, t0);
  __ mov64(t0, 0x1f);
  __ kmovql(allLimbs, t0);
  // Load the constant tables under the allLimbs mask (merge argument is false)
  __ evmovdquq(shift1L, allLimbs, ExternalAddress(shift_1L()), false, Assembler::AVX_512bit, rscratch);
  __ evmovdquq(shift1R, allLimbs, ExternalAddress(shift_1R()), false, Assembler::AVX_512bit, rscratch);
  __ evmovdquq(mask52, allLimbs, ExternalAddress(p256_mask52()), false, Assembler::AVX_512bit, rscratch);
  // M = load(*modulus_p256)
  __ evmovdquq(modulus, allLimbs, ExternalAddress(modulus_p256()), false, Assembler::AVX_512bit, rscratch);
  // A = load(*aLimbs);  masked evmovdquq() can be slow. Instead load full 256bit, and combine with 64bit
  // Step 1: elements 1..4 of aLimbs are loaded into the low four lanes
  __ evmovdquq(A, Address(aLimbs, 8), Assembler::AVX_256bit);
  // Step 2: permute with the shift1L indices so they land one lane higher
  __ evpermq(A, allLimbs, shift1L, A, false, Assembler::AVX_512bit);
  // Step 3: element 0 of aLimbs is loaded separately and OR-ed in
  __ movq(T, Address(aLimbs, 0));
  __ evporq(A, A, T, Assembler::AVX_512bit);
  // Acc1 = 0
  __ vpxorq(Acc1, Acc1, Acc1, Assembler::AVX_512bit);
  // This C++ loop runs at stub-generation time: it emits the round body five times,
  // once per element of bLimbs (the generated code contains no loop).
  for (int i = 0; i< 5; i++) {
      // Acc2 = 0
      __ vpxorq(Acc2, Acc2, Acc2, Assembler::AVX_512bit);
      // B = replicate(bLimbs[i])
      __ vpbroadcastq(B, Address(bLimbs, i*8), Assembler::AVX_512bit);
      // Acc1 += A * B
      __ evpmadd52luq(Acc1, A, B, Assembler::AVX_512bit);
      // Acc2 += A *h B
      __ evpmadd52huq(Acc2, A, B, Assembler::AVX_512bit);
      // N = replicate(Acc1[0])
      __ vpbroadcastq(N, Acc1, Assembler::AVX_512bit);
      // Acc1 += M *  N
      __ evpmadd52luq(Acc1, modulus, N, Assembler::AVX_512bit);
      // Acc2 += M *h N
      __ evpmadd52huq(Acc2, modulus, N, Assembler::AVX_512bit);
      // The last round skips the per-round combine; the final carry round below
      // handles it instead.
      if (i == 4) break;
      // Combine high/low partial sums Acc1 + Acc2
      // carry = Acc1[0] >> 52
      __ evpsrlq(carry, limb0, Acc1, 52, true, Assembler::AVX_512bit);
      // Acc2[0] += carry
      __ evpaddq(Acc2, limb0, carry, Acc2, true, Assembler::AVX_512bit);
      // Acc1 = Acc1 shift one q element >>
      __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit);
      // Acc1 = Acc1 + Acc2
      __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit);
  }
  // Last Carry round: Combine high/low partial sums Acc1<high_bits> + Acc1 + Acc2
  // carry = Acc1 >> 52
  __ evpsrlq(carry, allLimbs, Acc1, 52, true, Assembler::AVX_512bit);
  // Acc1 = Acc1 shift one q element >>
  __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit);
  // Acc1  = mask52(Acc1)
  __ evpandq(Acc1, Acc1, mask52, Assembler::AVX_512bit); // Clear top 12 bits
  // Acc2 += carry
  __ evpaddq(Acc2, allLimbs, carry, Acc2, true, Assembler::AVX_512bit);
  // Acc1 = Acc1 + Acc2
  __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit);
  // output to rLimbs (1 + 4 limbs)
  // Lane 0 is stored with a 64-bit move; the remaining lanes are shifted down by
  // one and written with a single 256-bit store at rLimbs + 8.
  __ movq(Address(rLimbs, 0), Acc1);
  __ evpermq(Acc1, k0, shift1R, Acc1, true, Assembler::AVX_512bit);
  __ evmovdquq(Address(rLimbs, 8), k0, Acc1, true, Assembler::AVX_256bit);
 }
 // Generates the intpoly_montgomeryMult_P256 stub and returns its entry address.
 // The stub takes three pointer arguments (a limbs, b limbs, result limbs) and
 // returns the constant 1 in rax.
 address StubGenerator::generate_intpoly_montgomeryMult_P256() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "intpoly_montgomeryMult_P256");
  address entry = __ pc();
  __ enter();

  // Argument registers (Linux | Windows):
  //   c_rarg0 = aLimbs (rdi | rcx)
  //   c_rarg1 = bLimbs (rsi | rdx)
  //   c_rarg2 = rLimbs (rdx | r8)
  // r9 is passed along as the scratch register.
  montgomeryMultiply(c_rarg0, c_rarg1, c_rarg2, r9, _masm);

  // Return 1 (Fig. 5, Step 6 [1] skipped in montgomeryMultiply)
  __ mov64(rax, 0x1);

  __ leave();
  __ ret(0);
  return entry;
 }
 // Vector form of the conditional assignment "A = B if select".
 // Emits: load A from aAddr, load B from bAddr, masked move of B into A under the
 // opmask 'select' (merge argument is true), store A back to aAddr.
 //   A, B         - vector registers used as temporaries; overwritten
 //   aAddr, bAddr - memory operands for destination and source
 //   select       - opmask register choosing which lanes take the value of B
 //   vector_len   - vector width used for all four instructions
 // Must be:
 //  - constant time (i.e. no branches)
 //  - free of side channels (i.e. all memory must always be accessed, and in the same order)
 void assign_avx(XMMRegister A, Address aAddr, XMMRegister B, Address bAddr, KRegister select, int vector_len, MacroAssembler* _masm) {
  __ evmovdquq(A, aAddr, vector_len);
  __ evmovdquq(B, bAddr, vector_len);
  __ evmovdquq(A, select, B, true, vector_len);
  // The store is unconditional, so the destination is written whether or not
  // the assignment was selected.
  __ evmovdquq(aAddr, A, vector_len);
 }
 // Scalar (single 64-bit element) form of the conditional assignment:
 //   a = a ^ (select & (a ^ b))
 // which leaves a unchanged when select is 0 and produces b when select is all ones.
 // The sequence is branch-free; bAddr is always read and aAddr is always read and written.
 //   aAddr, bAddr - memory operands for the destination and source element
 //   select       - register holding the mask
 //   tmp          - scratch register; overwritten
 void assign_scalar(Address aAddr, Address bAddr, Register select, Register tmp, MacroAssembler* _masm) {
  // Original java:
  // long dummyLimbs = maskValue & (a[i] ^ b[i]);
  // a[i] = dummyLimbs ^ a[i];
  __ movq(tmp, aAddr);
  __ xorq(tmp, bAddr);
  __ andq(tmp, select);
  __ xorq(aAddr, tmp);
 }
 // Generates the intpoly_assign stub and returns its entry address.
 // Stub arguments: (set, aLimbs, bLimbs, length). For each of the 'length' 64-bit
 // elements the stub performs the branch-free conditional assignment
 //   a[i] = a[i] ^ (mask & (a[i] ^ b[i])),  mask = -set
 // NOTE(review): 'set' is presumably 0 or 1 so that the mask is all zeros or all
 // ones - confirm against the Java caller.
 address StubGenerator::generate_intpoly_assign() {
  // KNOWN Lengths:
  //   MontgomeryIntPolynP256:  5 = 4 + 1
  //   IntegerPolynomial1305:   5 = 4 + 1
  //   IntegerPolynomial25519: 10 = 8 + 2
  //   IntegerPolynomialP256:  10 = 8 + 2
  //   Curve25519OrderField:   10 = 8 + 2
  //   P256OrderField:         10 = 8 + 2
  //   IntegerPolynomialP384:  14 = 8 + 4 + 2
  //   P384OrderField:         14 = 8 + 4 + 2
  //   IntegerPolynomial448:   16 = 8 + 8
  //   Curve448OrderField:     16 = 8 + 8
  //   IntegerPolynomialP521:  19 = 8 + 8 + 2 + 1
  //   P521OrderField:         19 = 8 + 8 + 2 + 1
  // Special Cases 5, 10, 14, 16, 19
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "intpoly_assign");
  address start = __ pc();
  __ enter();
  // Inputs
  const Register set     = c_rarg0;
  const Register aLimbs  = c_rarg1;
  const Register bLimbs  = c_rarg2;
  const Register length  = c_rarg3;
  XMMRegister A = xmm0;
  XMMRegister B = xmm1;
  // Scratch register for assign_scalar(). It must not alias any argument register:
  // the default loop keeps 'length' live across assign_scalar(), and c_rarg3 is r9
  // under the Windows x64 calling convention. rax is not an argument register in
  // either calling convention, and this stub has no return value.
  Register tmp = rax;
  KRegister select = k1;
  Label L_Length5, L_Length10, L_Length14, L_Length16, L_Length19, L_DefaultLoop, L_Done;
  // Turn the flag into a mask (set = -set) and mirror it into the opmask register
  __ negq(set);
  __ kmovql(select, set);
  // NOTE! Crypto code cannot branch on user input. However; allowed to branch on number of limbs;
  // Number of limbs is a constant in each IntegerPolynomial (i.e. this side-channel branch leaks
  //   number of limbs which is not a secret)
  __ cmpl(length, 5);
  __ jcc(Assembler::equal, L_Length5);
  __ cmpl(length, 10);
  __ jcc(Assembler::equal, L_Length10);
  __ cmpl(length, 14);
  __ jcc(Assembler::equal, L_Length14);
  __ cmpl(length, 16);
  __ jcc(Assembler::equal, L_Length16);
  __ cmpl(length, 19);
  __ jcc(Assembler::equal, L_Length19);
  // Default copy loop (UNLIKELY): one element per iteration, 'length' counts down
  __ cmpl(length, 0);
  __ jcc(Assembler::lessEqual, L_Done);
  __ bind(L_DefaultLoop);
  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
  __ subl(length, 1);
  __ lea(aLimbs, Address(aLimbs,8));
  __ lea(bLimbs, Address(bLimbs,8));
  __ cmpl(length, 0);
  __ jcc(Assembler::greater, L_DefaultLoop);
  __ jmp(L_Done);
  __ bind(L_Length5); // 1 + 4
  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
  assign_avx(A, Address(aLimbs, 8), B, Address(bLimbs, 8), select, Assembler::AVX_256bit, _masm);
  __ jmp(L_Done);
  __ bind(L_Length10); // 2 + 8
  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_128bit, _masm);
  assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_512bit, _masm);
  __ jmp(L_Done);
  __ bind(L_Length14); // 2 + 4 + 8
  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_128bit, _masm);
  assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_256bit, _masm);
  assign_avx(A, Address(aLimbs, 48), B, Address(bLimbs, 48), select, Assembler::AVX_512bit, _masm);
  __ jmp(L_Done);
  __ bind(L_Length16); // 8 + 8
  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_512bit, _masm);
  assign_avx(A, Address(aLimbs, 64), B, Address(bLimbs, 64), select, Assembler::AVX_512bit, _masm);
  __ jmp(L_Done);
  __ bind(L_Length19); // 1 + 2 + 8 + 8
  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
  assign_avx(A, Address(aLimbs, 8),  B, Address(bLimbs, 8),  select, Assembler::AVX_128bit, _masm);
  assign_avx(A, Address(aLimbs, 24), B, Address(bLimbs, 24), select, Assembler::AVX_512bit, _masm);
  assign_avx(A, Address(aLimbs, 88), B, Address(bLimbs, 88), select, Assembler::AVX_512bit, _masm);
  __ bind(L_Done);
  __ leave();
  __ ret(0);
  return start;
 }
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@ -1366,6 +1366,18 @@ void VM_Version::get_processor_features() {
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }
 #ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
 #endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }
 #ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@ -492,6 +492,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
  case vmIntrinsics::_poly1305_processBlocks:
    if (!UsePoly1305Intrinsics) return true;
    break;
  case vmIntrinsics::_intpoly_montgomeryMult_P256:
  case vmIntrinsics::_intpoly_assign:
    if (!UseIntPolyIntrinsics) return true;
    break;
  case vmIntrinsics::_updateBytesCRC32C:
  case vmIntrinsics::_updateDirectByteBufferCRC32C:
    if (!UseCRC32CIntrinsics) return true;
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@ -526,7 +526,18 @@ class methodHandle;
  do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, countPositives_signature, F_R)   \
   do_name(     implCompressMB_name,                               "implCompressMultiBlock0")                           \
                                                                                                                        \
-   /* support for java.util.Base64.Encoder*/                                                                            \
+  /* support for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256 */                                      \
  do_class(sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, "sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256")  \
  do_intrinsic(_intpoly_montgomeryMult_P256, sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, intPolyMult_name, intPolyMult_signature, F_R) \
  do_name(intPolyMult_name, "mult")                                                                                     \
  do_signature(intPolyMult_signature, "([J[J[J)I")                                                                      \
                                                                                                                        \
  do_class(sun_security_util_math_intpoly_IntegerPolynomial, "sun/security/util/math/intpoly/IntegerPolynomial")        \
  do_intrinsic(_intpoly_assign, sun_security_util_math_intpoly_IntegerPolynomial, intPolyAssign_name, intPolyAssign_signature, F_S) \
   do_name(intPolyAssign_name, "conditionalAssign")                                                                     \
   do_signature(intPolyAssign_signature, "(I[J[J)V")                                                                    \
                                                                                                                        \
  /* support for java.util.Base64.Encoder*/                                                                             \
  do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder")                                                        \
  do_intrinsic(_base64_encodeBlock, java_util_Base64_Encoder, encodeBlock_name, encodeBlock_signature, F_R)             \
  do_name(encodeBlock_name, "encodeBlock")                                                                              \
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
@ -463,6 +463,12 @@ void ShenandoahBarrierC2Support::verify(RootNode* root) {
        "decodeBlock",
        { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+3, ShenandoahStore },   { -1, ShenandoahNone },
          { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
        "intpoly_montgomeryMult_P256",
        { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahLoad  },   { TypeFunc::Parms+2, ShenandoahStore },
          { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
        "intpoly_assign",
        { { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },  { -1, ShenandoahNone },
          { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
      };
      if (call->is_call_to_arraycopystub()) {
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@ -361,6 +361,8 @@
  static_field(StubRoutines,                _md5_implCompressMB,                              address)                               \
  static_field(StubRoutines,                _chacha20Block,                                   address)                               \
  static_field(StubRoutines,                _poly1305_processBlocks,                          address)                               \
  static_field(StubRoutines,                _intpoly_montgomeryMult_P256,                     address)                               \
  static_field(StubRoutines,                _intpoly_assign,                                  address)                               \
  static_field(StubRoutines,                _sha1_implCompress,                               address)                               \
  static_field(StubRoutines,                _sha1_implCompressMB,                             address)                               \
  static_field(StubRoutines,                _sha256_implCompress,                             address)                               \
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@ -786,6 +786,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
  case vmIntrinsics::_base64_encodeBlock:
  case vmIntrinsics::_base64_decodeBlock:
  case vmIntrinsics::_poly1305_processBlocks:
  case vmIntrinsics::_intpoly_montgomeryMult_P256:
  case vmIntrinsics::_intpoly_assign:
  case vmIntrinsics::_updateCRC32:
  case vmIntrinsics::_updateBytesCRC32:
  case vmIntrinsics::_updateByteBufferCRC32:
--- a/src/hotspot/share/opto/escape.cpp
+++ b/src/hotspot/share/opto/escape.cpp
@ -2173,6 +2173,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
                  strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "intpoly_montgomeryMult_P256") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "intpoly_assign") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "chacha20Block") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@ -638,7 +638,10 @@ bool LibraryCallKit::try_to_inline(int predicate) {
    return inline_base64_decodeBlock();
  case vmIntrinsics::_poly1305_processBlocks:
    return inline_poly1305_processBlocks();
-
+  case vmIntrinsics::_intpoly_montgomeryMult_P256:
    return inline_intpoly_montgomeryMult_P256();
  case vmIntrinsics::_intpoly_assign:
    return inline_intpoly_assign();
  case vmIntrinsics::_encodeISOArray:
  case vmIntrinsics::_encodeByteISOArray:
    return inline_encodeISOArray(false);
@ -7568,6 +7571,69 @@ bool LibraryCallKit::inline_poly1305_processBlocks() {
  return true;
 }
 bool LibraryCallKit::inline_intpoly_montgomeryMult_P256() {
  address stubAddr;
  const char *stubName;
  assert(UseIntPolyIntrinsics, "need intpoly intrinsics support");
  assert(callee()->signature()->size() == 3, "intpoly_montgomeryMult_P256 has %d parameters", callee()->signature()->size());
  stubAddr = StubRoutines::intpoly_montgomeryMult_P256();
  stubName = "intpoly_montgomeryMult_P256";
  if (!stubAddr) return false;
  null_check_receiver();  // null-check receiver
  if (stopped())  return true;
  Node* a = argument(1);
  Node* b = argument(2);
  Node* r = argument(3);
  a = must_be_not_null(a, true);
  b = must_be_not_null(b, true);
  r = must_be_not_null(r, true);
  Node* a_start = array_element_address(a, intcon(0), T_LONG);
  assert(a_start, "a array is NULL");
  Node* b_start = array_element_address(b, intcon(0), T_LONG);
  assert(b_start, "b array is NULL");
  Node* r_start = array_element_address(r, intcon(0), T_LONG);
  assert(r_start, "r array is NULL");
  Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
                                 OptoRuntime::intpoly_montgomeryMult_P256_Type(),
                                 stubAddr, stubName, TypePtr::BOTTOM,
                                 a_start, b_start, r_start);
  Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
  set_result(result);
  return true;
 }
 // Replaces a call to the conditionalAssign(int, long[], long[]) intrinsic with a
 // leaf call to the intpoly_assign stub, passing the flag, the element-0 addresses
 // of both arrays and the length of the first array. The intrinsic produces no
 // result value. Returns false when no stub has been generated.
 bool LibraryCallKit::inline_intpoly_assign() {
  assert(UseIntPolyIntrinsics, "need intpoly intrinsics support");
  assert(callee()->signature()->size() == 3, "intpoly_assign has %d parameters", callee()->signature()->size());
  const char *stubName = "intpoly_assign";
  address stubAddr = StubRoutines::intpoly_assign();
  if (!stubAddr) return false;
  Node* set = argument(0);
  Node* a = argument(1);
  Node* b = argument(2);
  Node* arr_length = load_array_length(a);
  a = must_be_not_null(a, true);
  b = must_be_not_null(b, true);
  Node* a_start = array_element_address(a, intcon(0), T_LONG);
  assert(a_start, "a array is NULL");
  Node* b_start = array_element_address(b, intcon(0), T_LONG);
  assert(b_start, "b array is NULL");
  // The stub returns nothing, so the call node itself is not needed afterwards.
  make_runtime_call(RC_LEAF | RC_NO_FP,
                    OptoRuntime::intpoly_assign_Type(),
                    stubAddr, stubName, TypePtr::BOTTOM,
                    set, a_start, b_start, arr_length);
  return true;
 }
 //------------------------------inline_digestBase_implCompress-----------------------
 //
 // Calculate MD5 for single-block byte[] array.
--- a/src/hotspot/share/opto/library_call.hpp
+++ b/src/hotspot/share/opto/library_call.hpp
@ -307,6 +307,8 @@ class LibraryCallKit : public GraphKit {
  bool inline_base64_encodeBlock();
  bool inline_base64_decodeBlock();
  bool inline_poly1305_processBlocks();
  bool inline_intpoly_montgomeryMult_P256();
  bool inline_intpoly_assign();
  bool inline_digestBase_implCompress(vmIntrinsics::ID id);
  bool inline_digestBase_implCompressMB(int predicate);
  bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
--- a/src/hotspot/share/opto/runtime.cpp
+++ b/src/hotspot/share/opto/runtime.cpp
@ -1401,6 +1401,45 @@ const TypeFunc* OptoRuntime::poly1305_processBlocks_Type() {
  return TypeFunc::make(domain, range);
 }
 // Call signature of the MontgomeryIntegerPolynomialP256 multiply stub:
 // three non-null pointers in (a, b, r), one int out.
 const TypeFunc* OptoRuntime::intpoly_montgomeryMult_P256_Type() {
  // Input type (domain)
  const int num_args = 3;
  const Type** in_fields = TypeTuple::fields(num_args);
  int slot = TypeFunc::Parms;
  in_fields[slot++] = TypePtr::NOTNULL;    // a array
  in_fields[slot++] = TypePtr::NOTNULL;    // b array
  in_fields[slot++] = TypePtr::NOTNULL;    // r(esult) array
  assert(slot == TypeFunc::Parms + num_args, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + num_args, in_fields);

  // Result type (range): a single int
  const Type** out_fields = TypeTuple::fields(1);
  out_fields[TypeFunc::Parms + 0] = TypeInt::INT; // carry bits in output
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, out_fields);

  return TypeFunc::make(domain, range);
 }
 // IntegerPolynomial constant time assignment function
 // Call signature of the stub: (int set, ptr a, ptr b, int length) -> void.
 const TypeFunc* OptoRuntime::intpoly_assign_Type() {
  int argcnt = 4;
  const Type** fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  fields[argp++] = TypeInt::INT;        // set flag
  fields[argp++] = TypePtr::NOTNULL;    // a array (result)
  fields[argp++] = TypePtr::NOTNULL;    // b array (if set is set)
  fields[argp++] = TypeInt::INT;        // array length
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
  // no result type needed: the range tuple has no entries beyond TypeFunc::Parms
  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms + 0] = nullptr; // void
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
  return TypeFunc::make(domain, range);
 }
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
  // create input type (domain)
--- a/src/hotspot/share/opto/runtime.hpp
+++ b/src/hotspot/share/opto/runtime.hpp
@ -298,6 +298,8 @@ private:
  static const TypeFunc* base64_encodeBlock_Type();
  static const TypeFunc* base64_decodeBlock_Type();
  static const TypeFunc* poly1305_processBlocks_Type();
  // Call signatures for the intpoly functions (P256 Montgomery multiply and
  // IntegerPolynomial constant time assignment)
  static const TypeFunc* intpoly_montgomeryMult_P256_Type();
  static const TypeFunc* intpoly_assign_Type();
  static const TypeFunc* updateBytesCRC32_Type();
  static const TypeFunc* updateBytesCRC32C_Type();
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@ -233,6 +233,8 @@ const int ObjectAlignmentInBytes = 8;
                                                                            \
  product(bool, UsePoly1305Intrinsics, false, DIAGNOSTIC,                   \
          "Use intrinsics for sun.security.util.math.intpoly")              \
  product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC,                   \
          "Use intrinsics for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256") \
                                                                            \
  product(size_t, LargePageSizeInBytes, 0,                                  \
          "Maximum large page size used (0 will use the default large "     \
--- a/src/hotspot/share/runtime/stubRoutines.cpp
+++ b/src/hotspot/share/runtime/stubRoutines.cpp
@ -132,6 +132,8 @@ address StubRoutines::_chacha20Block                       = nullptr;
 address StubRoutines::_base64_encodeBlock                  = nullptr;
 address StubRoutines::_base64_decodeBlock                  = nullptr;
 address StubRoutines::_poly1305_processBlocks              = nullptr;
 // intpoly stub addresses; initialized to nullptr here
 address StubRoutines::_intpoly_montgomeryMult_P256         = nullptr;
 address StubRoutines::_intpoly_assign                      = nullptr;
 address StubRoutines::_md5_implCompress      = nullptr;
 address StubRoutines::_md5_implCompressMB    = nullptr;
--- a/src/hotspot/share/runtime/stubRoutines.hpp
+++ b/src/hotspot/share/runtime/stubRoutines.hpp
@ -215,6 +215,8 @@ class StubRoutines: AllStatic {
  static address _base64_encodeBlock;
  static address _base64_decodeBlock;
  static address _poly1305_processBlocks;
  // Addresses of the intpoly routines (P256 Montgomery multiply, assign)
  static address _intpoly_montgomeryMult_P256;
  static address _intpoly_assign;
  static address _md5_implCompress;
  static address _md5_implCompressMB;
@ -409,6 +411,8 @@ class StubRoutines: AllStatic {
  static address electronicCodeBook_encryptAESCrypt()   { return _electronicCodeBook_encryptAESCrypt; }
  static address electronicCodeBook_decryptAESCrypt()   { return _electronicCodeBook_decryptAESCrypt; }
  static address poly1305_processBlocks()               { return _poly1305_processBlocks; }
  // Accessors returning the stored intpoly routine addresses
  static address intpoly_montgomeryMult_P256()          { return _intpoly_montgomeryMult_P256; }
  static address intpoly_assign()        { return _intpoly_assign; }
  static address counterMode_AESCrypt()  { return _counterMode_AESCrypt; }
  static address ghash_processBlocks()   { return _ghash_processBlocks; }
  static address chacha20Block()         { return _chacha20Block; }
--- a/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java
+++ b/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java
@ -31,6 +31,7 @@ import sun.security.util.CurveDB;
 import sun.security.util.ECUtil;
 import sun.security.util.NamedCurve;
 import sun.security.util.math.IntegerFieldModuloP;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.MutableIntegerModuloP;
 import sun.security.util.math.SmallValue;
@ -265,6 +266,11 @@ public final class ECDHKeyAgreement extends KeyAgreementSpi {
        ECPublicKey pubKey) throws InvalidKeyException {
        IntegerFieldModuloP field = ops.getField();
        if (field instanceof IntegerMontgomeryFieldModuloP) {
            // No point of doing a single SmallValue operation in Montgomery domain
            field = ((IntegerMontgomeryFieldModuloP)field).residueField();
        }
        // convert s array into field element and multiply by the cofactor
        MutableIntegerModuloP scalar = field.getElement(priv.getS()).mutable();
        SmallValue cofactor =
--- a/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java
+++ b/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -252,7 +252,7 @@ public class ECDSAOperations {
        MutablePoint p1 = ecOps.multiply(basePoint, temp1);
        MutablePoint p2 = ecOps.multiply(pp, temp2);
-        ecOps.setSum(p1, p2.asAffine());
+        ecOps.setSum(p1, p2);
        IntegerModuloP result = p1.asAffine().getX();
        b2a(result, orderField, temp1);
        return MessageDigest.isEqual(temp1, r);
--- a/src/java.base/share/classes/sun/security/ec/ECOperations.java
+++ b/src/java.base/share/classes/sun/security/ec/ECOperations.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -46,12 +46,7 @@ import java.util.Optional;
 * Formulas are derived from "Complete addition formulas for prime order
 * elliptic curves" by Renes, Costello, and Batina.
 */
 public class ECOperations {
    private static final ECOperations secp256r1Ops =
        new ECOperations(IntegerPolynomialP256.ONE.getElement(
                CurveDB.lookup(KnownOIDs.secp256r1.value()).getCurve().getB()),
                P256OrderField.ONE);
    /*
     * An exception indicating a problem with an intermediate value produced
@ -64,7 +59,7 @@ public class ECOperations {
    }
    static final Map<BigInteger, IntegerFieldModuloP> fields = Map.of(
-        IntegerPolynomialP256.MODULUS, IntegerPolynomialP256.ONE,
+        IntegerPolynomialP256.MODULUS, MontgomeryIntegerPolynomialP256.ONE,
        IntegerPolynomialP384.MODULUS, IntegerPolynomialP384.ONE,
        IntegerPolynomialP521.MODULUS, IntegerPolynomialP521.ONE
    );
@ -207,11 +202,28 @@ public class ECOperations {
     * @return the product
     */
    public MutablePoint multiply(AffinePoint affineP, byte[] s) {
-        return PointMultiplier.of(this, affineP).pointMultiply(s);
+        PointMultiplier multiplier = null;
        if (getField() instanceof IntegerMontgomeryFieldModuloP
                && affineP.equals(Secp256R1GeneratorMontgomeryMultiplier.generator)) {
            // Lazy class loading here
            multiplier = Secp256R1GeneratorMontgomeryMultiplier.multiplier;
        } else {
            multiplier = new DefaultMultiplier(this, affineP);
        }
        return multiplier.pointMultiply(s);
    }
    /**
     * Multiply an affine ecpoint point by a scalar and return the result as a
     * mutable point.
     *
     * @param ecPoint the point
     * @param s the scalar as a little-endian array
     * @return the product
     */
    public MutablePoint multiply(ECPoint ecPoint, byte[] s) {
-        return PointMultiplier.of(this, ecPoint).pointMultiply(s);
+        return multiply(AffinePoint.fromECPoint(ecPoint, getField()), s);
    }
    /*
@ -264,21 +276,26 @@ public class ECOperations {
    }
-    /*
+    /**
-     * Mixed point addition. This method constructs new temporaries each time
+     * Adds second Mutable (Projective) point to first.
-     * it is called. For better efficiency, the method that reuses temporaries
+     *
-     * should be used if more than one sum will be computed.
+     * Used by ECDSAOperations. This method constructs new temporaries each time
     * it is called. For better efficiency, the (private) method that reuses
     * temporaries should be used if more than one sum will be computed.
     *
     * @param p first point and result
     * @param p2 second point to add
     */
-    public void setSum(MutablePoint p, AffinePoint p2) {
+    public void setSum(MutablePoint p, MutablePoint p2) {
        IntegerModuloP zero = p.getField().get0();
        MutableIntegerModuloP t0 = zero.mutable();
        MutableIntegerModuloP t1 = zero.mutable();
        MutableIntegerModuloP t2 = zero.mutable();
        MutableIntegerModuloP t3 = zero.mutable();
        MutableIntegerModuloP t4 = zero.mutable();
        setSum((ProjectivePoint.Mutable) p, p2, t0, t1, t2, t3, t4);
        setSum((ProjectivePoint.Mutable) p, (ProjectivePoint.Mutable) p2,
            t0, t1, t2, t3, t4);
    }
    /*
@ -289,18 +306,18 @@ public class ECOperations {
        MutableIntegerModuloP t2, MutableIntegerModuloP t3,
        MutableIntegerModuloP t4) {
-        t0.setValue(p.getX()).setProduct(p2.getX());
+        t0.setValue(p.getX()).setProduct(p2.getX(false));
-        t1.setValue(p.getY()).setProduct(p2.getY());
+        t1.setValue(p.getY()).setProduct(p2.getY(false));
-        t3.setValue(p2.getX()).setSum(p2.getY());
+        t3.setValue(p2.getX(false)).setSum(p2.getY(false));
        t4.setValue(p.getX()).setSum(p.getY());
        t3.setProduct(t4);
        t4.setValue(t0).setSum(t1);
        t3.setDifference(t4);
-        t4.setValue(p2.getY()).setProduct(p.getZ());
+        t4.setValue(p2.getY(false)).setProduct(p.getZ());
        t4.setSum(p.getY());
-        p.getY().setValue(p2.getX()).setProduct(p.getZ());
+        p.getY().setValue(p2.getX(false)).setProduct(p.getZ());
        p.getY().setSum(p.getX());
        t2.setValue(p.getZ());
        p.getZ().setProduct(b);
@ -412,11 +429,8 @@ public class ECOperations {
        return isNeutral(this.multiply(ap, scalar));
    }
-    sealed interface PointMultiplier {
+    sealed interface PointMultiplier
-        Map<ECPoint, PointMultiplier> multipliers = Map.of(
+        permits DefaultMultiplier, Secp256R1GeneratorMontgomeryMultiplier {
                Secp256R1GeneratorMultiplier.generator,
                Secp256R1GeneratorMultiplier.multiplier);
        // Multiply the point by a scalar and return the result as a mutable
        // point.  The multiplier point is specified by the implementation of
        // this interface, which could be a general EC point or EC generator
@ -429,26 +443,6 @@ public class ECOperations {
        // in little endian byte array representation.
        ProjectivePoint.Mutable pointMultiply(byte[] scalar);
        static PointMultiplier of(ECOperations ecOps, AffinePoint affPoint) {
            PointMultiplier multiplier = multipliers.get(affPoint.toECPoint());
            if (multiplier == null) {
                multiplier = new Default(ecOps, affPoint);
            }
            return multiplier;
        }
        static PointMultiplier of(ECOperations ecOps, ECPoint ecPoint) {
            PointMultiplier multiplier = multipliers.get(ecPoint);
            if (multiplier == null) {
                AffinePoint affPoint =
                        AffinePoint.fromECPoint(ecPoint, ecOps.getField());
                multiplier = new Default(ecOps, affPoint);
            }
            return multiplier;
        }
        private static void lookup(
                ProjectivePoint.Immutable[] ips, int index,
                ProjectivePoint.Mutable result) {
@ -465,232 +459,249 @@ public class ECOperations {
                result.conditionalSet(pi, set);
            }
        }
    }
-        final class Default implements PointMultiplier {
+    final static class DefaultMultiplier implements PointMultiplier {
-            private final AffinePoint affineP;
+        private final ECOperations ecOps;
-            private final ECOperations ecOps;
+        private final ProjectivePoint.Immutable[] pointMultiples;
-            private Default(ECOperations ecOps, AffinePoint affineP) {
+        DefaultMultiplier(ECOperations ecOps, AffinePoint affineP) {
-                this.ecOps = ecOps;
+            this.ecOps = ecOps;
-                this.affineP = affineP;
+
            // Precompute and cache point multiples
            this.pointMultiples = new ProjectivePoint.Immutable[16];
            IntegerFieldModuloP field = ecOps.getField();
            ImmutableIntegerModuloP zero = field.get0();
            // temporaries
            MutableIntegerModuloP t0 = zero.mutable();
            MutableIntegerModuloP t1 = zero.mutable();
            MutableIntegerModuloP t2 = zero.mutable();
            MutableIntegerModuloP t3 = zero.mutable();
            MutableIntegerModuloP t4 = zero.mutable();
            ProjectivePoint.Mutable ps =
                new ProjectivePoint.Mutable(field);
            ps.getY().setValue(field.get1().mutable());
            // 0P is neutral---same as initial result value
            pointMultiples[0] = ps.fixed();
            ps.setValue(affineP);
            // 1P = P
            pointMultiples[1] = ps.fixed();
            // the rest are calculated using mixed point addition
            for (int i = 2; i < 16; i++) {
                ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4);
                pointMultiples[i] = ps.fixed();
            }
        }
        @Override
        public ProjectivePoint.Mutable pointMultiply(byte[] s) {
            // 4-bit windowed multiply with branchless lookup.
            // The mixed addition is faster, so it is used to construct
            // the array at the beginning of the operation.
            IntegerFieldModuloP field = ecOps.getField();
            ImmutableIntegerModuloP zero = field.get0();
            // temporaries
            MutableIntegerModuloP t0 = zero.mutable();
            MutableIntegerModuloP t1 = zero.mutable();
            MutableIntegerModuloP t2 = zero.mutable();
            MutableIntegerModuloP t3 = zero.mutable();
            MutableIntegerModuloP t4 = zero.mutable();
            ProjectivePoint.Mutable result = new ProjectivePoint.Mutable(field);
            result.getY().setValue(field.get1().mutable());
            ProjectivePoint.Mutable lookupResult = new ProjectivePoint.Mutable(field);
            for (int i = s.length - 1; i >= 0; i--) {
                double4(result, t0, t1, t2, t3, t4);
                int high = (0xFF & s[i]) >>> 4;
                PointMultiplier.lookup(pointMultiples, high, lookupResult);
                ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
                double4(result, t0, t1, t2, t3, t4);
                int low = 0xF & s[i];
                PointMultiplier.lookup(pointMultiples, low, lookupResult);
                ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
            }
-            @Override
+            return result;
-            public ProjectivePoint.Mutable pointMultiply(byte[] s) {
+        }
                // 4-bit windowed multiply with branchless lookup.
                // The mixed addition is faster, so it is used to construct
                // the array at the beginning of the operation.
-                IntegerFieldModuloP field = affineP.getX().getField();
+        private void double4(ProjectivePoint.Mutable p,
-                ImmutableIntegerModuloP zero = field.get0();
+                MutableIntegerModuloP t0, MutableIntegerModuloP t1,
-                // temporaries
+                MutableIntegerModuloP t2, MutableIntegerModuloP t3,
-                MutableIntegerModuloP t0 = zero.mutable();
+                MutableIntegerModuloP t4) {
-                MutableIntegerModuloP t1 = zero.mutable();
+            for (int i = 0; i < 4; i++) {
-                MutableIntegerModuloP t2 = zero.mutable();
+                ecOps.setDouble(p, t0, t1, t2, t3, t4);
-                MutableIntegerModuloP t3 = zero.mutable();
+            }
-                MutableIntegerModuloP t4 = zero.mutable();
+        }
    }
-                ProjectivePoint.Mutable result =
+    // Represents a multiplier with a larger precomputed table. Intended to be
-                        new ProjectivePoint.Mutable(field);
+    // used for Basepoint multiplication
-                result.getY().setValue(field.get1().mutable());
+    final static class Secp256R1GeneratorMontgomeryMultiplier
        implements PointMultiplier {
        private static final ECOperations secp256r1Ops = new ECOperations(
            MontgomeryIntegerPolynomialP256.ONE.getElement(
                    CurveDB.P_256.getCurve().getB()), P256OrderField.ONE);
        public static final AffinePoint generator = AffinePoint.fromECPoint(
            CurveDB.P_256.getGenerator(), secp256r1Ops.getField());
        public static final PointMultiplier multiplier =
            new Secp256R1GeneratorMontgomeryMultiplier();
-                ProjectivePoint.Immutable[] pointMultiples =
+        private final ImmutableIntegerModuloP zero;
-                        new ProjectivePoint.Immutable[16];
+        private final ImmutableIntegerModuloP one;
-                // 0P is neutral---same as initial result value
+        private final ProjectivePoint.Immutable[][] points;
-                pointMultiples[0] = result.fixed();
+        private final BigInteger[] base;
-                ProjectivePoint.Mutable ps = new ProjectivePoint.Mutable(field);
+        private Secp256R1GeneratorMontgomeryMultiplier() {
-                ps.setValue(affineP);
+            this(MontgomeryIntegerPolynomialP256.ONE,
-                // 1P = P
+                new DefaultMultiplier(secp256r1Ops, generator));
                pointMultiples[1] = ps.fixed();
-                // the rest are calculated using mixed point addition
+            // Check that the tables are correctly generated.
-                for (int i = 2; i < 16; i++) {
+            if (ECOperations.class.desiredAssertionStatus()) {
-                    ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4);
+                verifyTables(this);
-                    pointMultiples[i] = ps.fixed();
+            }
        }
        private Secp256R1GeneratorMontgomeryMultiplier(
            IntegerFieldModuloP field, PointMultiplier smallTableMultiplier) {
            zero = field.get0();
            one = field.get1();
            // Pre-computed table to speed up the point multiplication.
            //
            // This is a 4x16 array of ProjectivePoint.Immutable elements.
            // The first row contains the following multiples of the
            // generator.
            //
            // index   |    point
            // --------+----------------
            // 0x0000  | 0G
            // 0x0001  | 1G
            // 0x0002  | (2^64)G
            // 0x0003  | (2^64 + 1)G
            // 0x0004  | 2^128G
            // 0x0005  | (2^128 + 1)G
            // 0x0006  | (2^128 + 2^64)G
            // 0x0007  | (2^128 + 2^64 + 1)G
            // 0x0008  | 2^192G
            // 0x0009  | (2^192 + 1)G
            // 0x000A  | (2^192 + 2^64)G
            // 0x000B  | (2^192 + 2^64 + 1)G
            // 0x000C  | (2^192 + 2^128)G
            // 0x000D  | (2^192 + 2^128 + 1)G
            // 0x000E  | (2^192 + 2^128 + 2^64)G
            // 0x000F  | (2^192 + 2^128 + 2^64 + 1)G
            //
            // For the other 3 rows, points[i][j] = 2^16 * (points[i-1][j]).
            // Generate the pre-computed tables.  This block may be
            // replaced with hard-coded tables in order to speed up
            // the class loading.
            points = new ProjectivePoint.Immutable[4][16];
            BigInteger[] factors = new BigInteger[] {
                    BigInteger.ONE,
                    BigInteger.TWO.pow(64),
                    BigInteger.TWO.pow(128),
                    BigInteger.TWO.pow(192)
            };
            base = new BigInteger[16];
            base[0] = BigInteger.ZERO;
            base[1] = BigInteger.ONE;
            base[2] = factors[1];
            for (int i = 3; i < 16; i++) {
                base[i] = BigInteger.ZERO;
                for (int k = 0; k < 4; k++) {
                    if (((i >>> k) & 0x01) != 0) {
                        base[i] = base[i].add(factors[k]);
                    }
                }
                ProjectivePoint.Mutable lookupResult = ps.mutable();
                for (int i = s.length - 1; i >= 0; i--) {
                    double4(result, t0, t1, t2, t3, t4);
                    int high = (0xFF & s[i]) >>> 4;
                    lookup(pointMultiples, high, lookupResult);
                    ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
                    double4(result, t0, t1, t2, t3, t4);
                    int low = 0xF & s[i];
                    lookup(pointMultiples, low, lookupResult);
                    ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
                }
                return result;
            }
-            private void double4(ProjectivePoint.Mutable p,
+            for (int d = 0; d < 4; d++) {
-                    MutableIntegerModuloP t0, MutableIntegerModuloP t1,
+                for (int w = 0; w < 16; w++) {
-                    MutableIntegerModuloP t2, MutableIntegerModuloP t3,
+                    BigInteger bi = base[w];
-                    MutableIntegerModuloP t4) {
+                    if (d != 0) {
-                for (int i = 0; i < 4; i++) {
+                        bi = bi.multiply(BigInteger.TWO.pow(d * 16));
-                    ecOps.setDouble(p, t0, t1, t2, t3, t4);
+                    }
                    if (w == 0) {
                        points[d][0] = new ProjectivePoint.Immutable(
                            zero.fixed(), one.fixed(), zero.fixed());
                    } else {
                        byte[] s = bi.toByteArray();
                        ArrayUtil.reverse(s);
                        ProjectivePoint.Mutable m = smallTableMultiplier.pointMultiply(s);
                        points[d][w] = m.fixed();
                    }
                }
            }
        }
-        final class Secp256R1GeneratorMultiplier implements PointMultiplier {
+        public ProjectivePoint.Mutable pointMultiply(byte[] s) {
-            private static final ECPoint generator =
+            MutableIntegerModuloP t0 = zero.mutable();
-                    CurveDB.P_256.getGenerator();
+            MutableIntegerModuloP t1 = zero.mutable();
-            private static final PointMultiplier multiplier =
+            MutableIntegerModuloP t2 = zero.mutable();
-                    new Secp256R1GeneratorMultiplier();
+            MutableIntegerModuloP t3 = zero.mutable();
            MutableIntegerModuloP t4 = zero.mutable();
-            private static final ImmutableIntegerModuloP zero =
+            ProjectivePoint.Mutable d = new ProjectivePoint.Mutable(
-                    IntegerPolynomialP256.ONE.get0();
+                    zero.mutable(),
-            private static final ImmutableIntegerModuloP one =
+                    one.mutable(),
-                    IntegerPolynomialP256.ONE.get1();
+                    zero.mutable());
            ProjectivePoint.Mutable r = d.mutable();
            for (int i = 15; i >= 0; i--) {
                secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4);
                for (int j = 3; j >= 0; j--) {
                    int pos = i + j * 16;
                    int index = (bit(s, pos + 192) << 3) |
                                (bit(s, pos + 128) << 2) |
                                (bit(s, pos +  64) << 1) |
                                    bit(s, pos);
-            @Override
+                    PointMultiplier.lookup(points[j], index, r);
-            public ProjectivePoint.Mutable pointMultiply(byte[] s) {
+                    secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4);
                MutableIntegerModuloP t0 = zero.mutable();
                MutableIntegerModuloP t1 = zero.mutable();
                MutableIntegerModuloP t2 = zero.mutable();
                MutableIntegerModuloP t3 = zero.mutable();
                MutableIntegerModuloP t4 = zero.mutable();
                ProjectivePoint.Mutable d = new ProjectivePoint.Mutable(
                        zero.mutable(),
                        one.mutable(),
                        zero.mutable());
                ProjectivePoint.Mutable r = d.mutable();
                for (int i = 15; i >= 0; i--) {
                    secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4);
                    for (int j = 3; j >= 0; j--) {
                        int pos = i + j * 16;
                        int index = (bit(s, pos + 192) << 3) |
                                    (bit(s, pos + 128) << 2) |
                                    (bit(s, pos +  64) << 1) |
                                     bit(s, pos);
                        lookup(P256.points[j], index, r);
                        secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4);
                    }
                }
                return d;
            }
-            private static int bit(byte[] k, int i) {
+            return d;
-                return (k[i >> 3] >> (i & 0x07)) & 0x01;
+        }
            }
-            // Lazy loading of the tables.
+        private static int bit(byte[] k, int i) {
-            private static final class P256 {
+            return (k[i >> 3] >> (i & 0x07)) & 0x01;
-                // Pre-computed table to speed up the point multiplication.
+        }
                //
                // This is a 4x16 array of ProjectivePoint.Immutable elements.
                // The first row contains the following multiples of the
                // generator.
                //
                // index   |    point
                // --------+----------------
                // 0x0000  | 0G
                // 0x0001  | 1G
                // 0x0002  | (2^64)G
                // 0x0003  | (2^64 + 1)G
                // 0x0004  | 2^128G
                // 0x0005  | (2^128 + 1)G
                // 0x0006  | (2^128 + 2^64)G
                // 0x0007  | (2^128 + 2^64 + 1)G
                // 0x0008  | 2^192G
                // 0x0009  | (2^192 + 1)G
                // 0x000A  | (2^192 + 2^64)G
                // 0x000B  | (2^192 + 2^64 + 1)G
                // 0x000C  | (2^192 + 2^128)G
                // 0x000D  | (2^192 + 2^128 + 1)G
                // 0x000E  | (2^192 + 2^128 + 2^64)G
                // 0x000F  | (2^192 + 2^128 + 2^64 + 1)G
                //
                // For the other 3 rows, points[i][j] = 2^16 * (points[i-1][j]).
                private static final ProjectivePoint.Immutable[][] points;
-                // Generate the pre-computed tables.  This block may be
+        protected void verifyTables(PointMultiplier multiplier) {
-                // replaced with hard-coded tables in order to speed up
+            for (int d = 0; d < 4; d++) {
-                // the class loading.
+                for (int w = 0; w < 16; w++) {
-                static {
+                    BigInteger bi = base[w];
-                    points = new ProjectivePoint.Immutable[4][16];
+                    if (d != 0) {
-                    BigInteger[] factors = new BigInteger[] {
+                        bi = bi.multiply(BigInteger.TWO.pow(d * 16));
                            BigInteger.ONE,
                            BigInteger.TWO.pow(64),
                            BigInteger.TWO.pow(128),
                            BigInteger.TWO.pow(192)
                    };
                    BigInteger[] base = new BigInteger[16];
                    base[0] = BigInteger.ZERO;
                    base[1] = BigInteger.ONE;
                    base[2] = factors[1];
                    for (int i = 3; i < 16; i++) {
                        base[i] = BigInteger.ZERO;
                        for (int k = 0; k < 4; k++) {
                            if (((i >>> k) & 0x01) != 0) {
                                base[i] = base[i].add(factors[k]);
                            }
                        }
                    }
                    if (w != 0) {
                        byte[] s = new byte[32];
                        byte[] b = bi.toByteArray();
                        ArrayUtil.reverse(b);
                        System.arraycopy(b, 0, s, 0, b.length);
-                    for (int d = 0; d < 4; d++) {
+                        // Compare this multiplier to the table
-                        for (int w = 0; w < 16; w++) {
+                        // (generated by Default multiplier)
-                            BigInteger bi = base[w];
+                        AffinePoint m = multiplier.pointMultiply(s).asAffine();
-                            if (d != 0) {
+                        AffinePoint v = points[d][w].asAffine();
-                                bi = bi.multiply(BigInteger.TWO.pow(d * 16));
+                        if (!m.equals(v)) {
-                            }
+                            java.util.HexFormat hex = java.util.HexFormat.of();
-                            if (w == 0) {
+                            throw new RuntimeException(
-                                points[d][0] = new ProjectivePoint.Immutable(
+                                "Bad multiple found at [" +d+"]["+w+"]" +
-                                    zero.fixed(), one.fixed(), zero.fixed());
+                                hex.formatHex(s) + " " + m.getX().asBigInteger()
-                            } else {
+                            );
                                PointMultiplier multiplier = new Default(
                                    secp256r1Ops, AffinePoint.fromECPoint(
                                        generator, zero.getField()));
                                byte[] s = bi.toByteArray();
                                ArrayUtil.reverse(s);
                                ProjectivePoint.Mutable m =
                                        multiplier.pointMultiply(s);
                                points[d][w] = m.setValue(m.asAffine()).fixed();
                            }
                        }
                    }
                    // Check that the tables are correctly generated.
                    if (ECOperations.class.desiredAssertionStatus()) {
                        verifyTables(base);
                    }
                }
                private static void verifyTables(BigInteger[] base) {
                    for (int d = 0; d < 4; d++) {
                        for (int w = 0; w < 16; w++) {
                            BigInteger bi = base[w];
                            if (d != 0) {
                                bi = bi.multiply(BigInteger.TWO.pow(d * 16));
                            }
                            if (w != 0) {
                                byte[] s = new byte[32];
                                byte[] b = bi.toByteArray();
                                ArrayUtil.reverse(b);
                                System.arraycopy(b, 0, s, 0, b.length);
                                ProjectivePoint.Mutable m =
                                        multiplier.pointMultiply(s);
                                ProjectivePoint.Immutable v =
                                        m.setValue(m.asAffine()).fixed();
                                if (!v.getX().asBigInteger().equals(
                                        points[d][w].getX().asBigInteger()) ||
                                    !v.getY().asBigInteger().equals(
                                        points[d][w].getY().asBigInteger())) {
                                    throw new RuntimeException();
                                }
                            }
                        }
                    }
                }
--- a/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java
+++ b/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java
@ -26,6 +26,7 @@ package sun.security.ec.point;
 import sun.security.util.math.ImmutableIntegerModuloP;
 import sun.security.util.math.IntegerFieldModuloP;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import java.security.spec.ECPoint;
 import java.util.Objects;
@ -54,14 +55,30 @@ public class AffinePoint {
    }
    public ECPoint toECPoint() {
-        return new ECPoint(x.asBigInteger(), y.asBigInteger());
+        return new ECPoint(getX().asBigInteger(), getY().asBigInteger());
    }
    /**
     * Returns the x-coordinate with the field check enabled; equivalent to
     * {@code getX(true)}.
     */
    public ImmutableIntegerModuloP getX() {
        return getX(true);
    }
    /**
     * Returns the x-coordinate of this point.
     *
     * @param fieldCheck when true and the coordinate's field is an
     *        IntegerMontgomeryFieldModuloP, the coordinate is passed through
     *        that field's fromMontgomery before being returned; when false
     *        the stored value is returned as is
     * @return the converted or stored x-coordinate, as described above
     */
    public ImmutableIntegerModuloP getX(boolean fieldCheck) {
        IntegerFieldModuloP xField = x.getField();
        boolean convert = fieldCheck
                && xField instanceof IntegerMontgomeryFieldModuloP;
        if (!convert) {
            return x;
        }
        IntegerMontgomeryFieldModuloP montField =
                (IntegerMontgomeryFieldModuloP) xField;
        return montField.fromMontgomery(x);
    }
    /**
     * Returns the y-coordinate with the field check enabled; equivalent to
     * {@code getY(true)}.
     */
    public ImmutableIntegerModuloP getY() {
        return getY(true);
    }
    /**
     * Returns the y-coordinate of this point.
     *
     * @param fieldCheck when true and the coordinate's field is an
     *        IntegerMontgomeryFieldModuloP, the coordinate is passed through
     *        that field's fromMontgomery before being returned; when false
     *        the stored value is returned as is
     * @return the converted or stored y-coordinate, as described above
     */
    public ImmutableIntegerModuloP getY(boolean fieldCheck) {
        IntegerFieldModuloP yField = y.getField();
        boolean convert = fieldCheck
                && yField instanceof IntegerMontgomeryFieldModuloP;
        if (!convert) {
            return y;
        }
        IntegerMontgomeryFieldModuloP montField =
                (IntegerMontgomeryFieldModuloP) yField;
        return montField.fromMontgomery(y);
    }
@ -71,8 +88,30 @@ public class AffinePoint {
            return false;
        }
        AffinePoint p = (AffinePoint) obj;
-        boolean xEquals = x.asBigInteger().equals(p.x.asBigInteger());
+        boolean xEquals, yEquals;
-        boolean yEquals = y.asBigInteger().equals(p.y.asBigInteger());
+        boolean thisMont = x.getField() instanceof IntegerMontgomeryFieldModuloP;
        boolean objMont = p.x.getField() instanceof IntegerMontgomeryFieldModuloP;
        if (thisMont ^ objMont == false) {
            // both fields same
            xEquals = x.asBigInteger().equals(p.x.asBigInteger());
            yEquals = y.asBigInteger().equals(p.y.asBigInteger());
        } else if (thisMont) {
            // mismatched fields should not happen in production, but useful in
            // testing
            IntegerMontgomeryFieldModuloP field =
                (IntegerMontgomeryFieldModuloP)x.getField();
            xEquals = x.asBigInteger().equals(
                field.getElement(p.x.asBigInteger()).asBigInteger());
            yEquals = y.asBigInteger().equals(
                field.getElement(p.y.asBigInteger()).asBigInteger());
        } else {
            IntegerMontgomeryFieldModuloP field =
                (IntegerMontgomeryFieldModuloP)p.x.getField();
            xEquals = field.getElement(
                x.asBigInteger()).asBigInteger().equals(p.x.asBigInteger());
            yEquals = field.getElement(
                y.asBigInteger()).asBigInteger().equals(p.y.asBigInteger());
        }
        return xEquals && yEquals;
    }
--- a/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java
+++ b/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,7 @@
 package sun.security.ec.point;
 import sun.security.util.math.*;
 import jdk.internal.vm.annotation.ForceInline;
 /**
 * Elliptic curve point in projective coordinates (X, Y, Z) where
@ -145,6 +146,7 @@ public abstract class ProjectivePoint
            return conditionalSet(pp, set);
        }
        @ForceInline
        private <T extends IntegerModuloP>
        Mutable conditionalSet(ProjectivePoint<T> pp, int set) {
@ -157,9 +159,9 @@ public abstract class ProjectivePoint
        @Override
        public Mutable setValue(AffinePoint p) {
-            x.setValue(p.getX());
+            x.setValue(p.getX(false));
-            y.setValue(p.getY());
+            y.setValue(p.getY(false));
-            z.setValue(p.getX().getField().get1());
+            z.setValue(p.getX(false).getField().get1());
            return this;
        }
--- a/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java
+++ b/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java
@ -0,0 +1,40 @@
 /*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package sun.security.util.math;
 import java.math.BigInteger;
 /**
 * An interface for a field of integers modulo a prime number whose elements
 * are kept in Montgomery form. It extends IntegerFieldModuloP, so an
 * implementation can still be used to get properties of the field and to
 * produce field elements of type ImmutableIntegerModuloP; in addition it
 * offers a conversion out of Montgomery form and access to a companion
 * field.
 */
 public interface IntegerMontgomeryFieldModuloP extends IntegerFieldModuloP {
    /**
     * Converts an element of this field out of Montgomery form.
     *
     * @param m the element to convert
     * @return the converted element (presumably an element of
     *         {@link #residueField()}; confirm against the implementation)
     */
    ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP m);
    /**
     * Returns the companion field associated with this Montgomery field.
     * NOTE(review): by its name this is the field that holds the plain
     * (non-Montgomery) residues; confirm against the implementation.
     */
    IntegerFieldModuloP residueField();
 }
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -32,6 +32,9 @@ import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.util.Arrays;
 import jdk.internal.vm.annotation.ForceInline;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 /**
 * A large number polynomial representation using sparse limbs of signed
 * long (64-bit) values. Limb values will always fit within a long, so inputs
@ -62,10 +65,9 @@ import java.util.Arrays;
 public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
    permits IntegerPolynomial1305, IntegerPolynomial25519,
            IntegerPolynomial448, IntegerPolynomialP256,
-            IntegerPolynomialP384, IntegerPolynomialP521,
+            MontgomeryIntegerPolynomialP256, IntegerPolynomialP384,
-            IntegerPolynomialModBinP, P256OrderField,
+            IntegerPolynomialP521, IntegerPolynomialModBinP, P256OrderField,
-            P384OrderField, P521OrderField,
+            P384OrderField, P521OrderField, Curve25519OrderField,
            Curve25519OrderField,
            Curve448OrderField {
    protected static final BigInteger TWO = BigInteger.valueOf(2);
@ -74,7 +76,8 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
    private final BigInteger modulus;
    protected final int bitsPerLimb;
    private final long[] posModLimbs;
-    private final int maxAdds;
+    private final int maxAddsMul; // max additions before a multiplication
    private final int maxAddsAdd; // max additions before an addition
    /**
     * Reduce an IntegerPolynomial representation (a) and store the result
@ -87,11 +90,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     * store the result in an IntegerPolynomial representation in a. Requires
     * that a.length == numLimbs.
     */
-    protected void multByInt(long[] a, long b) {
+    protected int multByInt(long[] a, long b) {
        for (int i = 0; i < a.length; i++) {
            a[i] *= b;
        }
        reduce(a);
        return 0;
    }
    /**
@ -100,7 +104,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     * a.length == b.length == r.length == numLimbs. It is allowed for a and r
     * to be the same array.
     */
-    protected abstract void mult(long[] a, long[] b, long[] r);
+    protected abstract int mult(long[] a, long[] b, long[] r);
    /**
     * Multiply an IntegerPolynomial representation (a) with itself and store
@ -108,19 +112,23 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     * a.length == r.length == numLimbs. It is allowed for a and r
     * to be the same array.
     */
-    protected abstract void square(long[] a, long[] r);
+    protected abstract int square(long[] a, long[] r);
    IntegerPolynomial(int bitsPerLimb,
                      int numLimbs,
-                      int maxAdds,
+                      int maxAddsMul,
                      BigInteger modulus) {
        this.numLimbs = numLimbs;
        this.modulus = modulus;
        this.bitsPerLimb = bitsPerLimb;
-        this.maxAdds = maxAdds;
+        this.maxAddsMul = maxAddsMul;
-
+        if (bitsPerLimb>32) {
            this.maxAddsAdd = 64 - bitsPerLimb;
        } else {
            this.maxAddsAdd = 32 - bitsPerLimb;
        }
        posModLimbs = setPosModLimbs();
    }
@ -135,7 +143,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
    }
    public int getMaxAdds() {
-        return maxAdds;
+        return maxAddsMul;
    }
    @Override
@ -327,10 +335,9 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
    }
    protected void setLimbsValuePositive(BigInteger v, long[] limbs) {
        assert bitsPerLimb < 32;
        long limbMask = (1L << bitsPerLimb) - 1;
        for (int i = 0; i < limbs.length; i++) {
-            limbs[i] = v.intValue() & limbMask;
+            limbs[i] = v.longValue() & limbMask;
            v = v.shiftRight(bitsPerLimb);
        }
    }
@ -449,6 +456,8 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     * will be unchanged. If set==1, then the values of b will be assigned to a.
     * The behavior is undefined if swap has any value other than 0 or 1.
     */
    @ForceInline
    @IntrinsicCandidate
    protected static void conditionalAssign(int set, long[] a, long[] b) {
        int maskValue = -set;
        for (int i = 0; i < a.length; i++) {
@ -557,14 +566,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element b = (Element)genB;
            // Reduce if required.
-            // if (numAdds >= maxAdds) {
+            if (numAdds > maxAddsAdd) {
            if (numAdds > 32 - bitsPerLimb) {
               reduce(limbs);
               numAdds = 0;
            }
-            // if (b.numAdds >= maxAdds) {
+            if (b.numAdds > maxAddsAdd) {
            if (b.numAdds > 32 - bitsPerLimb) {
                reduce(b.limbs);
                b.numAdds = 0;
            }
@ -586,7 +593,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
                newLimbs[i] = -limbs[i];
            }
-            return new ImmutableElement(newLimbs, numAdds);
+            return new ImmutableElement(newLimbs, numAdds+1);
        }
        protected long[] cloneLow(long[] limbs) {
@ -604,32 +611,32 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element b = (Element)genB;
            // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                reduce(limbs);
                numAdds = 0;
            }
-            if (b.numAdds > maxAdds) {
+            if (b.numAdds > maxAddsMul) {
                reduce(b.limbs);
                b.numAdds = 0;
            }
            long[] newLimbs = new long[limbs.length];
-            mult(limbs, b.limbs, newLimbs);
+            int numAdds = mult(limbs, b.limbs, newLimbs);
-            return new ImmutableElement(newLimbs, 0);
+            return new ImmutableElement(newLimbs, numAdds);
        }
        @Override
        public ImmutableElement square() {
            // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                reduce(limbs);
                numAdds = 0;
            }
            long[] newLimbs = new long[limbs.length];
-            IntegerPolynomial.this.square(limbs, newLimbs);
+            int numAdds = IntegerPolynomial.this.square(limbs, newLimbs);
-            return new ImmutableElement(newLimbs, 0);
+            return new ImmutableElement(newLimbs, numAdds);
        }
        public void addModPowerTwo(IntegerModuloP arg, byte[] result) {
@ -637,12 +644,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element other = (Element)arg;
            // Reduce if required.
-            if (numAdds > 32 - bitsPerLimb) {
+            if (numAdds > maxAddsAdd) {
                reduce(limbs);
                numAdds = 0;
            }
-            if (other.numAdds > 32 - bitsPerLimb) {
+            if (other.numAdds > maxAddsAdd) {
                reduce(other.limbs);
                other.numAdds = 0;
            }
@ -734,32 +741,30 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element b = (Element)genB;
            // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                reduce(limbs);
                numAdds = 0;
            }
-            if (b.numAdds > maxAdds) {
+            if (b.numAdds > maxAddsMul) {
                reduce(b.limbs);
                b.numAdds = 0;
            }
-            mult(limbs, b.limbs, limbs);
+            numAdds = mult(limbs, b.limbs, limbs);
            numAdds = 0;
            return this;
        }
        @Override
        public MutableElement setProduct(SmallValue v) {
            // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                reduce(limbs);
                numAdds = 0;
            }
            int value = ((Limb)v).value;
-            multByInt(limbs, value);
+            numAdds += multByInt(limbs, value);
            numAdds = 0;
            return this;
        }
@ -769,14 +774,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element b = (Element)genB;
            // Reduce if required.
-            // if (numAdds >= maxAdds) {
+            if (numAdds > maxAddsAdd) {
            if (numAdds > 32 - bitsPerLimb) {
               reduce(limbs);
               numAdds = 0;
            }
-            // if (b.numAdds >= maxAdds) {
+            if (b.numAdds > maxAddsAdd) {
            if (b.numAdds > 32 - bitsPerLimb) {
                reduce(b.limbs);
                b.numAdds = 0;
            }
@ -795,14 +798,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            Element b = (Element)genB;
            // Reduce if required.
-            // if (numAdds >= maxAdds) {
+            if (numAdds > maxAddsAdd) {
            if (numAdds > 32 - bitsPerLimb) {
               reduce(limbs);
               numAdds = 0;
            }
-            // if (b.numAdds >= maxAdds) {
+            if (b.numAdds > maxAddsAdd) {
            if (b.numAdds > 32 - bitsPerLimb) {
                reduce(b.limbs);
                b.numAdds = 0;
            }
@ -818,13 +819,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
        @Override
        public MutableElement setSquare() {
            // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                reduce(limbs);
                numAdds = 0;
            }
-            IntegerPolynomial.this.square(limbs, limbs);
+            numAdds = IntegerPolynomial.this.square(limbs, limbs);;
            numAdds = 0;
            return this;
        }
@ -833,6 +833,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
            for (int i = 0; i < limbs.length; i++) {
                limbs[i] = -limbs[i];
            }
            numAdds++;
            return this;
        }
    }
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -50,7 +50,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial {
        super(BITS_PER_LIMB, NUM_LIMBS, 1, MODULUS);
    }
-    protected void mult(long[] a, long[] b, long[] r) {
+    protected int mult(long[] a, long[] b, long[] r) {
        // Use grade-school multiplication into primitives to avoid the
        // temporary array allocation. This is equivalent to the following
@ -73,6 +73,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial {
        long c8 = (a[4] * b[4]);
        carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
        return 0;
    }
    private void carryReduce(long[] r, long c0, long c1, long c2, long c3,
@ -99,7 +100,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial {
    }
    @Override
-    protected void square(long[] a, long[] r) {
+    protected int square(long[] a, long[] r) {
        // Use grade-school multiplication with a simple squaring optimization.
        // Multiply into primitives to avoid the temporary array allocation.
        // This is equivalent to the following code:
@ -122,6 +123,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial {
        long c8 = (a[4] * a[4]);
        carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
        return 0;
    }
    @Override
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -131,11 +131,12 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial {
    }
    @Override
-    protected void mult(long[] a, long[] b, long[] r) {
+    protected int mult(long[] a, long[] b, long[] r) {
        long[] c = new long[2 * numLimbs];
        multOnly(a, b, c);
        carryReduce(c, r);
        return 0;
    }
    private void modReduceInBits(long[] limbs, int index, int bits, long x) {
@ -188,7 +189,7 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial {
    }
    @Override
-    protected void square(long[] a, long[] r) {
+    protected int square(long[] a, long[] r) {
        long[] c = new long[2 * numLimbs];
        for (int i = 0; i < numLimbs; i++) {
@ -199,7 +200,7 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial {
        }
        carryReduce(c, r);
-
+        return 0;
    }
    /**
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
@ -0,0 +1,560 @@
 /*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package sun.security.util.math.intpoly;
 import sun.security.util.math.ImmutableIntegerModuloP;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.SmallValue;
 import sun.security.util.math.IntegerFieldModuloP;
 import java.lang.Math;
 import java.math.BigInteger;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 // Reference:
 // - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve
 //       Cryptography with 256 Bit Primes"
 //
 public final class MontgomeryIntegerPolynomialP256 extends IntegerPolynomial
        implements IntegerMontgomeryFieldModuloP {
    // Values are held as NUM_LIMBS limbs of BITS_PER_LIMB bits each
    // (5 * 52 = 260 bits). The limb arrays below are written with the least
    // significant limb first.
    private static final int BITS_PER_LIMB = 52;
    private static final int NUM_LIMBS = 5;
    // Handed to the IntegerPolynomial constructor as the number of additions
    // allowed before a multiplication.
    private static final int MAX_ADDS = 0;
    public static final BigInteger MODULUS = evaluateModulus();
    // Mask with the low BITS_PER_LIMB bits set.
    private static final long LIMB_MASK = -1L >>> (64 - BITS_PER_LIMB);
    // Shared instance of this field; the constructor is private.
    public static final MontgomeryIntegerPolynomialP256 ONE = new MontgomeryIntegerPolynomialP256();
    // h = 2^(2*260)%p = 0x4fffffffdfffffffffffffffefffffffbffffffff000000000000000300
    // oneActual = 1
    // oneMont = (1*2^260) mod p
    // modulus = p
    // h: multiplied with a plain value by getElement to enter the Montgomery
    // domain.
    private static final long[] h = new long[] {
        0x0000000000000300L, 0x000ffffffff00000L, 0x000ffffefffffffbL,
        0x000fdfffffffffffL, 0x0000000004ffffffL };
    // oneActual: the plain value 1; fromMontgomery multiplies by it to leave
    // the Montgomery domain.
    private static final long[] oneActual = new long[] {
        0x0000000000000001L, 0x0000000000000000L, 0x0000000000000000L,
        0x0000000000000000L, 0x0000000000000000L };
    // oneMont: limbs of the element returned by get1().
    private static final long[] oneMont = new long[] {
        0x0000000000000010L, 0x000f000000000000L, 0x000fffffffffffffL,
        0x000ffeffffffffffL, 0x00000000000fffffL };
    // zero: limbs of the element returned by get0().
    private static final long[] zero = new long[] {
        0x0000000000000000L, 0x0000000000000000L, 0x0000000000000000L,
        0x0000000000000000L, 0x0000000000000000L };
    // modulus: p = 2^256 - 2^224 + 2^192 + 2^96 - 1 split into 52-bit limbs;
    // read by mult when adding multiples of p.
    private static final long[] modulus = new long[] {
        0x000fffffffffffffL, 0x00000fffffffffffL, 0x0000000000000000L,
        0x0000001000000000L, 0x0000ffffffff0000L };
    // Private: instances are only created inside this class (see ONE).
    // Configures the base class with 52-bit limbs, 5 limbs, MAX_ADDS and the
    // P-256 modulus.
    private MontgomeryIntegerPolynomialP256() {
        super(BITS_PER_LIMB, NUM_LIMBS, MAX_ADDS, MODULUS);
    }
    // Returns the IntegerPolynomialP256 field instance; fromMontgomery
    // creates its results as elements of that same field.
    public IntegerFieldModuloP residueField() {
        return IntegerPolynomialP256.ONE;
    }
    // Term list of the modulus below 2^256, as (bit position, coefficient):
    // (224%nat,-1)::(192%nat,1)::(96%nat,1)::(0%nat,-1)::nil.
    // Computes p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
    private static BigInteger evaluateModulus() {
        BigInteger one = BigInteger.ONE;
        return one.shiftLeft(256)
                .subtract(one.shiftLeft(224))
                .add(one.shiftLeft(192))
                .add(one.shiftLeft(96))
                .subtract(one);
    }
    // Zero: wraps the all-zero limb array with a numAdds count of 0.
    @Override
    public ImmutableElement get0() {
        return new ImmutableElement(zero, 0);
    }
    // One in the Montgomery domain: (1*2^260) mod p.
    // Wraps the oneMont limb array with a numAdds count of 0.
    @Override
    public ImmutableElement get1() {
        return new ImmutableElement(oneMont, 0);
    }
    /**
     * Creates the field element for {@code v}, converted to the Montgomery
     * domain.
     *
     * @param v the plain value to convert
     * @return an element whose limbs are the result of mult(v, h) and whose
     *         numAdds is the count reported by that multiplication
     */
    @Override
    public ImmutableElement getElement(BigInteger v) {
        long[] plainLimbs = new long[NUM_LIMBS];
        setLimbsValuePositive(v, plainLimbs);

        // Multiplying by h = 2^(2*260) mod p with the Montgomery multiplier
        // (which also multiplies by 2^-260) leaves v * 2^260 mod p.
        long[] convertedLimbs = new long[NUM_LIMBS];
        int pendingAdds = mult(plainLimbs, h, convertedLimbs);
        return new ImmutableElement(convertedLimbs, pendingAdds);
    }
    // Delegates to the base class implementation without change.
    @Override
    public SmallValue getSmallValue(int value) {
        // Explicitly here as reminder that SmallValue stays in residue domain
        // See multByInt below for how this is used
        return super.getSmallValue(value);
    }
    /*
     * This function is used by IntegerPolynomial.setProduct(SmallValue v) to
     * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a
     * montgomery conversion followed by a montgomery multiplication, just use
     * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1]
     * Section 4 )
     *
     * Every limb of a is multiplied in place by b; no reduction is performed,
     * so the result is an unreduced value.
     *
     * Returns b - 1, which setProduct(SmallValue) adds to the element's
     * numAdds counter.
     */
    @Override
    protected int multByInt(long[] a, long b) {
        // The bound has to be built with a long shift. With an int operand
        // the shift distance is taken modulo 32, so (1 << BITS_PER_LIMB)
        // evaluated to (1 << 20) rather than the intended limit. The limit
        // used here is the number of spare bits above a limb, which is what
        // the multiplication relies on.
        assert (b < (1L << (64 - BITS_PER_LIMB)));
        for (int i = 0; i < a.length; i++) {
            a[i] *= b;
        }
        return (int) (b - 1);
    }
    /**
     * Converts an element of this field out of the Montgomery domain and
     * returns it as an element of IntegerPolynomialP256.
     *
     * NOTE(review): reduce() is applied directly to the array returned by
     * getLimbs(); if that is the element's backing array, the argument is
     * reduced in place. Confirm against ImmutableElement.getLimbs().
     *
     * @param n an element of this field (asserted)
     * @return the corresponding IntegerPolynomialP256 element, built from
     *         ten half-width limbs with a numAdds count of 0
     */
    @Override
    public ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP n) {
        assert n.getField() == MontgomeryIntegerPolynomialP256.ONE;

        ImmutableElement montElement = (ImmutableElement) n;
        long[] montLimbs = montElement.getLimbs();
        reduce(montLimbs);

        // A Montgomery multiplication by the plain value 1 removes the
        // 2^260 factor carried by Montgomery-domain values.
        long[] plainLimbs = new long[NUM_LIMBS];
        MontgomeryIntegerPolynomialP256.ONE.mult(montLimbs, oneActual, plainLimbs);
        reduce(plainLimbs);

        // Re-pack the five 52-bit limbs as ten half-width limbs.
        long[] splitLimbs = new long[2 * NUM_LIMBS];
        halfLimbs(plainLimbs, splitLimbs);
        return IntegerPolynomialP256.ONE.new ImmutableElement(splitLimbs, 0);
    }
    /**
     * Splits each of the first five limbs of {@code a} into two halves of
     * BITS_PER_LIMB / 2 bits and stores them in {@code r}: r[2*i] receives
     * the low half of a[i], and r[2*i + 1] receives a[i] shifted right
     * (sign-propagating) by half a limb.
     *
     * @param a source limbs; entries 0..4 are read
     * @param r destination; entries 0..9 are written
     */
    private void halfLimbs(long[] a, long[] r) {
        final long halfWidth = BITS_PER_LIMB / 2;
        final long lowHalfMask = -1L >>> (64 - halfWidth);
        // Five source limbs yield ten destination entries.
        for (int src = 0, dst = 0; src < 5; src++, dst += 2) {
            r[dst] = a[src] & lowHalfMask;
            r[dst + 1] = a[src] >> halfWidth;
        }
    }
    // Squaring has no dedicated routine here: it is a multiplication of a
    // with itself, and the value returned by mult is passed through.
    @Override
    protected int square(long[] a, long[] r) {
        return mult(a, a, r);
    }
    /**
     * Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P)
     *
     * See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication
     * for a Montgomery Friendly modulus p". Note: Step 6. Skipped; Instead use
     * numAdds to reuse existing overflow logic.
     */
    @IntrinsicCandidate
    protected int mult(long[] a, long[] b, long[] r) {
        long aa0 = a[0];
        long aa1 = a[1];
        long aa2 = a[2];
        long aa3 = a[3];
        long aa4 = a[4];
        long bb0 = b[0];
        long bb1 = b[1];
        long bb2 = b[2];
        long bb3 = b[3];
        long bb4 = b[4];
        final long shift1 = 64 - BITS_PER_LIMB; // 12
        final long shift2 = BITS_PER_LIMB; // 40
        long d0, d1, d2, d3, d4;      // low digits from multiplication
        long dd0, dd1, dd2, dd3, dd4; // high digits from multiplication
        long n, n0, n1, n2, n3, n4,
            nn0, nn1, nn2, nn3, nn4; // modulus multiple digits
        long c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; // multiplication result
                                                     // digits for each column
        // Row 0 - multiply by aa0 and reduce out c0
        d0 = aa0 * bb0;
        dd0 = Math.unsignedMultiplyHigh(aa0, bb0) << shift1 | (d0 >>> shift2);
        d0 &= LIMB_MASK;
        n = d0;
        d1 = aa0 * bb1;
        dd1 = Math.unsignedMultiplyHigh(aa0, bb1) << shift1 | (d1 >>> shift2);
        d1 &= LIMB_MASK;
        d2 = aa0 * bb2;
        dd2 = Math.unsignedMultiplyHigh(aa0, bb2) << shift1 | (d2 >>> shift2);
        d2 &= LIMB_MASK;
        d3 = aa0 * bb3;
        dd3 = Math.unsignedMultiplyHigh(aa0, bb3) << shift1 | (d3 >>> shift2);
        d3 &= LIMB_MASK;
        d4 = aa0 * bb4;
        dd4 = Math.unsignedMultiplyHigh(aa0, bb4) << shift1 | (d4 >>> shift2);
        d4 &= LIMB_MASK;
        n0 = n * modulus[0];
        nn0 = Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
        n0 &= LIMB_MASK;
        n1 = n * modulus[1];
        nn1 = Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
        n1 &= LIMB_MASK;
        n2 = n * modulus[2];
        nn2 = Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
        n2 &= LIMB_MASK;
        n3 = n * modulus[3];
        nn3 = Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
        n3 &= LIMB_MASK;
        n4 = n * modulus[4];
        nn4 = Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
        n4 &= LIMB_MASK;
        dd0 += nn0;
        d0 += n0;
        dd1 += nn1;
        d1 += n1;
        dd2 += nn2;
        d2 += n2;
        dd3 += nn3;
        d3 += n3;
        dd4 += nn4;
        d4 += n4;
        c1 = d1 + dd0 + (d0 >>> BITS_PER_LIMB);
        c2 = d2 + dd1;
        c3 = d3 + dd2;
        c4 = d4 + dd3;
        c5 = dd4;
        // Row 1 - multiply by aa1 and reduce out c1
        d0 = aa1 * bb0;
        dd0 = Math.unsignedMultiplyHigh(aa1, bb0) << shift1 | (d0 >>> shift2);
        d0 &= LIMB_MASK;
        d0 += c1;
        n = d0 & LIMB_MASK;
        d1 = aa1 * bb1;
        dd1 = Math.unsignedMultiplyHigh(aa1, bb1) << shift1 | (d1 >>> shift2);
        d1 &= LIMB_MASK;
        d2 = aa1 * bb2;
        dd2 = Math.unsignedMultiplyHigh(aa1, bb2) << shift1 | (d2 >>> shift2);
        d2 &= LIMB_MASK;
        d3 = aa1 * bb3;
        dd3 = Math.unsignedMultiplyHigh(aa1, bb3) << shift1 | (d3 >>> shift2);
        d3 &= LIMB_MASK;
        d4 = aa1 * bb4;
        dd4 = Math.unsignedMultiplyHigh(aa1, bb4) << shift1 | (d4 >>> shift2);
        d4 &= LIMB_MASK;
        n0 = n * modulus[0];
        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
        d0 += n0 & LIMB_MASK;
        n1 = n * modulus[1];
        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
        d1 += n1 & LIMB_MASK;
        n2 = n * modulus[2];
        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
        d2 += n2 & LIMB_MASK;
        n3 = n * modulus[3];
        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
        d3 += n3 & LIMB_MASK;
        n4 = n * modulus[4];
        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
        d4 += n4 & LIMB_MASK;
        c2 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
        c3 += d2 + dd1;
        c4 += d3 + dd2;
        c5 += d4 + dd3;
        c6 = dd4;
        // Row 2 - multiply by aa2 and reduce out c2
        d0 = aa2 * bb0;
        dd0 = Math.unsignedMultiplyHigh(aa2, bb0) << shift1 | (d0 >>> shift2);
        d0 &= LIMB_MASK;
        d0 += c2;
        n = d0 & LIMB_MASK;
        d1 = aa2 * bb1;
        dd1 = Math.unsignedMultiplyHigh(aa2, bb1) << shift1 | (d1 >>> shift2);
        d1 &= LIMB_MASK;
        d2 = aa2 * bb2;
        dd2 = Math.unsignedMultiplyHigh(aa2, bb2) << shift1 | (d2 >>> shift2);
        d2 &= LIMB_MASK;
        d3 = aa2 * bb3;
        dd3 = Math.unsignedMultiplyHigh(aa2, bb3) << shift1 | (d3 >>> shift2);
        d3 &= LIMB_MASK;
        d4 = aa2 * bb4;
        dd4 = Math.unsignedMultiplyHigh(aa2, bb4) << shift1 | (d4 >>> shift2);
        d4 &= LIMB_MASK;
        n0 = n * modulus[0];
        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
        d0 += n0 & LIMB_MASK;
        n1 = n * modulus[1];
        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
        d1 += n1 & LIMB_MASK;
        n2 = n * modulus[2];
        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
        d2 += n2 & LIMB_MASK;
        n3 = n * modulus[3];
        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
        d3 += n3 & LIMB_MASK;
        n4 = n * modulus[4];
        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
        d4 += n4 & LIMB_MASK;
        c3 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
        c4 += d2 + dd1;
        c5 += d3 + dd2;
        c6 += d4 + dd3;
        c7 = dd4;
        // Row 3 - multiply by aa3 and reduce out c3
        d0 = aa3 * bb0;
        dd0 = Math.unsignedMultiplyHigh(aa3, bb0) << shift1 | (d0 >>> shift2);
        d0 &= LIMB_MASK;
        d0 += c3;
        n = d0 & LIMB_MASK;
        d1 = aa3 * bb1;
        dd1 = Math.unsignedMultiplyHigh(aa3, bb1) << shift1 | (d1 >>> shift2);
        d1 &= LIMB_MASK;
        d2 = aa3 * bb2;
        dd2 = Math.unsignedMultiplyHigh(aa3, bb2) << shift1 | (d2 >>> shift2);
        d2 &= LIMB_MASK;
        d3 = aa3 * bb3;
        dd3 = Math.unsignedMultiplyHigh(aa3, bb3) << shift1 | (d3 >>> shift2);
        d3 &= LIMB_MASK;
        d4 = aa3 * bb4;
        dd4 = Math.unsignedMultiplyHigh(aa3, bb4) << shift1 | (d4 >>> shift2);
        d4 &= LIMB_MASK;
        n0 = n * modulus[0];
        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
        d0 += n0 & LIMB_MASK;
        n1 = n * modulus[1];
        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
        d1 += n1 & LIMB_MASK;
        n2 = n * modulus[2];
        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
        d2 += n2 & LIMB_MASK;
        n3 = n * modulus[3];
        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
        d3 += n3 & LIMB_MASK;
        n4 = n * modulus[4];
        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
        d4 += n4 & LIMB_MASK;
        c4 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
        c5 += d2 + dd1;
        c6 += d3 + dd2;
        c7 += d4 + dd3;
        c8 = dd4;
        // Row 4 - multiply by aa4 and reduce out c4
        d0 = aa4 * bb0;
        dd0 = Math.unsignedMultiplyHigh(aa4, bb0) << shift1 | (d0 >>> shift2);
        d0 &= LIMB_MASK;
        d0 += c4;
        n = d0 & LIMB_MASK;
        d1 = aa4 * bb1;
        dd1 = Math.unsignedMultiplyHigh(aa4, bb1) << shift1 | (d1 >>> shift2);
        d1 &= LIMB_MASK;
        d2 = aa4 * bb2;
        dd2 = Math.unsignedMultiplyHigh(aa4, bb2) << shift1 | (d2 >>> shift2);
        d2 &= LIMB_MASK;
        d3 = aa4 * bb3;
        dd3 = Math.unsignedMultiplyHigh(aa4, bb3) << shift1 | (d3 >>> shift2);
        d3 &= LIMB_MASK;
        d4 = aa4 * bb4;
        dd4 = Math.unsignedMultiplyHigh(aa4, bb4) << shift1 | (d4 >>> shift2);
        d4 &= LIMB_MASK;
        n0 = n * modulus[0];
        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
        d0 += n0 & LIMB_MASK;
        n1 = n * modulus[1];
        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
        d1 += n1 & LIMB_MASK;
        n2 = n * modulus[2];
        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
        d2 += n2 & LIMB_MASK;
        n3 = n * modulus[3];
        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
        d3 += n3 & LIMB_MASK;
        n4 = n * modulus[4];
        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
        d4 += n4 & LIMB_MASK;
        c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
        c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB);
        c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB);
        c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB);
        c9 = dd4 + (c8 >>> BITS_PER_LIMB);
        c5 &= LIMB_MASK;
        c6 &= LIMB_MASK;
        c7 &= LIMB_MASK;
        c8 &= LIMB_MASK;
        // At this point, the result could overflow by one modulus.
        c0 = c5 - modulus[0];
        c1 = c6 - modulus[1] + (c0 >> BITS_PER_LIMB);
        c0 &= LIMB_MASK;
        c2 = c7 - modulus[2] + (c1 >> BITS_PER_LIMB);
        c1 &= LIMB_MASK;
        c3 = c8 - modulus[3] + (c2 >> BITS_PER_LIMB);
        c2 &= LIMB_MASK;
        c4 = c9 - modulus[4] + (c3 >> BITS_PER_LIMB);
        c3 &= LIMB_MASK;
        long mask = c4 >> BITS_PER_LIMB; // Signed shift!
        r[0] = ((c5 & mask) | (c0 & ~mask));
        r[1] = ((c6 & mask) | (c1 & ~mask));
        r[2] = ((c7 & mask) | (c2 & ~mask));
        r[3] = ((c8 & mask) | (c3 & ~mask));
        r[4] = ((c9 & mask) | (c4 & ~mask));
        return 0;
    }
    // Last-stage carry/reduce hook: this implementation simply delegates to
    // reduce(long[]), which rewrites the limbs in place.
    @Override
    protected void finalCarryReduceLast(long[] limbs) {
        reduce(limbs);
    }
    // Returns the part of x that lies above one limb: x shifted right
    // arithmetically (sign-preserving) by BITS_PER_LIMB.
    @Override
    protected long carryValue(long x) {
        return x >> BITS_PER_LIMB;
    }
    // Intentionally a no-op for this field; the array v is left untouched.
    @Override
    protected void postEncodeCarry(long[] v) {
        // not needed because carry is unsigned
    }
    // Why folding the overflow back into the low limbs is valid:
    // carry * 2^256 (mod p) ==  carry * [2^256 - p] (mod p)
    //                       ==  carry * [2^256 - (2^256 -2^224 +2^192 +2^96 -1)] (mod p)
    //                       ==  carry * [2^224 -2^192 -2^96 +1] (mod p)
    //
    // The limbs are rewritten in place. No data-dependent branches are used:
    // the final choice between the two candidate results is a bitwise select.
    @Override
    protected void reduce(long[] limbs) {
        long x0 = limbs[0];
        long x1 = limbs[1];
        long x2 = limbs[2];
        long x3 = limbs[3];
        long x4 = limbs[4];
        // Everything in the top limb from bit 48 upward is the overflow that
        // gets folded back according to the identity above.
        final long excess = x4 >> 48; // max 16-bits
        x0 += excess;                          // +2^0
        x1 -= excess << 44;                    // -2^96
        x3 -= excess << 36;                    // -2^192
        x4 += (excess << 16) - (excess << 48); // +2^224, and strip the excess
        // Ripple the (signed) carries upward; each lower limb is masked right
        // after its carry has been handed to the next one.
        x1 += x0 >> BITS_PER_LIMB;
        x0 &= LIMB_MASK;
        x2 += x1 >> BITS_PER_LIMB;
        x1 &= LIMB_MASK;
        x3 += x2 >> BITS_PER_LIMB;
        x2 &= LIMB_MASK;
        x4 += x3 >> BITS_PER_LIMB;
        x3 &= LIMB_MASK;
        // Second candidate: the same value with one modulus added to it.
        long s0 = x0 + modulus[0];
        long s1 = x1 + modulus[1] + (s0 >> BITS_PER_LIMB);
        long s2 = x2 + modulus[2] + (s1 >> BITS_PER_LIMB);
        long s3 = x3 + modulus[3] + (s2 >> BITS_PER_LIMB);
        long s4 = x4 + modulus[4] + (s3 >> BITS_PER_LIMB);
        s0 &= LIMB_MASK;
        s1 &= LIMB_MASK;
        s2 &= LIMB_MASK;
        s3 &= LIMB_MASK;
        // Signed shift! The sign of the top limb spreads over the whole word,
        // so a negative value picks the modulus-corrected candidate.
        final long pickSum = x4 >> BITS_PER_LIMB;
        final long pickPlain = ~pickSum;
        limbs[0] = (s0 & pickSum) | (x0 & pickPlain);
        limbs[1] = (s1 & pickSum) | (x1 & pickPlain);
        limbs[2] = (s2 & pickSum) | (x2 & pickPlain);
        limbs[3] = (s3 & pickSum) | (x3 & pickPlain);
        limbs[4] = (s4 & pickSum) | (x4 & pickPlain);
    }
    // Builds a field element from a byte-array slice. The bytes are first
    // decoded into plain limbs by the superclass encoder and then multiplied
    // by h to move the value into the Montgomery domain; the int returned by
    // mult is passed on as the element's addition count.
    // NOTE(review): h is defined outside this block - presumably the
    // Montgomery conversion constant; confirm against its declaration.
    public ImmutableElement getElement(byte[] v, int offset, int length,
            byte highByte) {
        final long[] plain = new long[NUM_LIMBS];
        super.encode(v, offset, length, highByte, plain);
        // Convert to Montgomery domain
        final long[] converted = new long[NUM_LIMBS];
        final int adds = mult(plain, h, converted);
        return new ImmutableElement(converted, adds);
    }
    /*
     * This function 'moves/reduces' digit 'v' to the 'lower' limbs
     *
     * The result is not reduced further. Carry propagation is not performed
     * (see IntegerPolynomial.reduceHigh() for how this method is used)
     *
     * limbs - limb array updated in place; positions i-1 down to i-5 are
     *         written, so the caller must pass i >= 5
     * v     - the digit currently sitting at limb position i
     * i     - the limb position being reduced out
     *
     * Proof:
     *   v * 2^(i*52) (mod p) ==  v * 2^(52i) - v * 2^(52i-256) * p                               (mod p)
     *                        ==  v * 2^(52i) - v * 2^(52i-256) * (2^256 -2^224 +2^192 +2^96 -1)  (mod p)
     *                        ==  v * 2^(52i) - v * [2^(52i-256+256) -2^(52i-256+224) +2^(52i-256+192) +2^(52i-256+96) -2^(52i-256)] (mod p)
     *                        ==  v * 2^(52i) - v * [2^(52i) -2^(52i-32) +2^(52i-64) +2^(52i-160) -2^(52i-256)]                      (mod p)
     *
     *                        ==  v * [2^(52i-32) -2^(52i-52-12) -2^(52i-3*52-4) +2^(52i-4*52-48)] (mod p)
     *
     * The signs of the four terms (+, -, -, +) are exactly the +=/-= pattern
     * used in the body below.
     */
    @Override
    protected void reduceIn(long[] limbs, long v, int i) {
        // Since top term (2^(52i-32)) will leave top 20 bits back in the same
        // position i,
        // "repeat same reduction on top 20 bits"
        v += v >> 32;
        // +2^(52i-32): the part of v that fits lands 20 bits up in limb i-1
        limbs[i - 1] += (v << 20) & LIMB_MASK;
        // -2^(52i-52-12): the term straddles limbs i-2 (low 12 bits of v,
        // shifted up by 40) and i-1 (the remaining high bits)
        limbs[i - 2] -= (v << 40) & LIMB_MASK;
        limbs[i - 1] -= v >> 12;
        // -2^(52i-3*52-4): straddles limbs i-4 (low 4 bits of v, shifted up
        // by 48) and i-3 (the remaining high bits)
        limbs[i - 4] -= (v << 48) & LIMB_MASK;
        limbs[i - 3] -= v >> 4;
        // +2^(52i-4*52-48): straddles limbs i-5 (low 48 bits of v, shifted up
        // by 4) and i-4 (the remaining high bits)
        limbs[i - 5] += (v << 4) & LIMB_MASK;
        limbs[i - 4] += v >> 48;
    }
 }
--- a/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java
+++ b/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java
@ -0,0 +1,171 @@
 /*
 * Copyright (c) 2024, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 import java.util.Random;
 import java.math.BigInteger;
 import java.lang.reflect.Field;
 import java.security.spec.ECParameterSpec;
 import sun.security.ec.ECOperations;
 import sun.security.util.ECUtil;
 import sun.security.util.NamedCurve;
 import sun.security.util.CurveDB;
 import sun.security.ec.point.*;
 import java.security.spec.ECPoint;
 import sun.security.util.KnownOIDs;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.intpoly.*;
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.ec java.base/sun.security.ec.point
 *          java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm/timeout=1200 --add-opens
 *      java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions
 *      -XX:-UseIntPolyIntrinsics ECOperationsFuzzTest
 * @summary Unit test ECOperationsFuzzTest.
 */
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.ec java.base/sun.security.ec.point
 *          java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm/timeout=1200 --add-opens
 *      java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions
 *      -XX:+UseIntPolyIntrinsics ECOperationsFuzzTest
 * @summary Unit test ECOperationsFuzzTest.
 */
 // This test case is NOT entirely deterministic, it uses a random seed for
 // pseudo-random number generator. If a failure occurs, hardcode the seed to
 // make the test case deterministic
 public class ECOperationsFuzzTest {
    // Runs the randomized comparison a fixed number of times and prints a
    // success marker; any mismatch surfaces as a RuntimeException from check().
    public static void main(String[] args) throws Exception {
        // Note: it might be useful to increase this number during development
        final int repeat = 10000;
        test(repeat);
        System.out.println("Fuzz Success");
    }
    // Compares two points in affine form and fails with the seed and the
    // iteration number so that a failing run can be reproduced.
    // iter is -1 for the initial multiplication done before the loop.
    private static void check(MutablePoint reference, MutablePoint testValue,
            long seed, int iter) {
        AffinePoint affineRef = reference.asAffine();
        AffinePoint affine = testValue.asAffine();
        if (!affineRef.equals(affine)) {
            throw new RuntimeException(
                    "Found error with seed " + seed + " at iteration " + iter);
        }
    }
    // Drives two ECOperations instances for secp256r1 with identical random
    // inputs and checks after every step that they agree:
    //  - ops:          obtained from ECOperations.forParameters(); must be
    //                  backed by an IntegerMontgomeryFieldModuloP field
    //  - opsReference: built on IntegerPolynomialP256; must NOT be backed by
    //                  an IntegerMontgomeryFieldModuloP field
    // repeat is the number of loop iterations to run.
    public static void test(int repeat) throws Exception {
        // Draw a seed and re-seed with it so the seed can be reported.
        Random rnd = new Random();
        long seed = rnd.nextLong();
        rnd.setSeed(seed);
        int keySize = 256;
        ECParameterSpec params = ECUtil.getECParameterSpec(keySize);
        NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value());
        // Validate before dereferencing, so a missing curve is reported with
        // the intended message instead of a NullPointerException.
        if (params == null || curve == null || curve.getGenerator() == null) {
            throw new RuntimeException(
                    "No EC parameters available for key size " + keySize + " bits");
        }
        ECPoint generator = curve.getGenerator();
        BigInteger b = curve.getCurve().getB();
        ECOperations ops = ECOperations.forParameters(params).get();
        ECOperations opsReference = new ECOperations(
                IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE);
        boolean instanceTest1 = ops
                .getField() instanceof IntegerMontgomeryFieldModuloP;
        boolean instanceTest2 = opsReference
                .getField() instanceof IntegerMontgomeryFieldModuloP;
        if (!instanceTest1 || instanceTest2) {
            throw new RuntimeException("Bad Initialization: ["
                + instanceTest1 + "," + instanceTest2 + "]");
        }
        byte[] multiple = new byte[keySize / 8];
        rnd.nextBytes(multiple);
        multiple[keySize/8 - 1] &= 0x7f; // from opsReference.seedToScalar(multiple);
        MutablePoint referencePoint = opsReference.multiply(generator, multiple);
        MutablePoint point = ops.multiply(generator, multiple);
        check(referencePoint, point, seed, -1);
        // Projective copies of the generator, one per field, used as the
        // second operand of setSum() below.
        AffinePoint refAffineGenerator = AffinePoint.fromECPoint(generator,
                referencePoint.getField());
        AffinePoint montAffineGenerator = AffinePoint.fromECPoint(generator,
                point.getField());
        MutablePoint refProjGenerator = new ProjectivePoint.Mutable(
                refAffineGenerator.getX(false).mutable(),
                refAffineGenerator.getY(false).mutable(),
                referencePoint.getField().get1().mutable());
        MutablePoint projGenerator = new ProjectivePoint.Mutable(
                montAffineGenerator.getX(false).mutable(),
                montAffineGenerator.getY(false).mutable(),
                point.getField().get1().mutable());
        for (int i = 0; i < repeat; i++) {
            rnd.nextBytes(multiple);
            multiple[keySize/8 - 1] &= 0x7f; // opsReference.seedToScalar(multiple);
            // Multiply the current (non-generator) point by a fresh scalar.
            MutablePoint nextReferencePoint = opsReference
                    .multiply(referencePoint.asAffine(), multiple);
            MutablePoint nextPoint = ops.multiply(point.asAffine().toECPoint(),
                    multiple);
            check(nextReferencePoint, nextPoint, seed, i);
            // Randomly add the previous point ...
            if (rnd.nextBoolean()) {
                opsReference.setSum(nextReferencePoint, referencePoint);
                ops.setSum(nextPoint, point);
                check(nextReferencePoint, nextPoint, seed, i);
            }
            // ... and/or the generator.
            if (rnd.nextBoolean()) {
                opsReference.setSum(nextReferencePoint, refProjGenerator);
                ops.setSum(nextPoint, projGenerator);
                check(nextReferencePoint, nextPoint, seed, i);
            }
            if (rnd.nextInt(100) < 10) { // 10% Reset point to generator, test
                                         // generator multiplier
                referencePoint = opsReference.multiply(generator, multiple);
                point = ops.multiply(generator, multiple);
                check(referencePoint, point, seed, i);
            } else {
                referencePoint = nextReferencePoint;
                point = nextPoint;
            }
        }
    }
 }
 // make test TEST="test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java"
--- a/test/jdk/com/sun/security/ec/ECOperationsKATTest.java
+++ b/test/jdk/com/sun/security/ec/ECOperationsKATTest.java
@ -0,0 +1,253 @@
 /*
 * Copyright (c) 2024, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 import java.util.Random;
 import java.util.List;
 import java.util.LinkedList;
 import java.math.BigInteger;
 import java.lang.reflect.Field;
 import java.security.spec.ECParameterSpec;
 import sun.security.ec.ECOperations;
 import sun.security.util.ECUtil;
 import sun.security.util.NamedCurve;
 import sun.security.util.CurveDB;
 import sun.security.ec.point.*;
 import java.security.spec.ECPoint;
 import sun.security.util.KnownOIDs;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.intpoly.*;
 /*
 * @test
 * @modules java.base/sun.security.ec java.base/sun.security.ec.point
 *          java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm --add-opens java.base/sun.security.ec=ALL-UNNAMED
 *      ECOperationsKATTest
 * @summary Unit test ECOperationsKATTest.
 */
 /*
 * @test
 * @modules java.base/sun.security.ec java.base/sun.security.ec.point
 *          java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp
 *      -XX:-TieredCompilation --add-opens java.base/sun.security.ec=ALL-UNNAMED
 *      -XX:+UnlockDiagnosticVMOptions ECOperationsKATTest
 * @summary Unit test ECOperationsKATTest.
 */
 public class ECOperationsKATTest {
    private static final java.util.HexFormat hex = java.util.HexFormat.of();
    // Runs every known-answer vector and fails the whole test if any single
    // vector did not pass; details of a mismatch are printed by check().
    public static void main(String[] args) throws Exception {
        int passed = 0;
        int executed = 0;
        for (TestData vector : testList) {
            executed++;
            System.out.println("*** Test " + executed + ": " + vector.testName);
            boolean ok = runSingleTest(vector);
            if (ok) {
                passed++;
            }
        }
        System.out.println();
        if (passed != executed) {
            throw new RuntimeException(
                    "One or more tests failed. Check output for details");
        }
    }
    // Converts the computed point to affine coordinates and compares them
    // with the expected point; prints both on a mismatch.
    // Returns true when both coordinates match.
    private static boolean check(MutablePoint testValue, ECPoint reference) {
        AffinePoint actual = testValue.asAffine();
        BigInteger x = actual.getX().asBigInteger();
        BigInteger y = actual.getY().asBigInteger();
        BigInteger refX = reference.getAffineX();
        BigInteger refY = reference.getAffineY();
        if (refX.equals(x) && refY.equals(y)) {
            return true;
        }
        System.out.println("ERROR - Output Mismatch!");
        System.out.println("Expected: X: " + refX.toString(16) + " Y: "
                + refY.toString(16));
        System.out.println(
                "Result:   X: " + x.toString(16) + " Y: " + y.toString(16));
        return false;
    }
    // One known-answer vector: a scalar and the two expected result points.
    private static class TestData {
        String testName;
        byte[] multiplier;
        ECPoint reference1; // For generator multiplier test
        ECPoint reference2; // For non-generator multiplier test
        // All string arguments are hexadecimal. The scalar is parsed to bytes
        // and then byte-reversed before it is stored.
        public TestData(String name, String keyStr, String xStr1, String yStr1,
                String xStr2, String yStr2) {
            testName = name;
            // multiplier = (new BigInteger(keyStr, 16)).toByteArray();
            multiplier = hex.parseHex(keyStr);
            sun.security.util.ArrayUtil.reverse(multiplier);
            BigInteger x1 = new BigInteger(xStr1, 16);
            BigInteger y1 = new BigInteger(yStr1, 16);
            reference1 = new ECPoint(x1, y1);
            BigInteger x2 = new BigInteger(xStr2, 16);
            BigInteger y2 = new BigInteger(yStr2, 16);
            reference2 = new ECPoint(x2, y2);
        }
    }
    public static final List<TestData> testList = new LinkedList<TestData>() {{
    // (x1,y1) = mult*generator
    // (x2,y2) = mult*mult*generator
    add(new TestData("Test Vector #1",
        "0000000000000000000000000000000000000000000000000000000000000012", // mult
        "1057E0AB5780F470DEFC9378D1C7C87437BB4C6F9EA55C63D936266DBD781FDA", // x1
        "F6F1645A15CBE5DC9FA9B7DFD96EE5A7DCC11B5C5EF4F1F78D83B3393C6A45A2", // y1
        "4954047A366A91E3FD94E574DB6F2B04F3A8465883DBC55A816EA563BF54A324", // x2
        "B5A54786FD9EA48C9FC38A0557B0C4D54F285908A7291B630D06BEE970F530D3") // y2
    );
    add(new TestData("Test Vector #2",
        "1200000000000000000000000000000000000000000000000000000000000000", // mult
        "DF684E6D0D57AF8B89DA11E8F7436C3D360F531D62BDCE42C5A8B72D73D5C717", // x1
        "9D3576BD03C09B8F416EE9C27D70AD4A425119271ACF549312CA48758F4E1FEC", // y1
        "57C8257EEAABF5446DCFACB99DEE104367B6C9950C76797C372EB177D5FA23B3", // x2
        "1CD3E8A34521C1C8E574EB4B99343CAA57E00725D8618F0231C7C79AA6837725") // y2
    );
    add(new TestData("Test Vector #3",
        "0000000000000000000000000000000120000000000000000000000000000012", // mult
        "A69DFD47B24485E5F523BDA5FBACF03F5A7C3D22E0C2BC6705594B7B051A06D0", // x1
        "ECF19629416BE5C9AF1E30988F3AA8B803809CF4D12944EB49C5E9892723798A", // y1
        "1E28559F5B681C308632EE11A007B9891B3FD592C982C4926153795794295E58", // x2
        "3C373046C27BB34609A43C91DF6D4B9AB9EB08F3B69A8F8FAE944211D8297F30") // y2
    );
    add(new TestData("Test Vector #4",
        "0000000000000000000000000000000000000000000000000000000000000001", // mult
        "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x1
        "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", // y1
        "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x2
        "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5") // y2
    );
    add(new TestData("Test Vector #5",
        "EFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", // mult
        "66B71D0BD47344197CCFB0C9578EAF0ADB609E05BB4E8F87D56BD34F24EE7C47", // x1
        "14A0ECB7F708C02B2BAE238D2C4607BB9D04FCE64E10A428C911D6FA25B2F0FD", // y1
        "D25AAFD0FCC5B5E95C84C0702C138BC4D7FEB4E5F9C2DFB4301E313507EFDF44", // x2
        "F3F04EBC7D308511B0392BB7171CF92688D6484A95A8100EDFC933613A359133") // y2
    );
    add(new TestData("Test Vector #6",
        "1111111111111111111111111111111111111111111111111111111111111111", // mult
        "0217E617F0B6443928278F96999E69A23A4F2C152BDF6D6CDF66E5B80282D4ED", // x1
        "194A7DEBCB97712D2DDA3CA85AA8765A56F45FC758599652F2897C65306E5794", // y1
        "A83A07D6AE918359DEBCC385DA1E416EB83417435079CA8DB06005E107C309A0", // x2
        "5AACDF816850C33EB3E54F3D0DD759B97B5E7065B2060016F73735E4A6AADE23") // y2
    );
    }};
    // Runs one vector against both implementations:
    //   stage 1: multiplier * generator            must equal reference1
    //   stage 2: multiplier * (result of stage 1)  must equal reference2
    // Returns false as soon as a stage does not match.
    private static boolean runSingleTest(TestData testData) {
        int keySize = 256;
        ECParameterSpec params = ECUtil.getECParameterSpec(keySize);
        NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value());
        ECPoint generator = curve.getGenerator();
        BigInteger b = curve.getCurve().getB();
        if (params == null || generator == null) {
            throw new RuntimeException(
                    "No EC parameters available for key size " + keySize + " bits");
        }
        ECOperations ops = ECOperations.forParameters(params).get();
        ECOperations opsReference = new ECOperations(
                IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE);
        // ops must use the Montgomery field, the reference must not.
        boolean opsIsMontgomery = ops
                .getField() instanceof IntegerMontgomeryFieldModuloP;
        boolean refIsMontgomery = opsReference
                .getField() instanceof IntegerMontgomeryFieldModuloP;
        if (!opsIsMontgomery || refIsMontgomery) {
            throw new RuntimeException("Bad Initialization: [" + opsIsMontgomery + ","
                    + refIsMontgomery + "]");
        }
        final byte[] scalar = testData.multiplier;
        MutablePoint montResult = ops.multiply(generator, scalar);
        MutablePoint refResult = opsReference.multiply(generator, scalar);
        boolean stageOneOk = check(refResult, testData.reference1)
                && check(montResult, testData.reference1);
        if (!stageOneOk) {
            return false;
        }
        montResult = ops.multiply(montResult.asAffine(), scalar);
        refResult = opsReference.multiply(refResult.asAffine(), scalar);
        return check(refResult, testData.reference2)
                && check(montResult, testData.reference2);
    }
 }
 //make test TEST="test/jdk/com/sun/security/ec/ECOperationsKATTest.java"
 /*
 * KAT generator using OpenSSL for reference vectors
 * g++ ecpoint.cpp -g -lcrypto -Wno-deprecated-declarations && ./a.out
 * (Some OpenSSL EC operations are marked internal i.e. deprecated)
 *
 #include <openssl/obj_mac.h>
 #include <openssl/ec.h>
 void check(int rc, const char* locator) {
  if (rc != 1) {
    printf("Failed at %s\n", locator);
    exit(55);
  }
 }
 int main(){
  BN_CTX* ctx = BN_CTX_new();
  BIGNUM* k = BN_CTX_get(ctx);
  BIGNUM* x1 = BN_CTX_get(ctx);
  BIGNUM* y1 = BN_CTX_get(ctx);
  BIGNUM* x2 = BN_CTX_get(ctx);
  BIGNUM* y2 = BN_CTX_get(ctx);
  EC_GROUP *ec_group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
  EC_POINT* pubkey = EC_POINT_new(ec_group);
  EC_POINT* pubkey2 = EC_POINT_new(ec_group);
  int rc;
  rc = BN_hex2bn(&k, "1111111111111111111111111111111111111111111111111111111111111111"); //check(rc, "set raw key");
  rc = EC_POINT_mul(ec_group, pubkey, k, NULL, NULL, ctx);  check(rc, "mult public key");
  rc = EC_POINT_get_affine_coordinates(ec_group, pubkey, x1, y1, ctx);   check(rc, "get affine coordinates");
  rc = EC_POINT_mul(ec_group, pubkey2, NULL, pubkey, k, ctx);  check(rc, "mult public key");
  rc = EC_POINT_get_affine_coordinates(ec_group, pubkey2, x2, y2, ctx);   check(rc, "get affine coordinates");
  printf("k: %s\n", BN_bn2hex(k));
  printf("x: %s\ny: %s\n", BN_bn2hex(x1), BN_bn2hex(y1));
  printf("x: %s\ny: %s\n", BN_bn2hex(x2), BN_bn2hex(y2));
  BN_CTX_free(ctx);
  return 0;
 }
 */
--- a/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java
+++ b/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java
@ -0,0 +1,95 @@
 /*
 * Copyright (c) 2024, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 import java.util.Random;
 import java.math.BigInteger;
 import java.util.Arrays;
 import sun.security.util.math.*;
 import sun.security.util.math.intpoly.*;
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.util java.base/sun.security.util.math
 * java.base/sun.security.util.math.intpoly
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics
 * IntegerPolynomialTest
 * @summary Unit test
 * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign().
 */
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.util java.base/sun.security.util.math
 * java.base/sun.security.util.math.intpoly
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp
 * -XX:-TieredCompilation -XX:+UseIntPolyIntrinsics IntegerPolynomialTest
 * @summary Unit test
 * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign().
 */
 // This test case is NOT entirely deterministic, it uses a random seed for
 // pseudo-random number generator. If a failure occurs, hardcode the seed to
 // make the test case deterministic
 public class IntegerPolynomialTest {
    // For every listed field, creates two random elements and verifies
    // conditionalSet(): flag 0 must leave the target's limbs different from
    // the source, flag 1 must make them equal. The seed is included in the
    // failure message so that a failing run can be reproduced.
    public static void main(String[] args) throws Exception {
        Random rnd = new Random();
        final long seed = rnd.nextLong();
        rnd.setSeed(seed);
        final IntegerPolynomial[] fields = {
                IntegerPolynomial1305.ONE, IntegerPolynomial25519.ONE,
                IntegerPolynomial448.ONE, IntegerPolynomialP256.ONE,
                MontgomeryIntegerPolynomialP256.ONE, IntegerPolynomialP384.ONE,
                IntegerPolynomialP521.ONE,
                new IntegerPolynomialModBinP.Curve25519OrderField(),
                new IntegerPolynomialModBinP.Curve448OrderField(),
                P256OrderField.ONE, P384OrderField.ONE, P521OrderField.ONE,
                Curve25519OrderField.ONE, Curve448OrderField.ONE };
        final int operandBits = 32 * 64;
        for (IntegerPolynomial field : fields) {
            // Target element first, then source element (draw order matters
            // for reproducing a run from its seed).
            ImmutableIntegerModuloP targetValue = field
                    .getElement(new BigInteger(operandBits, rnd));
            MutableIntegerModuloP target = targetValue.mutable();
            ImmutableIntegerModuloP sourceValue = field
                    .getElement(new BigInteger(operandBits, rnd));
            MutableIntegerModuloP source = sourceValue.mutable();
            target.conditionalSet(source, 0); // Don't assign
            boolean copiedWithoutFlag = Arrays.equals(target.getLimbs(),
                    source.getLimbs());
            if (copiedWithoutFlag) {
                throw new RuntimeException(
                        "[SEED " + seed + "]: Incorrect assign for " + field);
            }
            target.conditionalSet(source, 1); // Assign
            boolean copiedWithFlag = Arrays.equals(target.getLimbs(),
                    source.getLimbs());
            if (!copiedWithFlag) {
                throw new RuntimeException(
                        "[SEED " + seed + "]: Incorrect assign for " + field);
            }
        }
        System.out.println("Test Success");
    }
 }
 //make test TEST="test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java"
--- a/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java
+++ b/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java
@ -0,0 +1,100 @@
 /*
 * Copyright (c) 2024, Intel Corporation. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 import java.util.Random;
 import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.ImmutableIntegerModuloP;
 import java.math.BigInteger;
 import sun.security.util.math.intpoly.*;
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics
 *      MontgomeryPolynomialFuzzTest
 * @summary Unit test MontgomeryPolynomialFuzzTest.
 */
 /*
 * @test
 * @key randomness
 * @modules java.base/sun.security.util java.base/sun.security.util.math
 *          java.base/sun.security.util.math.intpoly
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UseIntPolyIntrinsics
 *      MontgomeryPolynomialFuzzTest
 * @summary Unit test MontgomeryPolynomialFuzzTest.
 */
 // This test case is NOT entirely deterministic: it uses a random seed for the pseudo-random number generator.
 // If a failure occurs, hardcode the reported seed to make the test case deterministic.
 /**
 * Randomized comparison of MontgomeryIntegerPolynomialP256 element
 * arithmetic against reference values computed with BigInteger. Every
 * iteration uses a fresh seed, which is reported if a comparison fails.
 */
 public class MontgomeryPolynomialFuzzTest {
    // The values below are identical for every iteration, so they are
    // computed once here rather than on each of the run() calls.
    private static final IntegerMontgomeryFieldModuloP MONT_FIELD =
            MontgomeryIntegerPolynomialP256.ONE;
    private static final BigInteger P =
            MontgomeryIntegerPolynomialP256.ONE.MODULUS;
    // R = 2^260 mod P: the factor the reference computation expects
    // getElement() to apply; R_INV is its multiplicative inverse mod P.
    private static final BigInteger R = BigInteger.ONE.shiftLeft(260).mod(P);
    private static final BigInteger R_INV = R.modInverse(P);
    /**
     * Calls {@link #run()} a fixed number of times and prints a success
     * line if no iteration throws.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        // Note: it might be useful to increase this number during development
        final int repeat = 1000000;
        for (int i = 0; i < repeat; i++) {
            run();
        }
        System.out.println("Fuzz Success");
    }
    /**
     * Fails the test when the field element does not match the reference.
     *
     * @param reference expected value
     * @param testValue field element, compared through asBigInteger()
     * @param seed seed of the current iteration, reported on mismatch
     * @throws RuntimeException if the two values differ
     */
    private static void check(BigInteger reference,
            ImmutableIntegerModuloP testValue, long seed) {
        if (!reference.equals(testValue.asBigInteger())) {
            throw new RuntimeException("SEED: " + seed);
        }
    }
    /**
     * Performs one fuzz iteration: creates a random element, checks it
     * against aRef * R mod P, then, each decided by a random coin flip,
     * checks a squaring and a doubling against the BigInteger reference.
     *
     * @throws RuntimeException if any comparison fails
     */
    public static void run() throws Exception {
        Random rnd = new Random();
        long seed = rnd.nextLong();
        rnd.setSeed(seed);
        BigInteger aRef = (new BigInteger(P.bitLength(), rnd)).mod(P);
        // Test conversion to montgomery domain
        ImmutableIntegerModuloP a = MONT_FIELD.getElement(aRef);
        aRef = aRef.multiply(R).mod(P);
        check(aRef, a, seed);
        if (rnd.nextBoolean()) {
            // Reference for the product: aRef * aRef * R^-1 mod P.
            aRef = aRef.multiply(aRef).multiply(R_INV).mod(P);
            a = a.multiply(a);
            check(aRef, a, seed);
        }
        if (rnd.nextBoolean()) {
            aRef = aRef.add(aRef).mod(P);
            a = a.add(a);
            check(aRef, a, seed);
        }
    }
 }
 //make test TEST="test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java"
--- a/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java
+++ b/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java
@ -0,0 +1,105 @@
 /*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 package org.openjdk.bench.javax.crypto.full;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
 import org.openjdk.jmh.annotations.Measurement;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
 import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.annotations.Benchmark;
 import java.math.BigInteger;
 import java.util.concurrent.TimeUnit;
 import sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256;
 import sun.security.util.math.intpoly.IntegerPolynomialP256;
 import sun.security.util.math.MutableIntegerModuloP;
 import sun.security.util.math.ImmutableIntegerModuloP;
/**
 * JMH throughput benchmarks for P256 field elements. The multiply and
 * square benchmarks run on either a MontgomeryIntegerPolynomialP256 element
 * or an IntegerPolynomialP256 element, selected by {@code isMontBench}.
 * The assign benchmark always uses elements of the Montgomery field.
 */
@Fork(jvmArgsAppend = {"-XX:+AlwaysPreTouch",
    "--add-exports", "java.base/sun.security.util.math.intpoly=ALL-UNNAMED",
    "--add-exports", "java.base/sun.security.util.math=ALL-UNNAMED"}, value = 1)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 8, time = 2)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
@BenchmarkMode(Mode.Throughput)
 public class PolynomialP256Bench {
    // Number of loop iterations executed by each benchmark invocation.
    private static final int ROUNDS = 10000;
    final MontgomeryIntegerPolynomialP256 montField = MontgomeryIntegerPolynomialP256.ONE;
    final IntegerPolynomialP256 residueField = IntegerPolynomialP256.ONE;
    // Fixed 256-bit input value shared by both fields.
    final BigInteger refx =
        new BigInteger("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16);
    // The same value as an element of the residue field (x) and of the
    // Montgomery field (X).
    final ImmutableIntegerModuloP x = residueField.getElement(refx);
    final ImmutableIntegerModuloP X = montField.getElement(refx);
    final ImmutableIntegerModuloP one = montField.get1();
    @Param({"true", "false"})
    private boolean isMontBench;
    /**
     * Returns a fresh mutable copy of the operand chosen by isMontBench:
     * X (Montgomery field) when true, x (residue field) when false.
     */
    private MutableIntegerModuloP freshOperand() {
        return isMontBench ? X.mutable() : x.mutable();
    }
    /**
     * Repeatedly replaces the element with the result of setProduct
     * applied to itself.
     *
     * @return the final element, returned so the work stays observable
     */
    @Benchmark
    public MutableIntegerModuloP benchMultiply() {
        MutableIntegerModuloP acc = freshOperand();
        for (int round = 0; round < ROUNDS; round++) {
            acc = acc.setProduct(acc);
        }
        return acc;
    }
    /**
     * Repeatedly replaces the element with the result of setSquare.
     *
     * @return the final element, returned so the work stays observable
     */
    @Benchmark
    public MutableIntegerModuloP benchSquare() {
        MutableIntegerModuloP acc = freshOperand();
        for (int round = 0; round < ROUNDS; round++) {
            acc = acc.setSquare();
        }
        return acc;
    }
    /**
     * Alternates conditionalSet calls with set == 0 and set == 1 between
     * two Montgomery-field elements, in both directions. isMontBench is
     * not consulted here.
     *
     * @return the second element after the last round
     */
    @Benchmark
    public MutableIntegerModuloP benchAssign() {
        MutableIntegerModuloP first = X.mutable();
        MutableIntegerModuloP second = one.mutable();
        for (int round = 0; round < ROUNDS; round++) {
            first.conditionalSet(second, 0);
            first.conditionalSet(second, 1);
            second.conditionalSet(first, 0);
            second.conditionalSet(first, 1);
        }
        return second;
    }
 }