diff --git a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java index a65150ac421..234f5cfce0d 100644 --- a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java +++ b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -778,7 +778,7 @@ public class FieldGen { result.appendLine("}"); result.appendLine("@Override"); - result.appendLine("protected void mult(long[] a, long[] b, long[] r) {"); + result.appendLine("protected int mult(long[] a, long[] b, long[] r) {"); result.incrIndent(); for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) { result.appendIndent(); @@ -804,6 +804,9 @@ public class FieldGen { } } result.append(");\n"); + result.appendIndent(); + result.append("return 0;"); + result.appendLine(); result.decrIndent(); result.appendLine("}"); @@ -833,7 +836,7 @@ public class FieldGen { // } // } result.appendLine("@Override"); - result.appendLine("protected void square(long[] a, long[] r) {"); + result.appendLine("protected int square(long[] a, long[] r) {"); result.incrIndent(); for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) { result.appendIndent(); @@ -874,6 +877,9 @@ public class FieldGen { } } result.append(");\n"); + result.appendIndent(); + result.append("return 0;"); + result.appendLine(); result.decrIndent(); result.appendLine("}"); diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk index 4a6232afbe0..7b65e89610e 100644 --- a/make/test/BuildMicrobenchmark.gmk +++ b/make/test/BuildMicrobenchmark.gmk @@ -109,6 +109,8 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \ --add-exports 
java.base/jdk.internal.vm=ALL-UNNAMED \ --add-exports java.base/sun.invoke.util=ALL-UNNAMED \ --add-exports java.base/sun.security.util=ALL-UNNAMED \ + --add-exports java.base/sun.security.util.math=ALL-UNNAMED \ + --add-exports java.base/sun.security.util.math.intpoly=ALL-UNNAMED \ --enable-preview \ -XDsuppressNotes \ -processor org.openjdk.jmh.generators.BenchmarkProcessor, \ diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index e045572a5cd..c69c8c0d447 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1549,6 +1549,8 @@ public: Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len); } } + + using Assembler::evpsrlq; void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { if (!is_varshift) { Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len); @@ -1570,6 +1572,7 @@ public: Assembler::evpsravd(dst, mask, nds, src, merge, vector_len); } } + using Assembler::evpsraq; void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) { if (!is_varshift) { Assembler::evpsraq(dst, mask, nds, src, merge, vector_len); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 63226a560f4..3f2865e7465 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -4255,6 +4255,11 @@ void StubGenerator::generate_compiler_stubs() { StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks(); } + if (UseIntPolyIntrinsics) { + StubRoutines::_intpoly_montgomeryMult_P256 = generate_intpoly_montgomeryMult_P256(); + StubRoutines::_intpoly_assign = generate_intpoly_assign(); + } + if (UseMD5Intrinsics) { StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress"); 
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB"); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 725932b9e03..02435bd172c 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -483,6 +483,9 @@ class StubGenerator: public StubCodeGenerator { const XMMRegister P2L, const XMMRegister P2H, const XMMRegister YTMP1, const Register rscratch); + address generate_intpoly_montgomeryMult_P256(); + address generate_intpoly_assign(); + // BASE64 stubs address base64_shuffle_addr(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp similarity index 100% rename from src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp rename to src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp new file mode 100644 index 00000000000..25ee6807249 --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +#define __ _masm-> + +ATTRIBUTE_ALIGNED(64) uint64_t MODULUS_P256[] = { + 0x000fffffffffffffULL, 0x00000fffffffffffULL, + 0x0000000000000000ULL, 0x0000001000000000ULL, + 0x0000ffffffff0000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL +}; +static address modulus_p256() { + return (address)MODULUS_P256; +} + +ATTRIBUTE_ALIGNED(64) uint64_t P256_MASK52[] = { + 0x000fffffffffffffULL, 0x000fffffffffffffULL, + 0x000fffffffffffffULL, 0x000fffffffffffffULL, + 0xffffffffffffffffULL, 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0xffffffffffffffffULL, +}; +static address p256_mask52() { + return (address)P256_MASK52; +} + +ATTRIBUTE_ALIGNED(64) uint64_t SHIFT1R[] = { + 0x0000000000000001ULL, 0x0000000000000002ULL, + 0x0000000000000003ULL, 0x0000000000000004ULL, + 0x0000000000000005ULL, 0x0000000000000006ULL, + 0x0000000000000007ULL, 0x0000000000000000ULL, +}; +static address shift_1R() { + return (address)SHIFT1R; +} + +ATTRIBUTE_ALIGNED(64) uint64_t SHIFT1L[] = { + 0x0000000000000007ULL, 0x0000000000000000ULL, + 0x0000000000000001ULL, 0x0000000000000002ULL, + 0x0000000000000003ULL, 0x0000000000000004ULL, + 0x0000000000000005ULL, 0x0000000000000006ULL, +}; +static address shift_1L() { + return (address)SHIFT1L; +} + +/** + * Unrolled Word-by-Word Montgomery Multiplication + * r = a * b * 2^-260 (mod P) + * + * Reference [1]: Shay Gueron and Vlad Krasnov + * "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes" + * See Figure 5. 
"Algorithm 2: Word-by-Word Montgomery Multiplication for a Montgomery + * Friendly modulus p". Note: Step 6. Skipped; Instead use numAdds to reuse existing overflow + * logic. + * + * Pseudocode: + * + * +--+--+--+--+--+--+--+--+ + * M = load(*modulus_p256) | 0| 0| 0|m5|m4|m3|m2|m1| + * +--+--+--+--+--+--+--+--+ + * A = load(*aLimbs) | 0| 0| 0|a5|a4|a3|a2|a1| + * +--+--+--+--+--+--+--+--+ + * Acc1 = 0 | 0| 0| 0| 0| 0| 0| 0| 0| + * +--+--+--+--+--+--+--+--+ + * ---- for i = 0 to 4 + * +--+--+--+--+--+--+--+--+ + * Acc2 = 0 | 0| 0| 0| 0| 0| 0| 0| 0| + * +--+--+--+--+--+--+--+--+ + * B = replicate(bLimbs[i]) |bi|bi|bi|bi|bi|bi|bi|bi| + * +--+--+--+--+--+--+--+--+ + * +--+--+--+--+--+--+--+--+ + * Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1| + * *| 0| 0| 0|a5|a4|a3|a2|a1| + * Acc1 += A * B |bi|bi|bi|bi|bi|bi|bi|bi| + * +--+--+--+--+--+--+--+--+ + * Acc2+=| 0| 0| 0| 0| 0| 0| 0| 0| + * *h| 0| 0| 0|a5|a4|a3|a2|a1| + * Acc2 += A *h B |bi|bi|bi|bi|bi|bi|bi|bi| + * +--+--+--+--+--+--+--+--+ + * N = replicate(Acc1[0]) |n0|n0|n0|n0|n0|n0|n0|n0| + * +--+--+--+--+--+--+--+--+ + * +--+--+--+--+--+--+--+--+ + * Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1| + * *| 0| 0| 0|m5|m4|m3|m2|m1| + * Acc1 += M * N |n0|n0|n0|n0|n0|n0|n0|n0| Note: 52 low bits of Acc1[0] == 0 due to Montgomery! 
+ * +--+--+--+--+--+--+--+--+ + * Acc2+=| 0| 0| 0|d5|d4|d3|d2|d1| + * *h| 0| 0| 0|m5|m4|m3|m2|m1| + * Acc2 += M *h N |n0|n0|n0|n0|n0|n0|n0|n0| + * +--+--+--+--+--+--+--+--+ + * if (i == 4) break; + * // Combine high/low partial sums Acc1 + Acc2 + * +--+--+--+--+--+--+--+--+ + * carry = Acc1[0] >> 52 | 0| 0| 0| 0| 0| 0| 0|c1| + * +--+--+--+--+--+--+--+--+ + * Acc2[0] += carry + * +--+--+--+--+--+--+--+--+ + * Acc1 = Acc1 shift one q element>> | 0| 0| 0| 0|c5|c4|c3|c2| + * +--+--+--+--+--+--+--+--+ + * Acc1 = Acc1 + Acc2 + * ---- done + * // Last Carry round: Combine high/low partial sums Acc1 + Acc1 + Acc2 + * carry = Acc1 >> 52 + * Acc1 = Acc1 shift one q element >> + * Acc1 = mask52(Acc1) + * Acc2 += carry + * Acc1 = Acc1 + Acc2 + * output to rLimbs + */ +void montgomeryMultiply(const Register aLimbs, const Register bLimbs, const Register rLimbs, const Register tmp, MacroAssembler* _masm) { + Register t0 = tmp; + Register rscratch = tmp; + + // Inputs + XMMRegister A = xmm0; + XMMRegister B = xmm1; + XMMRegister T = xmm2; + + // Intermediates + XMMRegister Acc1 = xmm10; + XMMRegister Acc2 = xmm11; + XMMRegister N = xmm12; + XMMRegister carry = xmm13; + + // // Constants + XMMRegister modulus = xmm20; + XMMRegister shift1L = xmm21; + XMMRegister shift1R = xmm22; + XMMRegister mask52 = xmm23; + KRegister limb0 = k1; + KRegister allLimbs = k2; + + __ mov64(t0, 0x1); + __ kmovql(limb0, t0); + __ mov64(t0, 0x1f); + __ kmovql(allLimbs, t0); + __ evmovdquq(shift1L, allLimbs, ExternalAddress(shift_1L()), false, Assembler::AVX_512bit, rscratch); + __ evmovdquq(shift1R, allLimbs, ExternalAddress(shift_1R()), false, Assembler::AVX_512bit, rscratch); + __ evmovdquq(mask52, allLimbs, ExternalAddress(p256_mask52()), false, Assembler::AVX_512bit, rscratch); + + // M = load(*modulus_p256) + __ evmovdquq(modulus, allLimbs, ExternalAddress(modulus_p256()), false, Assembler::AVX_512bit, rscratch); + + // A = load(*aLimbs); masked evmovdquq() can be slow. 
Instead load full 256bit, and combine with 64bit + __ evmovdquq(A, Address(aLimbs, 8), Assembler::AVX_256bit); + __ evpermq(A, allLimbs, shift1L, A, false, Assembler::AVX_512bit); + __ movq(T, Address(aLimbs, 0)); + __ evporq(A, A, T, Assembler::AVX_512bit); + + // Acc1 = 0 + __ vpxorq(Acc1, Acc1, Acc1, Assembler::AVX_512bit); + for (int i = 0; i< 5; i++) { + // Acc2 = 0 + __ vpxorq(Acc2, Acc2, Acc2, Assembler::AVX_512bit); + + // B = replicate(bLimbs[i]) + __ vpbroadcastq(B, Address(bLimbs, i*8), Assembler::AVX_512bit); + + // Acc1 += A * B + __ evpmadd52luq(Acc1, A, B, Assembler::AVX_512bit); + + // Acc2 += A *h B + __ evpmadd52huq(Acc2, A, B, Assembler::AVX_512bit); + + // N = replicate(Acc1[0]) + __ vpbroadcastq(N, Acc1, Assembler::AVX_512bit); + + // Acc1 += M * N + __ evpmadd52luq(Acc1, modulus, N, Assembler::AVX_512bit); + + // Acc2 += M *h N + __ evpmadd52huq(Acc2, modulus, N, Assembler::AVX_512bit); + + if (i == 4) break; + + // Combine high/low partial sums Acc1 + Acc2 + + // carry = Acc1[0] >> 52 + __ evpsrlq(carry, limb0, Acc1, 52, true, Assembler::AVX_512bit); + + // Acc2[0] += carry + __ evpaddq(Acc2, limb0, carry, Acc2, true, Assembler::AVX_512bit); + + // Acc1 = Acc1 shift one q element >> + __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit); + + // Acc1 = Acc1 + Acc2 + __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit); + } + + // Last Carry round: Combine high/low partial sums Acc1 + Acc1 + Acc2 + // carry = Acc1 >> 52 + __ evpsrlq(carry, allLimbs, Acc1, 52, true, Assembler::AVX_512bit); + + // Acc1 = Acc1 shift one q element >> + __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit); + + // Acc1 = mask52(Acc1) + __ evpandq(Acc1, Acc1, mask52, Assembler::AVX_512bit); // Clear top 12 bits + + // Acc2 += carry + __ evpaddq(Acc2, allLimbs, carry, Acc2, true, Assembler::AVX_512bit); + + // Acc1 = Acc1 + Acc2 + __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit); + + // output to rLimbs (1 + 4 limbs) + __
movq(Address(rLimbs, 0), Acc1); + __ evpermq(Acc1, k0, shift1R, Acc1, true, Assembler::AVX_512bit); + __ evmovdquq(Address(rLimbs, 8), k0, Acc1, true, Assembler::AVX_256bit); +} + +address StubGenerator::generate_intpoly_montgomeryMult_P256() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "intpoly_montgomeryMult_P256"); + address start = __ pc(); + __ enter(); + + // Register Map + const Register aLimbs = c_rarg0; // rdi | rcx + const Register bLimbs = c_rarg1; // rsi | rdx + const Register rLimbs = c_rarg2; // rdx | r8 + const Register tmp = r9; + + montgomeryMultiply(aLimbs, bLimbs, rLimbs, tmp, _masm); + __ mov64(rax, 0x1); // Return 1 (Fig. 5, Step 6 [1] skipped in montgomeryMultiply) + + __ leave(); + __ ret(0); + return start; +} + +// A = B if select +// Must be: +// - constant time (i.e. no branches) +// - no-side channel (i.e. all memory must always be accessed, and in same order) +void assign_avx(XMMRegister A, Address aAddr, XMMRegister B, Address bAddr, KRegister select, int vector_len, MacroAssembler* _masm) { + __ evmovdquq(A, aAddr, vector_len); + __ evmovdquq(B, bAddr, vector_len); + __ evmovdquq(A, select, B, true, vector_len); + __ evmovdquq(aAddr, A, vector_len); +} + +void assign_scalar(Address aAddr, Address bAddr, Register select, Register tmp, MacroAssembler* _masm) { + // Original java: + // long dummyLimbs = maskValue & (a[i] ^ b[i]); + // a[i] = dummyLimbs ^ a[i]; + + __ movq(tmp, aAddr); + __ xorq(tmp, bAddr); + __ andq(tmp, select); + __ xorq(aAddr, tmp); +} + +address StubGenerator::generate_intpoly_assign() { + // KNOWN Lengths: + // MontgomeryIntPolynP256: 5 = 4 + 1 + // IntegerPolynomial1305: 5 = 4 + 1 + // IntegerPolynomial25519: 10 = 8 + 2 + // IntegerPolynomialP256: 10 = 8 + 2 + // Curve25519OrderField: 10 = 8 + 2 + // Curve25519OrderField: 10 = 8 + 2 + // P256OrderField: 10 = 8 + 2 + // IntegerPolynomialP384: 14 = 8 + 4 + 2 + // P384OrderField: 14 = 8 + 4 + 2 + // IntegerPolynomial448: 16 = 8 + 8 + // 
Curve448OrderField: 16 = 8 + 8 + // Curve448OrderField: 16 = 8 + 8 + // IntegerPolynomialP521: 19 = 8 + 8 + 2 + 1 + // P521OrderField: 19 = 8 + 8 + 2 + 1 + // Special Cases 5, 10, 14, 16, 19 + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "intpoly_assign"); + address start = __ pc(); + __ enter(); + + // Inputs + const Register set = c_rarg0; + const Register aLimbs = c_rarg1; + const Register bLimbs = c_rarg2; + const Register length = c_rarg3; + XMMRegister A = xmm0; + XMMRegister B = xmm1; + + Register tmp = r9; // NOTE(review): on Win64 c_rarg3 (length) is presumably also r9, so using tmp in the default loop would clobber length -- confirm + KRegister select = k1; + Label L_Length5, L_Length10, L_Length14, L_Length16, L_Length19, L_DefaultLoop, L_Done; + + __ negq(set); + __ kmovql(select, set); + + // NOTE! Crypto code cannot branch on user input. However, branching on the number of limbs is allowed: + // the number of limbs is a constant in each IntegerPolynomial (i.e. this side-channel branch leaks + // only the number of limbs, which is not a secret) + __ cmpl(length, 5); + __ jcc(Assembler::equal, L_Length5); + __ cmpl(length, 10); + __ jcc(Assembler::equal, L_Length10); + __ cmpl(length, 14); + __ jcc(Assembler::equal, L_Length14); + __ cmpl(length, 16); + __ jcc(Assembler::equal, L_Length16); + __ cmpl(length, 19); + __ jcc(Assembler::equal, L_Length19); + + // Default copy loop (UNLIKELY) + __ cmpl(length, 0); + __ jcc(Assembler::lessEqual, L_Done); + __ bind(L_DefaultLoop); + assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm); + __ subl(length, 1); + __ lea(aLimbs, Address(aLimbs,8)); + __ lea(bLimbs, Address(bLimbs,8)); + __ cmpl(length, 0); + __ jcc(Assembler::greater, L_DefaultLoop); + __ jmp(L_Done); + + __ bind(L_Length5); // 1 + 4 + assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm); + assign_avx(A, Address(aLimbs, 8), B, Address(bLimbs, 8), select, Assembler::AVX_256bit, _masm); + __ jmp(L_Done); + + __ bind(L_Length10); // 2 + 8 + assign_avx(A, Address(aLimbs, 0), B, Address(bLimbs, 0), select, Assembler::AVX_128bit, _masm); +
assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_512bit, _masm); + __ jmp(L_Done); + + __ bind(L_Length14); // 2 + 4 + 8 + assign_avx(A, Address(aLimbs, 0), B, Address(bLimbs, 0), select, Assembler::AVX_128bit, _masm); + assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_256bit, _masm); + assign_avx(A, Address(aLimbs, 48), B, Address(bLimbs, 48), select, Assembler::AVX_512bit, _masm); + __ jmp(L_Done); + + __ bind(L_Length16); // 8 + 8 + assign_avx(A, Address(aLimbs, 0), B, Address(bLimbs, 0), select, Assembler::AVX_512bit, _masm); + assign_avx(A, Address(aLimbs, 64), B, Address(bLimbs, 64), select, Assembler::AVX_512bit, _masm); + __ jmp(L_Done); + + __ bind(L_Length19); // 1 + 2 + 8 + 8 + assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm); + assign_avx(A, Address(aLimbs, 8), B, Address(bLimbs, 8), select, Assembler::AVX_128bit, _masm); + assign_avx(A, Address(aLimbs, 24), B, Address(bLimbs, 24), select, Assembler::AVX_512bit, _masm); + assign_avx(A, Address(aLimbs, 88), B, Address(bLimbs, 88), select, Assembler::AVX_512bit, _masm); + + __ bind(L_Done); + __ leave(); + __ ret(0); + return start; +} diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 0bffb1aee13..fbc952fc8d1 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -1366,6 +1366,18 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); } +#ifdef _LP64 + if (supports_avx512ifma() && supports_avx512vlbw()) { + if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { + FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); + } + } else +#endif + if (UseIntPolyIntrinsics) { + warning("Intrinsics for Polynomial crypto functions not available on this CPU."); + FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); + } + #ifdef _LP64 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; diff 
--git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index 8d4f57165e1..e60495d1f47 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -492,6 +492,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) { case vmIntrinsics::_poly1305_processBlocks: if (!UsePoly1305Intrinsics) return true; break; + case vmIntrinsics::_intpoly_montgomeryMult_P256: + case vmIntrinsics::_intpoly_assign: + if (!UseIntPolyIntrinsics) return true; + break; case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C: if (!UseCRC32CIntrinsics) return true; diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index a0db1a65d3a..b8d8c40cc47 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -526,7 +526,18 @@ class methodHandle; do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, countPositives_signature, F_R) \ do_name( implCompressMB_name, "implCompressMultiBlock0") \ \ - /* support for java.util.Base64.Encoder*/ \ + /* support for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256 */ \ + do_class(sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, "sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256") \ + do_intrinsic(_intpoly_montgomeryMult_P256, sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, intPolyMult_name, intPolyMult_signature, F_R) \ + do_name(intPolyMult_name, "mult") \ + do_signature(intPolyMult_signature, "([J[J[J)I") \ + \ + do_class(sun_security_util_math_intpoly_IntegerPolynomial, "sun/security/util/math/intpoly/IntegerPolynomial") \ + do_intrinsic(_intpoly_assign, sun_security_util_math_intpoly_IntegerPolynomial, intPolyAssign_name, intPolyAssign_signature, F_S) \ + do_name(intPolyAssign_name, "conditionalAssign") \ + 
do_signature(intPolyAssign_signature, "(I[J[J)V") \ + \ + /* support for java.util.Base64.Encoder*/ \ do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder") \ do_intrinsic(_base64_encodeBlock, java_util_Base64_Encoder, encodeBlock_name, encodeBlock_signature, F_R) \ do_name(encodeBlock_name, "encodeBlock") \ diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp index 14fb038a6c6..dbb45995698 100644 --- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp @@ -463,6 +463,12 @@ void ShenandoahBarrierC2Support::verify(RootNode* root) { "decodeBlock", { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahStore }, { -1, ShenandoahNone }, { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "intpoly_montgomeryMult_P256", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahStore }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "intpoly_assign", + { { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, }; if (call->is_call_to_arraycopystub()) { diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 035c1dd1ce9..c72ca3870b5 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -361,6 +361,8 @@ static_field(StubRoutines, _md5_implCompressMB, address) \ static_field(StubRoutines, _chacha20Block, address) \ static_field(StubRoutines, _poly1305_processBlocks, address) \ + static_field(StubRoutines, _intpoly_montgomeryMult_P256, address) \ + static_field(StubRoutines, _intpoly_assign, address) \ static_field(StubRoutines, _sha1_implCompress, address) \ static_field(StubRoutines, 
_sha1_implCompressMB, address) \ static_field(StubRoutines, _sha256_implCompress, address) \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index dc15e82dff8..c5e17478477 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -786,6 +786,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_base64_encodeBlock: case vmIntrinsics::_base64_decodeBlock: case vmIntrinsics::_poly1305_processBlocks: + case vmIntrinsics::_intpoly_montgomeryMult_P256: + case vmIntrinsics::_intpoly_assign: case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp index 1a9e74dab3d..b011c9928b6 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp @@ -2173,6 +2173,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 || strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 || strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 || + strcmp(call->as_CallLeaf()->_name, "intpoly_montgomeryMult_P256") == 0 || + strcmp(call->as_CallLeaf()->_name, "intpoly_assign") == 0 || strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 || strcmp(call->as_CallLeaf()->_name, "chacha20Block") == 0 || strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 || diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index b018fcf5097..96e88c1a96b 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -638,7 +638,10 @@ bool LibraryCallKit::try_to_inline(int predicate) { return inline_base64_decodeBlock(); case vmIntrinsics::_poly1305_processBlocks: return inline_poly1305_processBlocks(); - + case 
vmIntrinsics::_intpoly_montgomeryMult_P256: + return inline_intpoly_montgomeryMult_P256(); + case vmIntrinsics::_intpoly_assign: + return inline_intpoly_assign(); case vmIntrinsics::_encodeISOArray: case vmIntrinsics::_encodeByteISOArray: return inline_encodeISOArray(false); @@ -7568,6 +7571,69 @@ bool LibraryCallKit::inline_poly1305_processBlocks() { return true; } +bool LibraryCallKit::inline_intpoly_montgomeryMult_P256() { + address stubAddr; + const char *stubName; + assert(UseIntPolyIntrinsics, "need intpoly intrinsics support"); + assert(callee()->signature()->size() == 3, "intpoly_montgomeryMult_P256 has %d parameters", callee()->signature()->size()); + stubAddr = StubRoutines::intpoly_montgomeryMult_P256(); + stubName = "intpoly_montgomeryMult_P256"; + + if (!stubAddr) return false; + null_check_receiver(); // null-check receiver + if (stopped()) return true; + + Node* a = argument(1); + Node* b = argument(2); + Node* r = argument(3); + + a = must_be_not_null(a, true); + b = must_be_not_null(b, true); + r = must_be_not_null(r, true); + + Node* a_start = array_element_address(a, intcon(0), T_LONG); + assert(a_start, "a array is NULL"); + Node* b_start = array_element_address(b, intcon(0), T_LONG); + assert(b_start, "b array is NULL"); + Node* r_start = array_element_address(r, intcon(0), T_LONG); + assert(r_start, "r array is NULL"); + + Node* call = make_runtime_call(RC_LEAF | RC_NO_FP, + OptoRuntime::intpoly_montgomeryMult_P256_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + a_start, b_start, r_start); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + +bool LibraryCallKit::inline_intpoly_assign() { + assert(UseIntPolyIntrinsics, "need intpoly intrinsics support"); + assert(callee()->signature()->size() == 3, "intpoly_assign has %d parameters", callee()->signature()->size()); + const char *stubName = "intpoly_assign"; + address stubAddr = StubRoutines::intpoly_assign(); + if (!stubAddr) return 
false; + + Node* set = argument(0); + Node* a = argument(1); + Node* b = argument(2); + Node* arr_length = load_array_length(a); + + a = must_be_not_null(a, true); + b = must_be_not_null(b, true); + + Node* a_start = array_element_address(a, intcon(0), T_LONG); + assert(a_start, "a array is NULL"); + Node* b_start = array_element_address(b, intcon(0), T_LONG); + assert(b_start, "b array is NULL"); + + Node* call = make_runtime_call(RC_LEAF | RC_NO_FP, + OptoRuntime::intpoly_assign_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + set, a_start, b_start, arr_length); + return true; +} + //------------------------------inline_digestBase_implCompress----------------------- // // Calculate MD5 for single-block byte[] array. diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index cb4f34a0db6..1111c795114 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -307,6 +307,8 @@ class LibraryCallKit : public GraphKit { bool inline_base64_encodeBlock(); bool inline_base64_decodeBlock(); bool inline_poly1305_processBlocks(); + bool inline_intpoly_montgomeryMult_P256(); + bool inline_intpoly_assign(); bool inline_digestBase_implCompress(vmIntrinsics::ID id); bool inline_digestBase_implCompressMB(int predicate); bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass, diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index 2c021504785..3b4519623ad 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -1401,6 +1401,45 @@ const TypeFunc* OptoRuntime::poly1305_processBlocks_Type() { return TypeFunc::make(domain, range); } +// MontgomeryIntegerPolynomialP256 multiply function +const TypeFunc* OptoRuntime::intpoly_montgomeryMult_P256_Type() { + int argcnt = 3; + + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // a array + 
fields[argp++] = TypePtr::NOTNULL; // b array + fields[argp++] = TypePtr::NOTNULL; // r(esult) array + assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms + 0] = TypeInt::INT; // carry bits in output + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + +// IntegerPolynomial constant time assignment function +const TypeFunc* OptoRuntime::intpoly_assign_Type() { + int argcnt = 4; + + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypeInt::INT; // set flag + fields[argp++] = TypePtr::NOTNULL; // a array (result) + fields[argp++] = TypePtr::NOTNULL; // b array (if set is set) + fields[argp++] = TypeInt::INT; // array length + assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms + 0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { // create input type (domain) diff --git a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp index 30656044cbb..e4cbdf2f0d0 100644 --- a/src/hotspot/share/opto/runtime.hpp +++ b/src/hotspot/share/opto/runtime.hpp @@ -298,6 +298,8 @@ private: static const TypeFunc* base64_encodeBlock_Type(); static const TypeFunc* base64_decodeBlock_Type(); static const TypeFunc* poly1305_processBlocks_Type(); + static const TypeFunc* intpoly_montgomeryMult_P256_Type(); + static const TypeFunc* intpoly_assign_Type(); static const TypeFunc* updateBytesCRC32_Type(); static const TypeFunc* 
updateBytesCRC32C_Type(); diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 3c05ea985b6..6bfb260606b 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -233,6 +233,8 @@ const int ObjectAlignmentInBytes = 8; \ product(bool, UsePoly1305Intrinsics, false, DIAGNOSTIC, \ "Use intrinsics for sun.security.util.math.intpoly") \ + product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC, \ + "Use intrinsics for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256") \ \ product(size_t, LargePageSizeInBytes, 0, \ "Maximum large page size used (0 will use the default large " \ diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index f52255f504d..74286a4ac98 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -132,6 +132,8 @@ address StubRoutines::_chacha20Block = nullptr; address StubRoutines::_base64_encodeBlock = nullptr; address StubRoutines::_base64_decodeBlock = nullptr; address StubRoutines::_poly1305_processBlocks = nullptr; +address StubRoutines::_intpoly_montgomeryMult_P256 = nullptr; +address StubRoutines::_intpoly_assign = nullptr; address StubRoutines::_md5_implCompress = nullptr; address StubRoutines::_md5_implCompressMB = nullptr; diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index fe32c9613c8..65b0c0d2f26 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -215,6 +215,8 @@ class StubRoutines: AllStatic { static address _base64_encodeBlock; static address _base64_decodeBlock; static address _poly1305_processBlocks; + static address _intpoly_montgomeryMult_P256; + static address _intpoly_assign; static address _md5_implCompress; static address _md5_implCompressMB; @@ -409,6 +411,8 @@ class StubRoutines: AllStatic { static address 
electronicCodeBook_encryptAESCrypt() { return _electronicCodeBook_encryptAESCrypt; } static address electronicCodeBook_decryptAESCrypt() { return _electronicCodeBook_decryptAESCrypt; } static address poly1305_processBlocks() { return _poly1305_processBlocks; } + static address intpoly_montgomeryMult_P256() { return _intpoly_montgomeryMult_P256; } + static address intpoly_assign() { return _intpoly_assign; } static address counterMode_AESCrypt() { return _counterMode_AESCrypt; } static address ghash_processBlocks() { return _ghash_processBlocks; } static address chacha20Block() { return _chacha20Block; } diff --git a/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java b/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java index 1fdbd94786a..be3bdfdd639 100644 --- a/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java +++ b/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java @@ -31,6 +31,7 @@ import sun.security.util.CurveDB; import sun.security.util.ECUtil; import sun.security.util.NamedCurve; import sun.security.util.math.IntegerFieldModuloP; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; import sun.security.util.math.MutableIntegerModuloP; import sun.security.util.math.SmallValue; @@ -265,6 +266,11 @@ public final class ECDHKeyAgreement extends KeyAgreementSpi { ECPublicKey pubKey) throws InvalidKeyException { IntegerFieldModuloP field = ops.getField(); + if (field instanceof IntegerMontgomeryFieldModuloP) { + // No point in doing a single SmallValue operation in the Montgomery domain + field = ((IntegerMontgomeryFieldModuloP)field).residueField(); + } + // convert s array into field element and multiply by the cofactor MutableIntegerModuloP scalar = field.getElement(priv.getS()).mutable(); SmallValue cofactor = diff --git a/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java b/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java index 7bbcbd032f1..f58d7d8f2d7 100644 ---
a/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java +++ b/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -252,7 +252,7 @@ public class ECDSAOperations { MutablePoint p1 = ecOps.multiply(basePoint, temp1); MutablePoint p2 = ecOps.multiply(pp, temp2); - ecOps.setSum(p1, p2.asAffine()); + ecOps.setSum(p1, p2); IntegerModuloP result = p1.asAffine().getX(); b2a(result, orderField, temp1); return MessageDigest.isEqual(temp1, r); diff --git a/src/java.base/share/classes/sun/security/ec/ECOperations.java b/src/java.base/share/classes/sun/security/ec/ECOperations.java index d4959aed463..2f94bb85342 100644 --- a/src/java.base/share/classes/sun/security/ec/ECOperations.java +++ b/src/java.base/share/classes/sun/security/ec/ECOperations.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,12 +46,7 @@ import java.util.Optional; * Formulas are derived from "Complete addition formulas for prime order * elliptic curves" by Renes, Costello, and Batina. 
*/ - public class ECOperations { - private static final ECOperations secp256r1Ops = - new ECOperations(IntegerPolynomialP256.ONE.getElement( - CurveDB.lookup(KnownOIDs.secp256r1.value()).getCurve().getB()), - P256OrderField.ONE); /* * An exception indicating a problem with an intermediate value produced @@ -64,7 +59,7 @@ public class ECOperations { } static final Map fields = Map.of( - IntegerPolynomialP256.MODULUS, IntegerPolynomialP256.ONE, + IntegerPolynomialP256.MODULUS, MontgomeryIntegerPolynomialP256.ONE, IntegerPolynomialP384.MODULUS, IntegerPolynomialP384.ONE, IntegerPolynomialP521.MODULUS, IntegerPolynomialP521.ONE ); @@ -207,11 +202,28 @@ public class ECOperations { * @return the product */ public MutablePoint multiply(AffinePoint affineP, byte[] s) { - return PointMultiplier.of(this, affineP).pointMultiply(s); + PointMultiplier multiplier = null; + if (getField() instanceof IntegerMontgomeryFieldModuloP + && affineP.equals(Secp256R1GeneratorMontgomeryMultiplier.generator)) { + // Lazy class loading here + multiplier = Secp256R1GeneratorMontgomeryMultiplier.multiplier; + } else { + multiplier = new DefaultMultiplier(this, affineP); + } + + return multiplier.pointMultiply(s); } + /** + * Multiply an affine ecpoint point by a scalar and return the result as a + * mutable point. + * + * @param ecPoint the point + * @param s the scalar as a little-endian array + * @return the product + */ public MutablePoint multiply(ECPoint ecPoint, byte[] s) { - return PointMultiplier.of(this, ecPoint).pointMultiply(s); + return multiply(AffinePoint.fromECPoint(ecPoint, getField()), s); } /* @@ -264,21 +276,26 @@ public class ECOperations { } - /* - * Mixed point addition. This method constructs new temporaries each time - * it is called. For better efficiency, the method that reuses temporaries - * should be used if more than one sum will be computed. + /** + * Adds second Mutable (Projective) point to first. + * + * Used by ECDSAOperations. 
This method constructs new temporaries each time + * it is called. For better efficiency, the (private) method that reuses + * temporaries should be used if more than one sum will be computed. + * + * @param p first point and result + * @param p2 second point to add */ - public void setSum(MutablePoint p, AffinePoint p2) { - + public void setSum(MutablePoint p, MutablePoint p2) { IntegerModuloP zero = p.getField().get0(); MutableIntegerModuloP t0 = zero.mutable(); MutableIntegerModuloP t1 = zero.mutable(); MutableIntegerModuloP t2 = zero.mutable(); MutableIntegerModuloP t3 = zero.mutable(); MutableIntegerModuloP t4 = zero.mutable(); - setSum((ProjectivePoint.Mutable) p, p2, t0, t1, t2, t3, t4); + setSum((ProjectivePoint.Mutable) p, (ProjectivePoint.Mutable) p2, + t0, t1, t2, t3, t4); } /* @@ -289,18 +306,18 @@ public class ECOperations { MutableIntegerModuloP t2, MutableIntegerModuloP t3, MutableIntegerModuloP t4) { - t0.setValue(p.getX()).setProduct(p2.getX()); - t1.setValue(p.getY()).setProduct(p2.getY()); - t3.setValue(p2.getX()).setSum(p2.getY()); + t0.setValue(p.getX()).setProduct(p2.getX(false)); + t1.setValue(p.getY()).setProduct(p2.getY(false)); + t3.setValue(p2.getX(false)).setSum(p2.getY(false)); t4.setValue(p.getX()).setSum(p.getY()); t3.setProduct(t4); t4.setValue(t0).setSum(t1); t3.setDifference(t4); - t4.setValue(p2.getY()).setProduct(p.getZ()); + t4.setValue(p2.getY(false)).setProduct(p.getZ()); t4.setSum(p.getY()); - p.getY().setValue(p2.getX()).setProduct(p.getZ()); + p.getY().setValue(p2.getX(false)).setProduct(p.getZ()); p.getY().setSum(p.getX()); t2.setValue(p.getZ()); p.getZ().setProduct(b); @@ -412,11 +429,8 @@ public class ECOperations { return isNeutral(this.multiply(ap, scalar)); } - sealed interface PointMultiplier { - Map multipliers = Map.of( - Secp256R1GeneratorMultiplier.generator, - Secp256R1GeneratorMultiplier.multiplier); - + sealed interface PointMultiplier + permits DefaultMultiplier, Secp256R1GeneratorMontgomeryMultiplier { // 
Multiply the point by a scalar and return the result as a mutable // point. The multiplier point is specified by the implementation of // this interface, which could be a general EC point or EC generator @@ -429,26 +443,6 @@ public class ECOperations { // in little endian byte array representation. ProjectivePoint.Mutable pointMultiply(byte[] scalar); - static PointMultiplier of(ECOperations ecOps, AffinePoint affPoint) { - PointMultiplier multiplier = multipliers.get(affPoint.toECPoint()); - if (multiplier == null) { - multiplier = new Default(ecOps, affPoint); - } - - return multiplier; - } - - static PointMultiplier of(ECOperations ecOps, ECPoint ecPoint) { - PointMultiplier multiplier = multipliers.get(ecPoint); - if (multiplier == null) { - AffinePoint affPoint = - AffinePoint.fromECPoint(ecPoint, ecOps.getField()); - multiplier = new Default(ecOps, affPoint); - } - - return multiplier; - } - private static void lookup( ProjectivePoint.Immutable[] ips, int index, ProjectivePoint.Mutable result) { @@ -465,232 +459,249 @@ public class ECOperations { result.conditionalSet(pi, set); } } + } - final class Default implements PointMultiplier { - private final AffinePoint affineP; - private final ECOperations ecOps; + final static class DefaultMultiplier implements PointMultiplier { + private final ECOperations ecOps; + private final ProjectivePoint.Immutable[] pointMultiples; - private Default(ECOperations ecOps, AffinePoint affineP) { - this.ecOps = ecOps; - this.affineP = affineP; + DefaultMultiplier(ECOperations ecOps, AffinePoint affineP) { + this.ecOps = ecOps; + + // Precompute and cache point multiples + this.pointMultiples = new ProjectivePoint.Immutable[16]; + + IntegerFieldModuloP field = ecOps.getField(); + ImmutableIntegerModuloP zero = field.get0(); + // temporaries + MutableIntegerModuloP t0 = zero.mutable(); + MutableIntegerModuloP t1 = zero.mutable(); + MutableIntegerModuloP t2 = zero.mutable(); + MutableIntegerModuloP t3 = zero.mutable(); + 
MutableIntegerModuloP t4 = zero.mutable(); + + ProjectivePoint.Mutable ps = + new ProjectivePoint.Mutable(field); + ps.getY().setValue(field.get1().mutable()); + + // 0P is neutral---same as initial result value + pointMultiples[0] = ps.fixed(); + + ps.setValue(affineP); + // 1P = P + pointMultiples[1] = ps.fixed(); + + // the rest are calculated using mixed point addition + for (int i = 2; i < 16; i++) { + ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4); + pointMultiples[i] = ps.fixed(); + } + } + + @Override + public ProjectivePoint.Mutable pointMultiply(byte[] s) { + // 4-bit windowed multiply with branchless lookup. + // The mixed addition is faster, so it is used to construct + // the array at the beginning of the operation. + + IntegerFieldModuloP field = ecOps.getField(); + ImmutableIntegerModuloP zero = field.get0(); + // temporaries + MutableIntegerModuloP t0 = zero.mutable(); + MutableIntegerModuloP t1 = zero.mutable(); + MutableIntegerModuloP t2 = zero.mutable(); + MutableIntegerModuloP t3 = zero.mutable(); + MutableIntegerModuloP t4 = zero.mutable(); + + ProjectivePoint.Mutable result = new ProjectivePoint.Mutable(field); + result.getY().setValue(field.get1().mutable()); + ProjectivePoint.Mutable lookupResult = new ProjectivePoint.Mutable(field); + + for (int i = s.length - 1; i >= 0; i--) { + double4(result, t0, t1, t2, t3, t4); + + int high = (0xFF & s[i]) >>> 4; + PointMultiplier.lookup(pointMultiples, high, lookupResult); + ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4); + + double4(result, t0, t1, t2, t3, t4); + + int low = 0xF & s[i]; + PointMultiplier.lookup(pointMultiples, low, lookupResult); + ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4); } - @Override - public ProjectivePoint.Mutable pointMultiply(byte[] s) { - // 4-bit windowed multiply with branchless lookup. - // The mixed addition is faster, so it is used to construct - // the array at the beginning of the operation. 
+ return result; + } - IntegerFieldModuloP field = affineP.getX().getField(); - ImmutableIntegerModuloP zero = field.get0(); - // temporaries - MutableIntegerModuloP t0 = zero.mutable(); - MutableIntegerModuloP t1 = zero.mutable(); - MutableIntegerModuloP t2 = zero.mutable(); - MutableIntegerModuloP t3 = zero.mutable(); - MutableIntegerModuloP t4 = zero.mutable(); + private void double4(ProjectivePoint.Mutable p, + MutableIntegerModuloP t0, MutableIntegerModuloP t1, + MutableIntegerModuloP t2, MutableIntegerModuloP t3, + MutableIntegerModuloP t4) { + for (int i = 0; i < 4; i++) { + ecOps.setDouble(p, t0, t1, t2, t3, t4); + } + } + } - ProjectivePoint.Mutable result = - new ProjectivePoint.Mutable(field); - result.getY().setValue(field.get1().mutable()); + // Represents a multiplier with a larger precomputed table. Intended to be + // used for Basepoint multiplication + final static class Secp256R1GeneratorMontgomeryMultiplier + implements PointMultiplier { + private static final ECOperations secp256r1Ops = new ECOperations( + MontgomeryIntegerPolynomialP256.ONE.getElement( + CurveDB.P_256.getCurve().getB()), P256OrderField.ONE); + public static final AffinePoint generator = AffinePoint.fromECPoint( + CurveDB.P_256.getGenerator(), secp256r1Ops.getField()); + public static final PointMultiplier multiplier = + new Secp256R1GeneratorMontgomeryMultiplier(); - ProjectivePoint.Immutable[] pointMultiples = - new ProjectivePoint.Immutable[16]; - // 0P is neutral---same as initial result value - pointMultiples[0] = result.fixed(); + private final ImmutableIntegerModuloP zero; + private final ImmutableIntegerModuloP one; + private final ProjectivePoint.Immutable[][] points; + private final BigInteger[] base; - ProjectivePoint.Mutable ps = new ProjectivePoint.Mutable(field); - ps.setValue(affineP); - // 1P = P - pointMultiples[1] = ps.fixed(); + private Secp256R1GeneratorMontgomeryMultiplier() { + this(MontgomeryIntegerPolynomialP256.ONE, + new DefaultMultiplier(secp256r1Ops, 
generator)); - // the rest are calculated using mixed point addition - for (int i = 2; i < 16; i++) { - ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4); - pointMultiples[i] = ps.fixed(); + // Check that the tables are correctly generated. + if (ECOperations.class.desiredAssertionStatus()) { + verifyTables(this); + } + } + + private Secp256R1GeneratorMontgomeryMultiplier( + IntegerFieldModuloP field, PointMultiplier smallTableMultiplier) { + zero = field.get0(); + one = field.get1(); + + // Pre-computed table to speed up the point multiplication. + // + // This is a 4x16 array of ProjectivePoint.Immutable elements. + // The first row contains the following multiples of the + // generator. + // + // index | point + // --------+---------------- + // 0x0000 | 0G + // 0x0001 | 1G + // 0x0002 | (2^64)G + // 0x0003 | (2^64 + 1)G + // 0x0004 | 2^128G + // 0x0005 | (2^128 + 1)G + // 0x0006 | (2^128 + 2^64)G + // 0x0007 | (2^128 + 2^64 + 1)G + // 0x0008 | 2^192G + // 0x0009 | (2^192 + 1)G + // 0x000A | (2^192 + 2^64)G + // 0x000B | (2^192 + 2^64 + 1)G + // 0x000C | (2^192 + 2^128)G + // 0x000D | (2^192 + 2^128 + 1)G + // 0x000E | (2^192 + 2^128 + 2^64)G + // 0x000F | (2^192 + 2^128 + 2^64 + 1)G + // + // For the other 3 rows, points[i][j] = 2^16 * (points[i-1][j]). + + // Generate the pre-computed tables. This block may be + // replaced with hard-coded tables in order to speed up + // the class loading.
+ points = new ProjectivePoint.Immutable[4][16]; + BigInteger[] factors = new BigInteger[] { + BigInteger.ONE, + BigInteger.TWO.pow(64), + BigInteger.TWO.pow(128), + BigInteger.TWO.pow(192) + }; + + base = new BigInteger[16]; + base[0] = BigInteger.ZERO; + base[1] = BigInteger.ONE; + base[2] = factors[1]; + for (int i = 3; i < 16; i++) { + base[i] = BigInteger.ZERO; + for (int k = 0; k < 4; k++) { + if (((i >>> k) & 0x01) != 0) { + base[i] = base[i].add(factors[k]); + } } - - ProjectivePoint.Mutable lookupResult = ps.mutable(); - - for (int i = s.length - 1; i >= 0; i--) { - double4(result, t0, t1, t2, t3, t4); - - int high = (0xFF & s[i]) >>> 4; - lookup(pointMultiples, high, lookupResult); - ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4); - - double4(result, t0, t1, t2, t3, t4); - - int low = 0xF & s[i]; - lookup(pointMultiples, low, lookupResult); - ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4); - } - - return result; } - private void double4(ProjectivePoint.Mutable p, - MutableIntegerModuloP t0, MutableIntegerModuloP t1, - MutableIntegerModuloP t2, MutableIntegerModuloP t3, - MutableIntegerModuloP t4) { - for (int i = 0; i < 4; i++) { - ecOps.setDouble(p, t0, t1, t2, t3, t4); + for (int d = 0; d < 4; d++) { + for (int w = 0; w < 16; w++) { + BigInteger bi = base[w]; + if (d != 0) { + bi = bi.multiply(BigInteger.TWO.pow(d * 16)); + } + if (w == 0) { + points[d][0] = new ProjectivePoint.Immutable( + zero.fixed(), one.fixed(), zero.fixed()); + } else { + byte[] s = bi.toByteArray(); + ArrayUtil.reverse(s); + ProjectivePoint.Mutable m = smallTableMultiplier.pointMultiply(s); + points[d][w] = m.fixed(); + } } } } - final class Secp256R1GeneratorMultiplier implements PointMultiplier { - private static final ECPoint generator = - CurveDB.P_256.getGenerator(); - private static final PointMultiplier multiplier = - new Secp256R1GeneratorMultiplier(); + public ProjectivePoint.Mutable pointMultiply(byte[] s) { + MutableIntegerModuloP t0 = zero.mutable(); + 
MutableIntegerModuloP t1 = zero.mutable(); + MutableIntegerModuloP t2 = zero.mutable(); + MutableIntegerModuloP t3 = zero.mutable(); + MutableIntegerModuloP t4 = zero.mutable(); - private static final ImmutableIntegerModuloP zero = - IntegerPolynomialP256.ONE.get0(); - private static final ImmutableIntegerModuloP one = - IntegerPolynomialP256.ONE.get1(); + ProjectivePoint.Mutable d = new ProjectivePoint.Mutable( + zero.mutable(), + one.mutable(), + zero.mutable()); + ProjectivePoint.Mutable r = d.mutable(); + for (int i = 15; i >= 0; i--) { + secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4); + for (int j = 3; j >= 0; j--) { + int pos = i + j * 16; + int index = (bit(s, pos + 192) << 3) | + (bit(s, pos + 128) << 2) | + (bit(s, pos + 64) << 1) | + bit(s, pos); - @Override - public ProjectivePoint.Mutable pointMultiply(byte[] s) { - MutableIntegerModuloP t0 = zero.mutable(); - MutableIntegerModuloP t1 = zero.mutable(); - MutableIntegerModuloP t2 = zero.mutable(); - MutableIntegerModuloP t3 = zero.mutable(); - MutableIntegerModuloP t4 = zero.mutable(); - - ProjectivePoint.Mutable d = new ProjectivePoint.Mutable( - zero.mutable(), - one.mutable(), - zero.mutable()); - ProjectivePoint.Mutable r = d.mutable(); - for (int i = 15; i >= 0; i--) { - secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4); - for (int j = 3; j >= 0; j--) { - int pos = i + j * 16; - int index = (bit(s, pos + 192) << 3) | - (bit(s, pos + 128) << 2) | - (bit(s, pos + 64) << 1) | - bit(s, pos); - - lookup(P256.points[j], index, r); - secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4); - } + PointMultiplier.lookup(points[j], index, r); + secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4); } - - return d; } - private static int bit(byte[] k, int i) { - return (k[i >> 3] >> (i & 0x07)) & 0x01; - } + return d; + } - // Lazy loading of the tables. - private static final class P256 { - // Pre-computed table to speed up the point multiplication. - // - // This is a 4x16 array of ProjectivePoint.Immutable elements. 
- // The first row contains the following multiples of the - // generator. - // - // index | point - // --------+---------------- - // 0x0000 | 0G - // 0x0001 | 1G - // 0x0002 | (2^64)G - // 0x0003 | (2^64 + 1)G - // 0x0004 | 2^128G - // 0x0005 | (2^128 + 1)G - // 0x0006 | (2^128 + 2^64)G - // 0x0007 | (2^128 + 2^64 + 1)G - // 0x0008 | 2^192G - // 0x0009 | (2^192 + 1)G - // 0x000A | (2^192 + 2^64)G - // 0x000B | (2^192 + 2^64 + 1)G - // 0x000C | (2^192 + 2^128)G - // 0x000D | (2^192 + 2^128 + 1)G - // 0x000E | (2^192 + 2^128 + 2^64)G - // 0x000F | (2^192 + 2^128 + 2^64 + 1)G - // - // For the other 3 rows, points[i][j] = 2^16 * (points[i-1][j]. - private static final ProjectivePoint.Immutable[][] points; + private static int bit(byte[] k, int i) { + return (k[i >> 3] >> (i & 0x07)) & 0x01; + } - // Generate the pre-computed tables. This block may be - // replaced with hard-coded tables in order to speed up - // the class loading. - static { - points = new ProjectivePoint.Immutable[4][16]; - BigInteger[] factors = new BigInteger[] { - BigInteger.ONE, - BigInteger.TWO.pow(64), - BigInteger.TWO.pow(128), - BigInteger.TWO.pow(192) - }; - - BigInteger[] base = new BigInteger[16]; - base[0] = BigInteger.ZERO; - base[1] = BigInteger.ONE; - base[2] = factors[1]; - for (int i = 3; i < 16; i++) { - base[i] = BigInteger.ZERO; - for (int k = 0; k < 4; k++) { - if (((i >>> k) & 0x01) != 0) { - base[i] = base[i].add(factors[k]); - } - } + protected void verifyTables(PointMultiplier multiplier) { + for (int d = 0; d < 4; d++) { + for (int w = 0; w < 16; w++) { + BigInteger bi = base[w]; + if (d != 0) { + bi = bi.multiply(BigInteger.TWO.pow(d * 16)); } + if (w != 0) { + byte[] s = new byte[32]; + byte[] b = bi.toByteArray(); + ArrayUtil.reverse(b); + System.arraycopy(b, 0, s, 0, b.length); - for (int d = 0; d < 4; d++) { - for (int w = 0; w < 16; w++) { - BigInteger bi = base[w]; - if (d != 0) { - bi = bi.multiply(BigInteger.TWO.pow(d * 16)); - } - if (w == 0) { - points[d][0] = 
new ProjectivePoint.Immutable( - zero.fixed(), one.fixed(), zero.fixed()); - } else { - PointMultiplier multiplier = new Default( - secp256r1Ops, AffinePoint.fromECPoint( - generator, zero.getField())); - byte[] s = bi.toByteArray(); - ArrayUtil.reverse(s); - ProjectivePoint.Mutable m = - multiplier.pointMultiply(s); - points[d][w] = m.setValue(m.asAffine()).fixed(); - } - } - } - - // Check that the tables are correctly generated. - if (ECOperations.class.desiredAssertionStatus()) { - verifyTables(base); - } - } - - private static void verifyTables(BigInteger[] base) { - for (int d = 0; d < 4; d++) { - for (int w = 0; w < 16; w++) { - BigInteger bi = base[w]; - if (d != 0) { - bi = bi.multiply(BigInteger.TWO.pow(d * 16)); - } - if (w != 0) { - byte[] s = new byte[32]; - byte[] b = bi.toByteArray(); - ArrayUtil.reverse(b); - System.arraycopy(b, 0, s, 0, b.length); - - ProjectivePoint.Mutable m = - multiplier.pointMultiply(s); - ProjectivePoint.Immutable v = - m.setValue(m.asAffine()).fixed(); - if (!v.getX().asBigInteger().equals( - points[d][w].getX().asBigInteger()) || - !v.getY().asBigInteger().equals( - points[d][w].getY().asBigInteger())) { - throw new RuntimeException(); - } - } + // Compare this multiplier to the table + // (generated by Default multiplier) + AffinePoint m = multiplier.pointMultiply(s).asAffine(); + AffinePoint v = points[d][w].asAffine(); + if (!m.equals(v)) { + java.util.HexFormat hex = java.util.HexFormat.of(); + throw new RuntimeException( + "Bad multiple found at [" +d+"]["+w+"]" + + hex.formatHex(s) + " " + m.getX().asBigInteger() + ); } } } diff --git a/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java b/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java index bc227b0babf..bc1530cd61b 100644 --- a/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java +++ b/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java @@ -26,6 +26,7 @@ package sun.security.ec.point; import 
sun.security.util.math.ImmutableIntegerModuloP; import sun.security.util.math.IntegerFieldModuloP; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; import java.security.spec.ECPoint; import java.util.Objects; @@ -54,14 +55,30 @@ public class AffinePoint { } public ECPoint toECPoint() { - return new ECPoint(x.asBigInteger(), y.asBigInteger()); + return new ECPoint(getX().asBigInteger(), getY().asBigInteger()); } public ImmutableIntegerModuloP getX() { + return getX(true); + } + + public ImmutableIntegerModuloP getX(boolean fieldCheck) { + IntegerFieldModuloP field = x.getField(); + if (fieldCheck && field instanceof IntegerMontgomeryFieldModuloP) { + return ((IntegerMontgomeryFieldModuloP)field).fromMontgomery(x); + } return x; } public ImmutableIntegerModuloP getY() { + return getY(true); + } + + public ImmutableIntegerModuloP getY(boolean fieldCheck) { + IntegerFieldModuloP field = y.getField(); + if (fieldCheck && field instanceof IntegerMontgomeryFieldModuloP) { + return ((IntegerMontgomeryFieldModuloP)field).fromMontgomery(y); + } return y; } @@ -71,8 +88,30 @@ public class AffinePoint { return false; } AffinePoint p = (AffinePoint) obj; - boolean xEquals = x.asBigInteger().equals(p.x.asBigInteger()); - boolean yEquals = y.asBigInteger().equals(p.y.asBigInteger()); + boolean xEquals, yEquals; + boolean thisMont = x.getField() instanceof IntegerMontgomeryFieldModuloP; + boolean objMont = p.x.getField() instanceof IntegerMontgomeryFieldModuloP; + if (thisMont ^ objMont == false) { + // both fields same + xEquals = x.asBigInteger().equals(p.x.asBigInteger()); + yEquals = y.asBigInteger().equals(p.y.asBigInteger()); + } else if (thisMont) { + // mismatched fields should not happen in production, but useful in + // testing + IntegerMontgomeryFieldModuloP field = + (IntegerMontgomeryFieldModuloP)x.getField(); + xEquals = x.asBigInteger().equals( + field.getElement(p.x.asBigInteger()).asBigInteger()); + yEquals = y.asBigInteger().equals( + 
field.getElement(p.y.asBigInteger()).asBigInteger()); + } else { + IntegerMontgomeryFieldModuloP field = + (IntegerMontgomeryFieldModuloP)p.x.getField(); + xEquals = field.getElement( + x.asBigInteger()).asBigInteger().equals(p.x.asBigInteger()); + yEquals = field.getElement( + y.asBigInteger()).asBigInteger().equals(p.y.asBigInteger()); + } return xEquals && yEquals; } diff --git a/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java b/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java index a3ebc532d46..fbb6681b724 100644 --- a/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java +++ b/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ package sun.security.ec.point; import sun.security.util.math.*; +import jdk.internal.vm.annotation.ForceInline; /** * Elliptic curve point in projective coordinates (X, Y, Z) where @@ -145,6 +146,7 @@ public abstract class ProjectivePoint return conditionalSet(pp, set); } + @ForceInline private Mutable conditionalSet(ProjectivePoint pp, int set) { @@ -157,9 +159,9 @@ public abstract class ProjectivePoint @Override public Mutable setValue(AffinePoint p) { - x.setValue(p.getX()); - y.setValue(p.getY()); - z.setValue(p.getX().getField().get1()); + x.setValue(p.getX(false)); + y.setValue(p.getY(false)); + z.setValue(p.getX(false).getField().get1()); return this; } diff --git a/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java b/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java new file mode 100644 index 00000000000..2987674a32b --- /dev/null +++ 
b/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package sun.security.util.math; + +import java.math.BigInteger; + +/** + * An interface for the field of integers modulo a prime number. An + * implementation of this interface can be used to get properties of the + * field and to produce field elements of type ImmutableIntegerModuloP from + * other objects and representations of field elements. 
+ */ + +public interface IntegerMontgomeryFieldModuloP extends IntegerFieldModuloP { + ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP m); + IntegerFieldModuloP residueField(); +} diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java index 18ee87e0b24..05b4a71bebb 100644 --- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java +++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,6 +32,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.annotation.IntrinsicCandidate; + /** * A large number polynomial representation using sparse limbs of signed * long (64-bit) values. 
Limb values will always fit within a long, so inputs @@ -62,10 +65,9 @@ import java.util.Arrays; public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP permits IntegerPolynomial1305, IntegerPolynomial25519, IntegerPolynomial448, IntegerPolynomialP256, - IntegerPolynomialP384, IntegerPolynomialP521, - IntegerPolynomialModBinP, P256OrderField, - P384OrderField, P521OrderField, - Curve25519OrderField, + MontgomeryIntegerPolynomialP256, IntegerPolynomialP384, + IntegerPolynomialP521, IntegerPolynomialModBinP, P256OrderField, + P384OrderField, P521OrderField, Curve25519OrderField, Curve448OrderField { protected static final BigInteger TWO = BigInteger.valueOf(2); @@ -74,7 +76,8 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP private final BigInteger modulus; protected final int bitsPerLimb; private final long[] posModLimbs; - private final int maxAdds; + private final int maxAddsMul; // max additions before a multiplication + private final int maxAddsAdd; // max additions before an addition /** * Reduce an IntegerPolynomial representation (a) and store the result @@ -87,11 +90,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP * store the result in an IntegerPolynomial representation in a. Requires * that a.length == numLimbs. */ - protected void multByInt(long[] a, long b) { + protected int multByInt(long[] a, long b) { for (int i = 0; i < a.length; i++) { a[i] *= b; } reduce(a); + return 0; } /** @@ -100,7 +104,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP * a.length == b.length == r.length == numLimbs. It is allowed for a and r * to be the same array. 
*/ - protected abstract void mult(long[] a, long[] b, long[] r); + protected abstract int mult(long[] a, long[] b, long[] r); /** * Multiply an IntegerPolynomial representation (a) with itself and store @@ -108,19 +112,23 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP * a.length == r.length == numLimbs. It is allowed for a and r * to be the same array. */ - protected abstract void square(long[] a, long[] r); + protected abstract int square(long[] a, long[] r); IntegerPolynomial(int bitsPerLimb, int numLimbs, - int maxAdds, + int maxAddsMul, BigInteger modulus) { this.numLimbs = numLimbs; this.modulus = modulus; this.bitsPerLimb = bitsPerLimb; - this.maxAdds = maxAdds; - + this.maxAddsMul = maxAddsMul; + if (bitsPerLimb>32) { + this.maxAddsAdd = 64 - bitsPerLimb; + } else { + this.maxAddsAdd = 32 - bitsPerLimb; + } posModLimbs = setPosModLimbs(); } @@ -135,7 +143,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP } public int getMaxAdds() { - return maxAdds; + return maxAddsMul; } @Override @@ -327,10 +335,9 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP } protected void setLimbsValuePositive(BigInteger v, long[] limbs) { - assert bitsPerLimb < 32; long limbMask = (1L << bitsPerLimb) - 1; for (int i = 0; i < limbs.length; i++) { - limbs[i] = v.intValue() & limbMask; + limbs[i] = v.longValue() & limbMask; v = v.shiftRight(bitsPerLimb); } } @@ -449,6 +456,8 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP * will be unchanged. If set==1, then the values of b will be assigned to a. * The behavior is undefined if swap has any value other than 0 or 1. 
*/ + @ForceInline + @IntrinsicCandidate protected static void conditionalAssign(int set, long[] a, long[] b) { int maskValue = -set; for (int i = 0; i < a.length; i++) { @@ -557,14 +566,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element b = (Element)genB; // Reduce if required. - // if (numAdds >= maxAdds) { - if (numAdds > 32 - bitsPerLimb) { + if (numAdds > maxAddsAdd) { reduce(limbs); numAdds = 0; } - // if (b.numAdds >= maxAdds) { - if (b.numAdds > 32 - bitsPerLimb) { + if (b.numAdds > maxAddsAdd) { reduce(b.limbs); b.numAdds = 0; } @@ -586,7 +593,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP newLimbs[i] = -limbs[i]; } - return new ImmutableElement(newLimbs, numAdds); + return new ImmutableElement(newLimbs, numAdds+1); } protected long[] cloneLow(long[] limbs) { @@ -604,32 +611,32 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element b = (Element)genB; // Reduce if required. - if (numAdds > maxAdds) { + if (numAdds > maxAddsMul) { reduce(limbs); numAdds = 0; } - if (b.numAdds > maxAdds) { + if (b.numAdds > maxAddsMul) { reduce(b.limbs); b.numAdds = 0; } long[] newLimbs = new long[limbs.length]; - mult(limbs, b.limbs, newLimbs); - return new ImmutableElement(newLimbs, 0); + int numAdds = mult(limbs, b.limbs, newLimbs); + return new ImmutableElement(newLimbs, numAdds); } @Override public ImmutableElement square() { // Reduce if required. 
- if (numAdds > maxAdds) { + if (numAdds > maxAddsMul) { reduce(limbs); numAdds = 0; } long[] newLimbs = new long[limbs.length]; - IntegerPolynomial.this.square(limbs, newLimbs); - return new ImmutableElement(newLimbs, 0); + int numAdds = IntegerPolynomial.this.square(limbs, newLimbs); + return new ImmutableElement(newLimbs, numAdds); } public void addModPowerTwo(IntegerModuloP arg, byte[] result) { @@ -637,12 +644,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element other = (Element)arg; // Reduce if required. - if (numAdds > 32 - bitsPerLimb) { + if (numAdds > maxAddsAdd) { reduce(limbs); numAdds = 0; } - if (other.numAdds > 32 - bitsPerLimb) { + if (other.numAdds > maxAddsAdd) { reduce(other.limbs); other.numAdds = 0; } @@ -734,32 +741,30 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element b = (Element)genB; // Reduce if required. - if (numAdds > maxAdds) { + if (numAdds > maxAddsMul) { reduce(limbs); numAdds = 0; } - if (b.numAdds > maxAdds) { + if (b.numAdds > maxAddsMul) { reduce(b.limbs); b.numAdds = 0; } - mult(limbs, b.limbs, limbs); - numAdds = 0; + numAdds = mult(limbs, b.limbs, limbs); return this; } @Override public MutableElement setProduct(SmallValue v) { // Reduce if required. - if (numAdds > maxAdds) { + if (numAdds > maxAddsMul) { reduce(limbs); numAdds = 0; } int value = ((Limb)v).value; - multByInt(limbs, value); - numAdds = 0; + numAdds += multByInt(limbs, value); return this; } @@ -769,14 +774,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element b = (Element)genB; // Reduce if required. 
- // if (numAdds >= maxAdds) { - if (numAdds > 32 - bitsPerLimb) { + if (numAdds > maxAddsAdd) { reduce(limbs); numAdds = 0; } - // if (b.numAdds >= maxAdds) { - if (b.numAdds > 32 - bitsPerLimb) { + if (b.numAdds > maxAddsAdd) { reduce(b.limbs); b.numAdds = 0; } @@ -795,14 +798,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP Element b = (Element)genB; // Reduce if required. - // if (numAdds >= maxAdds) { - if (numAdds > 32 - bitsPerLimb) { + if (numAdds > maxAddsAdd) { reduce(limbs); numAdds = 0; } - // if (b.numAdds >= maxAdds) { - if (b.numAdds > 32 - bitsPerLimb) { + if (b.numAdds > maxAddsAdd) { reduce(b.limbs); b.numAdds = 0; } @@ -818,13 +819,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP @Override public MutableElement setSquare() { // Reduce if required. - if (numAdds > maxAdds) { + if (numAdds > maxAddsMul) { reduce(limbs); numAdds = 0; } - IntegerPolynomial.this.square(limbs, limbs); - numAdds = 0; + numAdds = IntegerPolynomial.this.square(limbs, limbs);; return this; } @@ -833,6 +833,7 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP for (int i = 0; i < limbs.length; i++) { limbs[i] = -limbs[i]; } + numAdds++; return this; } } diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java index 5015d186d37..706651330d3 100644 --- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java +++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -50,7 +50,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial { super(BITS_PER_LIMB, NUM_LIMBS, 1, MODULUS); } - protected void mult(long[] a, long[] b, long[] r) { + protected int mult(long[] a, long[] b, long[] r) { // Use grade-school multiplication into primitives to avoid the // temporary array allocation. This is equivalent to the following @@ -73,6 +73,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial { long c8 = (a[4] * b[4]); carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8); + return 0; } private void carryReduce(long[] r, long c0, long c1, long c2, long c3, @@ -99,7 +100,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial { } @Override - protected void square(long[] a, long[] r) { + protected int square(long[] a, long[] r) { // Use grade-school multiplication with a simple squaring optimization. // Multiply into primitives to avoid the temporary array allocation. // This is equivalent to the following code: @@ -122,6 +123,7 @@ public final class IntegerPolynomial1305 extends IntegerPolynomial { long c8 = (a[4] * a[4]); carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8); + return 0; } @Override diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java index c6e58322d7c..e57316ed964 100644 --- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java +++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -131,11 +131,12 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial { } @Override - protected void mult(long[] a, long[] b, long[] r) { + protected int mult(long[] a, long[] b, long[] r) { long[] c = new long[2 * numLimbs]; multOnly(a, b, c); carryReduce(c, r); + return 0; } private void modReduceInBits(long[] limbs, int index, int bits, long x) { @@ -188,7 +189,7 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial { } @Override - protected void square(long[] a, long[] r) { + protected int square(long[] a, long[] r) { long[] c = new long[2 * numLimbs]; for (int i = 0; i < numLimbs; i++) { @@ -199,7 +200,7 @@ public sealed class IntegerPolynomialModBinP extends IntegerPolynomial { } carryReduce(c, r); - + return 0; } /** diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java new file mode 100644 index 00000000000..d4c0348eb9d --- /dev/null +++ b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package sun.security.util.math.intpoly; + +import sun.security.util.math.ImmutableIntegerModuloP; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; +import sun.security.util.math.SmallValue; +import sun.security.util.math.IntegerFieldModuloP; +import java.lang.Math; +import java.math.BigInteger; +import jdk.internal.vm.annotation.IntrinsicCandidate; + +// Reference: +// - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve +// Cryptography with 256 Bit Primes" +// +public final class MontgomeryIntegerPolynomialP256 extends IntegerPolynomial + implements IntegerMontgomeryFieldModuloP { + private static final int BITS_PER_LIMB = 52; + private static final int NUM_LIMBS = 5; + private static final int MAX_ADDS = 0; + public static final BigInteger MODULUS = evaluateModulus(); + private static final long LIMB_MASK = -1L >>> (64 - BITS_PER_LIMB); + + public static final MontgomeryIntegerPolynomialP256 ONE = new MontgomeryIntegerPolynomialP256(); + + // h = 2^(2*260)%p = 0x4fffffffdfffffffffffffffefffffffbffffffff000000000000000300 + // oneActual = 1 + // oneMont = (1*2^260) mod p + // modulus = p + private static final long[] h = new long[] { + 0x0000000000000300L, 0x000ffffffff00000L, 0x000ffffefffffffbL, + 0x000fdfffffffffffL, 0x0000000004ffffffL }; + private static final long[] oneActual = new long[] { + 0x0000000000000001L, 0x0000000000000000L, 0x0000000000000000L, + 0x0000000000000000L, 0x0000000000000000L }; 
+ private static final long[] oneMont = new long[] { + 0x0000000000000010L, 0x000f000000000000L, 0x000fffffffffffffL, + 0x000ffeffffffffffL, 0x00000000000fffffL }; + private static final long[] zero = new long[] { + 0x0000000000000000L, 0x0000000000000000L, 0x0000000000000000L, + 0x0000000000000000L, 0x0000000000000000L }; + private static final long[] modulus = new long[] { + 0x000fffffffffffffL, 0x00000fffffffffffL, 0x0000000000000000L, + 0x0000001000000000L, 0x0000ffffffff0000L }; + + private MontgomeryIntegerPolynomialP256() { + super(BITS_PER_LIMB, NUM_LIMBS, MAX_ADDS, MODULUS); + } + + public IntegerFieldModuloP residueField() { + return IntegerPolynomialP256.ONE; + } + + // (224%nat,-1)::(192%nat,1)::(96%nat,1)::(0%nat,-1)::nil. + private static BigInteger evaluateModulus() { + BigInteger result = BigInteger.valueOf(2).pow(256); + result = result.subtract(BigInteger.valueOf(1).shiftLeft(224)); + result = result.add(BigInteger.valueOf(1).shiftLeft(192)); + result = result.add(BigInteger.valueOf(1).shiftLeft(96)); + result = result.subtract(BigInteger.valueOf(1)); + return result; + } + + @Override + public ImmutableElement get0() { + return new ImmutableElement(zero, 0); + } + + // One in montgomery domain: (1*2^260) mod p + @Override + public ImmutableElement get1() { + return new ImmutableElement(oneMont, 0); + } + + // Convert v to Montgomery domain + @Override + public ImmutableElement getElement(BigInteger v) { + long[] vLimbs = new long[NUM_LIMBS]; + long[] montLimbs = new long[NUM_LIMBS]; + setLimbsValuePositive(v, vLimbs); + + // Convert to Montgomery domain + int numAdds = mult(vLimbs, h, montLimbs); + return new ImmutableElement(montLimbs, numAdds); + } + + @Override + public SmallValue getSmallValue(int value) { + // Explicitly here as reminder that SmallValue stays in residue domain + // See multByInt below for how this is used + return super.getSmallValue(value); + } + + /* + * This function is used by IntegerPolynomial.setProduct(SmallValue v)
to + * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a + * montgomery conversion followed by a montgomery multiplication, just use + * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1] + * Section 4 ) + * + * Will return an unreduced value + */ + @Override + protected int multByInt(long[] a, long b) { + assert (b < (1 << BITS_PER_LIMB)); + for (int i = 0; i < a.length; i++) { + a[i] *= b; + } + return (int) (b - 1); + } + + @Override + public ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP n) { + assert n.getField() == MontgomeryIntegerPolynomialP256.ONE; + + ImmutableElement nn = (ImmutableElement) n; + long[] r1 = new long[NUM_LIMBS]; + long[] r2 = new long[2 * NUM_LIMBS]; + long[] limbs = nn.getLimbs(); + reduce(limbs); + MontgomeryIntegerPolynomialP256.ONE.mult(limbs, oneActual, r1); + reduce(r1); + halfLimbs(r1, r2); + return IntegerPolynomialP256.ONE.new ImmutableElement(r2, 0); + } + + private void halfLimbs(long[] a, long[] r) { + final long HALF_BITS_LIMB = BITS_PER_LIMB / 2; + final long HALF_LIMB_MASK = -1L >>> (64 - HALF_BITS_LIMB); + r[0] = a[0] & HALF_LIMB_MASK; + r[1] = a[0] >> HALF_BITS_LIMB; + r[2] = a[1] & HALF_LIMB_MASK; + r[3] = a[1] >> HALF_BITS_LIMB; + r[4] = a[2] & HALF_LIMB_MASK; + r[5] = a[2] >> HALF_BITS_LIMB; + r[6] = a[3] & HALF_LIMB_MASK; + r[7] = a[3] >> HALF_BITS_LIMB; + r[8] = a[4] & HALF_LIMB_MASK; + r[9] = a[4] >> HALF_BITS_LIMB; + } + + @Override + protected int square(long[] a, long[] r) { + return mult(a, a, r); + } + + /** + * Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P) + * + * See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication + * for a Montgomery Friendly modulus p". Note: Step 6. Skipped; Instead use + * numAdds to reuse existing overflow logic. 
+ */ + @IntrinsicCandidate + protected int mult(long[] a, long[] b, long[] r) { + long aa0 = a[0]; + long aa1 = a[1]; + long aa2 = a[2]; + long aa3 = a[3]; + long aa4 = a[4]; + + long bb0 = b[0]; + long bb1 = b[1]; + long bb2 = b[2]; + long bb3 = b[3]; + long bb4 = b[4]; + + final long shift1 = 64 - BITS_PER_LIMB; // 12 + final long shift2 = BITS_PER_LIMB; // 52 + + long d0, d1, d2, d3, d4; // low digits from multiplication + long dd0, dd1, dd2, dd3, dd4; // high digits from multiplication + long n, n0, n1, n2, n3, n4, + nn0, nn1, nn2, nn3, nn4; // modulus multiple digits + long c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; // multiplication result + // digits for each column + + // Row 0 - multiply by aa0 and reduce out c0 + d0 = aa0 * bb0; + dd0 = Math.unsignedMultiplyHigh(aa0, bb0) << shift1 | (d0 >>> shift2); + d0 &= LIMB_MASK; + n = d0; + d1 = aa0 * bb1; + dd1 = Math.unsignedMultiplyHigh(aa0, bb1) << shift1 | (d1 >>> shift2); + d1 &= LIMB_MASK; + d2 = aa0 * bb2; + dd2 = Math.unsignedMultiplyHigh(aa0, bb2) << shift1 | (d2 >>> shift2); + d2 &= LIMB_MASK; + d3 = aa0 * bb3; + dd3 = Math.unsignedMultiplyHigh(aa0, bb3) << shift1 | (d3 >>> shift2); + d3 &= LIMB_MASK; + d4 = aa0 * bb4; + dd4 = Math.unsignedMultiplyHigh(aa0, bb4) << shift1 | (d4 >>> shift2); + d4 &= LIMB_MASK; + + n0 = n * modulus[0]; + nn0 = Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2); + n0 &= LIMB_MASK; + n1 = n * modulus[1]; + nn1 = Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2); + n1 &= LIMB_MASK; + n2 = n * modulus[2]; + nn2 = Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2); + n2 &= LIMB_MASK; + n3 = n * modulus[3]; + nn3 = Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2); + n3 &= LIMB_MASK; + n4 = n * modulus[4]; + nn4 = Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2); + n4 &= LIMB_MASK; + + dd0 += nn0; + d0 += n0; + dd1 += nn1; + d1 += n1; + dd2 += nn2; + d2 += n2; + dd3 += nn3; + d3 += n3; +
dd4 += nn4; + d4 += n4; + + c1 = d1 + dd0 + (d0 >>> BITS_PER_LIMB); + c2 = d2 + dd1; + c3 = d3 + dd2; + c4 = d4 + dd3; + c5 = dd4; + + // Row 1 - multiply by aa1 and reduce out c1 + d0 = aa1 * bb0; + dd0 = Math.unsignedMultiplyHigh(aa1, bb0) << shift1 | (d0 >>> shift2); + d0 &= LIMB_MASK; + d0 += c1; + n = d0 & LIMB_MASK; + d1 = aa1 * bb1; + dd1 = Math.unsignedMultiplyHigh(aa1, bb1) << shift1 | (d1 >>> shift2); + d1 &= LIMB_MASK; + d2 = aa1 * bb2; + dd2 = Math.unsignedMultiplyHigh(aa1, bb2) << shift1 | (d2 >>> shift2); + d2 &= LIMB_MASK; + d3 = aa1 * bb3; + dd3 = Math.unsignedMultiplyHigh(aa1, bb3) << shift1 | (d3 >>> shift2); + d3 &= LIMB_MASK; + d4 = aa1 * bb4; + dd4 = Math.unsignedMultiplyHigh(aa1, bb4) << shift1 | (d4 >>> shift2); + d4 &= LIMB_MASK; + + n0 = n * modulus[0]; + dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2); + d0 += n0 & LIMB_MASK; + n1 = n * modulus[1]; + dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2); + d1 += n1 & LIMB_MASK; + n2 = n * modulus[2]; + dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2); + d2 += n2 & LIMB_MASK; + n3 = n * modulus[3]; + dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2); + d3 += n3 & LIMB_MASK; + n4 = n * modulus[4]; + dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2); + d4 += n4 & LIMB_MASK; + + c2 += d1 + dd0 + (d0 >>> BITS_PER_LIMB); + c3 += d2 + dd1; + c4 += d3 + dd2; + c5 += d4 + dd3; + c6 = dd4; + + // Row 2 - multiply by aa2 and reduce out c2 + d0 = aa2 * bb0; + dd0 = Math.unsignedMultiplyHigh(aa2, bb0) << shift1 | (d0 >>> shift2); + d0 &= LIMB_MASK; + d0 += c2; + n = d0 & LIMB_MASK; + d1 = aa2 * bb1; + dd1 = Math.unsignedMultiplyHigh(aa2, bb1) << shift1 | (d1 >>> shift2); + d1 &= LIMB_MASK; + d2 = aa2 * bb2; + dd2 = Math.unsignedMultiplyHigh(aa2, bb2) << shift1 | (d2 >>> shift2); + d2 &= LIMB_MASK; + d3 = aa2 * bb3; + dd3 = Math.unsignedMultiplyHigh(aa2, bb3) << shift1 | (d3 
>>> shift2); + d3 &= LIMB_MASK; + d4 = aa2 * bb4; + dd4 = Math.unsignedMultiplyHigh(aa2, bb4) << shift1 | (d4 >>> shift2); + d4 &= LIMB_MASK; + + n0 = n * modulus[0]; + dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2); + d0 += n0 & LIMB_MASK; + n1 = n * modulus[1]; + dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2); + d1 += n1 & LIMB_MASK; + n2 = n * modulus[2]; + dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2); + d2 += n2 & LIMB_MASK; + n3 = n * modulus[3]; + dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2); + d3 += n3 & LIMB_MASK; + n4 = n * modulus[4]; + dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2); + d4 += n4 & LIMB_MASK; + + c3 += d1 + dd0 + (d0 >>> BITS_PER_LIMB); + c4 += d2 + dd1; + c5 += d3 + dd2; + c6 += d4 + dd3; + c7 = dd4; + + // Row 3 - multiply by aa3 and reduce out c3 + d0 = aa3 * bb0; + dd0 = Math.unsignedMultiplyHigh(aa3, bb0) << shift1 | (d0 >>> shift2); + d0 &= LIMB_MASK; + d0 += c3; + n = d0 & LIMB_MASK; + d1 = aa3 * bb1; + dd1 = Math.unsignedMultiplyHigh(aa3, bb1) << shift1 | (d1 >>> shift2); + d1 &= LIMB_MASK; + d2 = aa3 * bb2; + dd2 = Math.unsignedMultiplyHigh(aa3, bb2) << shift1 | (d2 >>> shift2); + d2 &= LIMB_MASK; + d3 = aa3 * bb3; + dd3 = Math.unsignedMultiplyHigh(aa3, bb3) << shift1 | (d3 >>> shift2); + d3 &= LIMB_MASK; + d4 = aa3 * bb4; + dd4 = Math.unsignedMultiplyHigh(aa3, bb4) << shift1 | (d4 >>> shift2); + d4 &= LIMB_MASK; + + n0 = n * modulus[0]; + dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2); + d0 += n0 & LIMB_MASK; + n1 = n * modulus[1]; + dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2); + d1 += n1 & LIMB_MASK; + n2 = n * modulus[2]; + dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2); + d2 += n2 & LIMB_MASK; + n3 = n * modulus[3]; + dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2); 
+ d3 += n3 & LIMB_MASK; + n4 = n * modulus[4]; + dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2); + d4 += n4 & LIMB_MASK; + + c4 += d1 + dd0 + (d0 >>> BITS_PER_LIMB); + c5 += d2 + dd1; + c6 += d3 + dd2; + c7 += d4 + dd3; + c8 = dd4; + + // Row 4 - multiply by aa4 and reduce out c4 + d0 = aa4 * bb0; + dd0 = Math.unsignedMultiplyHigh(aa4, bb0) << shift1 | (d0 >>> shift2); + d0 &= LIMB_MASK; + d0 += c4; + n = d0 & LIMB_MASK; + d1 = aa4 * bb1; + dd1 = Math.unsignedMultiplyHigh(aa4, bb1) << shift1 | (d1 >>> shift2); + d1 &= LIMB_MASK; + d2 = aa4 * bb2; + dd2 = Math.unsignedMultiplyHigh(aa4, bb2) << shift1 | (d2 >>> shift2); + d2 &= LIMB_MASK; + d3 = aa4 * bb3; + dd3 = Math.unsignedMultiplyHigh(aa4, bb3) << shift1 | (d3 >>> shift2); + d3 &= LIMB_MASK; + d4 = aa4 * bb4; + dd4 = Math.unsignedMultiplyHigh(aa4, bb4) << shift1 | (d4 >>> shift2); + d4 &= LIMB_MASK; + + n0 = n * modulus[0]; + dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2); + d0 += n0 & LIMB_MASK; + n1 = n * modulus[1]; + dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2); + d1 += n1 & LIMB_MASK; + n2 = n * modulus[2]; + dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2); + d2 += n2 & LIMB_MASK; + n3 = n * modulus[3]; + dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2); + d3 += n3 & LIMB_MASK; + n4 = n * modulus[4]; + dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2); + d4 += n4 & LIMB_MASK; + + c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB); + c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB); + c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB); + c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB); + c9 = dd4 + (c8 >>> BITS_PER_LIMB); + + c5 &= LIMB_MASK; + c6 &= LIMB_MASK; + c7 &= LIMB_MASK; + c8 &= LIMB_MASK; + + // At this point, the result could overflow by one modulus.
+ c0 = c5 - modulus[0]; + c1 = c6 - modulus[1] + (c0 >> BITS_PER_LIMB); + c0 &= LIMB_MASK; + c2 = c7 - modulus[2] + (c1 >> BITS_PER_LIMB); + c1 &= LIMB_MASK; + c3 = c8 - modulus[3] + (c2 >> BITS_PER_LIMB); + c2 &= LIMB_MASK; + c4 = c9 - modulus[4] + (c3 >> BITS_PER_LIMB); + c3 &= LIMB_MASK; + + long mask = c4 >> BITS_PER_LIMB; // Signed shift! + + r[0] = ((c5 & mask) | (c0 & ~mask)); + r[1] = ((c6 & mask) | (c1 & ~mask)); + r[2] = ((c7 & mask) | (c2 & ~mask)); + r[3] = ((c8 & mask) | (c3 & ~mask)); + r[4] = ((c9 & mask) | (c4 & ~mask)); + + return 0; + } + + @Override + protected void finalCarryReduceLast(long[] limbs) { + reduce(limbs); + } + + @Override + protected long carryValue(long x) { + return x >> BITS_PER_LIMB; + } + + @Override + protected void postEncodeCarry(long[] v) { + // not needed because carry is unsigned + } + + // Proof: + // carry * 2^256 (mod p) == carry * [2^256 - p] (mod p) + // == carry * [2^256 - (2^256 -2^224 +2^192 +2^96 -1)] (mod p) + // == carry * [2^224 -2^192 -2^96 +1] (mod p) + @Override + protected void reduce(long[] limbs) { + long b0 = limbs[0]; + long b1 = limbs[1]; + long b2 = limbs[2]; + long b3 = limbs[3]; + long b4 = limbs[4]; + long carry = b4 >> 48; // max 16-bits + b4 -= carry << 48; + + // 2^0 position + b0 += carry; + // -2^96 + b1 -= carry << 44; + // -2^192 + b3 -= carry << 36; + // 2^224 + b4 += carry << 16; + + b1 += b0 >> BITS_PER_LIMB; + b2 += b1 >> BITS_PER_LIMB; + b3 += b2 >> BITS_PER_LIMB; + b4 += b3 >> BITS_PER_LIMB; + + b0 &= LIMB_MASK; + b1 &= LIMB_MASK; + b2 &= LIMB_MASK; + b3 &= LIMB_MASK; + + long c0, c1, c2, c3, c4; + c0 = modulus[0] + b0; + c1 = modulus[1] + b1 + (c0 >> BITS_PER_LIMB); + c0 &= LIMB_MASK; + c2 = modulus[2] + b2 + (c1 >> BITS_PER_LIMB); + c1 &= LIMB_MASK; + c3 = modulus[3] + b3 + (c2 >> BITS_PER_LIMB); + c2 &= LIMB_MASK; + c4 = modulus[4] + b4 + (c3 >> BITS_PER_LIMB); + c3 &= LIMB_MASK; + + long mask = b4 >> BITS_PER_LIMB; // Signed shift! 
+ + limbs[0] = (b0 & ~mask) | (c0 & mask); + limbs[1] = (b1 & ~mask) | (c1 & mask); + limbs[2] = (b2 & ~mask) | (c2 & mask); + limbs[3] = (b3 & ~mask) | (c3 & mask); + limbs[4] = (b4 & ~mask) | (c4 & mask); + } + + public ImmutableElement getElement(byte[] v, int offset, int length, + byte highByte) { + + long[] vLimbs = new long[NUM_LIMBS]; + long[] montLimbs = new long[NUM_LIMBS]; + super.encode(v, offset, length, highByte, vLimbs); + + // Convert to Montgomery domain + int numAdds = mult(vLimbs, h, montLimbs); + return new ImmutableElement(montLimbs, numAdds); + } + + /* + * This function 'moves/reduces' digit 'v' to the 'lower' limbs + * + * The result is not reduced further. Carry propagation is not performed + * (see IntegerPolynomial.reduceHigh() for how this method is used) + * + * Proof: + * v * 2^(i*52) (mod p) == v * 2^(52i) - v * 2^(52i-256) * p (mod p) + * == v * 2^(52i) - v * 2^(52i-256) * (2^256 -2^224 +2^192 +2^96 -1) (mod p) + * == v * 2^(52i) - v * [2^(52i-256+256) -2^(52i-256+224) +2^(52i-256+192) +2^(52i-256+96) -2^(52i-256)] (mod p) + * == v * 2^(52i) - v * [2^(52i) -2^(52i-32) +2^(52i-64) +2^(52i-160) -2^(52i-256)] (mod p) + * + * == v * [2^(52i-32) +2^(52i-52-12) +2^(52i-3*52-4) -2^(52i-4*52-48)] (mod p) + */ + @Override + protected void reduceIn(long[] limbs, long v, int i) { + // Since top term (2^(52i-32)) will leave top 20 bits back in the same + // position i, + // "repeat same reduction on top 20 bits" + v += v >> 32; + + // 2^(52i-32) + limbs[i - 1] += (v << 20) & LIMB_MASK; + + // 2^(52i-52-12) + limbs[i - 2] -= (v << 40) & LIMB_MASK; + limbs[i - 1] -= v >> 12; + + // 2^(52i-3*52-4) + limbs[i - 4] -= (v << 48) & LIMB_MASK; + limbs[i - 3] -= v >> 4; + + // 2^(52i-4*52-48) + limbs[i - 5] += (v << 4) & LIMB_MASK; + limbs[i - 4] += v >> 48; + } +} \ No newline at end of file diff --git a/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java b/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java new file mode 100644 index 
00000000000..31d83815ed8 --- /dev/null +++ b/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.util.Random; +import java.math.BigInteger; +import java.lang.reflect.Field; +import java.security.spec.ECParameterSpec; +import sun.security.ec.ECOperations; +import sun.security.util.ECUtil; +import sun.security.util.NamedCurve; +import sun.security.util.CurveDB; +import sun.security.ec.point.*; +import java.security.spec.ECPoint; +import sun.security.util.KnownOIDs; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; +import sun.security.util.math.intpoly.*; + +/* + * @test + * @key randomness + * @modules java.base/sun.security.ec java.base/sun.security.ec.point + * java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm/timeout=1200 --add-opens + * java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions + * -XX:-UseIntPolyIntrinsics ECOperationsFuzzTest + * @summary Unit test ECOperationsFuzzTest. + */ + +/* + * @test + * @key randomness + * @modules java.base/sun.security.ec java.base/sun.security.ec.point + * java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm/timeout=1200 --add-opens + * java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions + * -XX:+UseIntPolyIntrinsics ECOperationsFuzzTest + * @summary Unit test ECOperationsFuzzTest. + */ + +// This test case is NOT entirely deterministic, it uses a random seed for +// pseudo-random number generator. 
If a failure occurs, hardcode the seed to +// make the test case deterministic +public class ECOperationsFuzzTest { + public static void main(String[] args) throws Exception { + // Note: it might be useful to increase this number during development + final int repeat = 10000; + test(repeat); + System.out.println("Fuzz Success"); + } + + private static void check(MutablePoint reference, MutablePoint testValue, + long seed, int iter) { + AffinePoint affineRef = reference.asAffine(); + AffinePoint affine = testValue.asAffine(); + if (!affineRef.equals(affine)) { + throw new RuntimeException( + "Found error with seed " + seed + "at iteration " + iter); + } + } + + public static void test(int repeat) throws Exception { + Random rnd = new Random(); + long seed = rnd.nextLong(); + rnd.setSeed(seed); + + int keySize = 256; + ECParameterSpec params = ECUtil.getECParameterSpec(keySize); + NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value()); + ECPoint generator = curve.getGenerator(); + BigInteger b = curve.getCurve().getB(); + if (params == null || generator == null) { + throw new RuntimeException( + "No EC parameters available for key size " + keySize + " bits"); + } + + ECOperations ops = ECOperations.forParameters(params).get(); + ECOperations opsReference = new ECOperations( + IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE); + + boolean instanceTest1 = ops + .getField() instanceof IntegerMontgomeryFieldModuloP; + boolean instanceTest2 = opsReference + .getField() instanceof IntegerMontgomeryFieldModuloP; + if (instanceTest1 == false || instanceTest2 == true) { + throw new RuntimeException("Bad Initialization: [" + + instanceTest1 + "," + instanceTest2 + "]"); + } + + byte[] multiple = new byte[keySize / 8]; + rnd.nextBytes(multiple); + multiple[keySize/8 - 1] &= 0x7f; // from opsReference.seedToScalar(multiple); + + MutablePoint referencePoint = opsReference.multiply(generator, multiple); + MutablePoint point = ops.multiply(generator, multiple); + 
check(referencePoint, point, seed, -1); + + AffinePoint refAffineGenerator = AffinePoint.fromECPoint(generator, + referencePoint.getField()); + AffinePoint montAffineGenerator = AffinePoint.fromECPoint(generator, + point.getField()); + + MutablePoint refProjGenerator = new ProjectivePoint.Mutable( + refAffineGenerator.getX(false).mutable(), + refAffineGenerator.getY(false).mutable(), + referencePoint.getField().get1().mutable()); + + MutablePoint projGenerator = new ProjectivePoint.Mutable( + montAffineGenerator.getX(false).mutable(), + montAffineGenerator.getY(false).mutable(), + point.getField().get1().mutable()); + + for (int i = 0; i < repeat; i++) { + rnd.nextBytes(multiple); + multiple[keySize/8 - 1] &= 0x7f; // opsReference.seedToScalar(multiple); + + MutablePoint nextReferencePoint = opsReference + .multiply(referencePoint.asAffine(), multiple); + MutablePoint nextPoint = ops.multiply(point.asAffine().toECPoint(), + multiple); + check(nextReferencePoint, nextPoint, seed, i); + + if (rnd.nextBoolean()) { + opsReference.setSum(nextReferencePoint, referencePoint); + ops.setSum(nextPoint, point); + check(nextReferencePoint, nextPoint, seed, i); + } + + if (rnd.nextBoolean()) { + opsReference.setSum(nextReferencePoint, refProjGenerator); + ops.setSum(nextPoint, projGenerator); + check(nextReferencePoint, nextPoint, seed, i); + } + + if (rnd.nextInt(100) < 10) { // 10% Reset point to generator, test + // generator multiplier + referencePoint = opsReference.multiply(generator, multiple); + point = ops.multiply(generator, multiple); + check(referencePoint, point, seed, i); + } else { + referencePoint = nextReferencePoint; + point = nextPoint; + } + } + } + +} + +// make test TEST="test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java" \ No newline at end of file diff --git a/test/jdk/com/sun/security/ec/ECOperationsKATTest.java b/test/jdk/com/sun/security/ec/ECOperationsKATTest.java new file mode 100644 index 00000000000..3c98b5f63cd --- /dev/null +++ 
b/test/jdk/com/sun/security/ec/ECOperationsKATTest.java @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.util.Random; +import java.util.List; +import java.util.LinkedList; +import java.math.BigInteger; +import java.lang.reflect.Field; +import java.security.spec.ECParameterSpec; +import sun.security.ec.ECOperations; +import sun.security.util.ECUtil; +import sun.security.util.NamedCurve; +import sun.security.util.CurveDB; +import sun.security.ec.point.*; +import java.security.spec.ECPoint; +import sun.security.util.KnownOIDs; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; +import sun.security.util.math.intpoly.*; + +/* + * @test + * @modules java.base/sun.security.ec java.base/sun.security.ec.point + * java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm --add-opens java.base/sun.security.ec=ALL-UNNAMED + * ECOperationsKATTest + * @summary Unit test ECOperationsKATTest. + */ + +/* + * @test + * @modules java.base/sun.security.ec java.base/sun.security.ec.point + * java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp + * -XX:-TieredCompilation --add-opens java.base/sun.security.ec=ALL-UNNAMED + * -XX:+UnlockDiagnosticVMOptions ECOperationsKATTest + * @summary Unit test ECOperationsKATTest. + */ + + public class ECOperationsKATTest { + final private static java.util.HexFormat hex = java.util.HexFormat.of(); + + public static void main(String args[]) throws Exception { + int testsPassed = 0; + int testNumber = 0; + + for (TestData test : testList) { + System.out.println("*** Test " + ++testNumber + ": " + test.testName); + if (runSingleTest(test)) { + testsPassed++; + } + } + System.out.println(); + + if (testsPassed != testNumber) { + throw new RuntimeException( + "One or more tests failed. 
Check output for details"); + } + } + + private static boolean check(MutablePoint testValue, ECPoint reference) { + AffinePoint affine = testValue.asAffine(); + BigInteger x = affine.getX().asBigInteger(); + BigInteger y = affine.getY().asBigInteger(); + BigInteger refX = reference.getAffineX(); + BigInteger refY = reference.getAffineY(); + + if (!refX.equals(x) || !refY.equals(y)) { + System.out.println("ERROR - Output Mismatch!"); + System.out.println("Expected: X: " + refX.toString(16) + " Y: " + + refY.toString(16)); + System.out.println( + "Result: X: " + x.toString(16) + " Y: " + y.toString(16)); + return false; + } + return true; + } + + private static class TestData { + public TestData(String name, String keyStr, String xStr1, String yStr1, + String xStr2, String yStr2) { + testName = name; + // multiplier = (new BigInteger(keyStr, 16)).toByteArray(); + multiplier = hex.parseHex(keyStr); + sun.security.util.ArrayUtil.reverse(multiplier); + reference1 = new ECPoint(new BigInteger(xStr1, 16), + new BigInteger(yStr1, 16)); + reference2 = new ECPoint(new BigInteger(xStr2, 16), + new BigInteger(yStr2, 16)); + } + + String testName; + byte[] multiplier; + ECPoint reference1; // For generator multiplier test + ECPoint reference2; // For non-generator multiplier test + } + + public static final List testList = new LinkedList() {{ + // (x1,y1) = mult*generator + // (x2,y2) = mult*mult*generator + add(new TestData("Test Vector #1", + "0000000000000000000000000000000000000000000000000000000000000012", // mult + "1057E0AB5780F470DEFC9378D1C7C87437BB4C6F9EA55C63D936266DBD781FDA", // x1 + "F6F1645A15CBE5DC9FA9B7DFD96EE5A7DCC11B5C5EF4F1F78D83B3393C6A45A2", // y1 + "4954047A366A91E3FD94E574DB6F2B04F3A8465883DBC55A816EA563BF54A324", // x2 + "B5A54786FD9EA48C9FC38A0557B0C4D54F285908A7291B630D06BEE970F530D3") // y2 + ); + add(new TestData("Test Vector #2", + "1200000000000000000000000000000000000000000000000000000000000000", // mult + 
"DF684E6D0D57AF8B89DA11E8F7436C3D360F531D62BDCE42C5A8B72D73D5C717", // x + "9D3576BD03C09B8F416EE9C27D70AD4A425119271ACF549312CA48758F4E1FEC", // y + "57C8257EEAABF5446DCFACB99DEE104367B6C9950C76797C372EB177D5FA23B3", // x + "1CD3E8A34521C1C8E574EB4B99343CAA57E00725D8618F0231C7C79AA6837725") // y + ); + add(new TestData("Test Vector #3", + "0000000000000000000000000000000120000000000000000000000000000012", // mult + "A69DFD47B24485E5F523BDA5FBACF03F5A7C3D22E0C2BC6705594B7B051A06D0", // x + "ECF19629416BE5C9AF1E30988F3AA8B803809CF4D12944EB49C5E9892723798A", // y + "1E28559F5B681C308632EE11A007B9891B3FD592C982C4926153795794295E58", // x + "3C373046C27BB34609A43C91DF6D4B9AB9EB08F3B69A8F8FAE944211D8297F30") // y + ); + add(new TestData("Test Vector #4", + "0000000000000000000000000000000000000000000000000000000000000001", // mult + "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", // y + "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5") // y + ); + add(new TestData("Test Vector #5", + "EFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", // mult + "66B71D0BD47344197CCFB0C9578EAF0ADB609E05BB4E8F87D56BD34F24EE7C47", // x + "14A0ECB7F708C02B2BAE238D2C4607BB9D04FCE64E10A428C911D6FA25B2F0FD", // y + "D25AAFD0FCC5B5E95C84C0702C138BC4D7FEB4E5F9C2DFB4301E313507EFDF44", // x + "F3F04EBC7D308511B0392BB7171CF92688D6484A95A8100EDFC933613A359133") // y + ); + add(new TestData("Test Vector #6", + "1111111111111111111111111111111111111111111111111111111111111111", // mult + "0217E617F0B6443928278F96999E69A23A4F2C152BDF6D6CDF66E5B80282D4ED", // x + "194A7DEBCB97712D2DDA3CA85AA8765A56F45FC758599652F2897C65306E5794", // y + "A83A07D6AE918359DEBCC385DA1E416EB83417435079CA8DB06005E107C309A0", // x + "5AACDF816850C33EB3E54F3D0DD759B97B5E7065B2060016F73735E4A6AADE23") // y + ); + }}; + + 
private static boolean runSingleTest(TestData testData) { + int keySize = 256; + ECParameterSpec params = ECUtil.getECParameterSpec(keySize); + NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value()); + ECPoint generator = curve.getGenerator(); + BigInteger b = curve.getCurve().getB(); + if (params == null || generator == null) { + throw new RuntimeException( + "No EC parameters available for key size " + keySize + " bits"); + } + + ECOperations ops = ECOperations.forParameters(params).get(); + ECOperations opsReference = new ECOperations( + IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE); + + boolean instanceTest1 = ops + .getField() instanceof IntegerMontgomeryFieldModuloP; + boolean instanceTest2 = opsReference + .getField() instanceof IntegerMontgomeryFieldModuloP; + if (instanceTest1 == false || instanceTest2 == true) { + throw new RuntimeException("Bad Initialization: [" + instanceTest1 + "," + + instanceTest2 + "]"); + } + + MutablePoint nextPoint = ops.multiply(generator, testData.multiplier); + MutablePoint nextReferencePoint = opsReference.multiply(generator, + testData.multiplier); + if (!check(nextReferencePoint, testData.reference1) + || !check(nextPoint, testData.reference1)) { + return false; + } + + nextPoint = ops.multiply(nextPoint.asAffine(), testData.multiplier); + nextReferencePoint = opsReference.multiply(nextReferencePoint.asAffine(), + testData.multiplier); + if (!check(nextReferencePoint, testData.reference2) + || !check(nextPoint, testData.reference2)) { + return false; + } + + return true; + } +} + +//make test TEST="test/jdk/com/sun/security/ec/ECOperationsKATTest.java" + +/* + * KAT generator using OpenSSL for reference vectors + * g++ ecpoint.cpp -g -lcrypto -Wno-deprecated-declarations && ./a.out + * (Some OpenSSL EC operations are marked internal i.e. 
deprecated) + * + +#include +#include + +void check(int rc, const char* locator) { + if (rc != 1) { + printf("Failed at %s\n", locator); + exit(55); + } +} + +int main(){ + BN_CTX* ctx = BN_CTX_new(); + BIGNUM* k = BN_CTX_get(ctx); + BIGNUM* x1 = BN_CTX_get(ctx); + BIGNUM* y1 = BN_CTX_get(ctx); + BIGNUM* x2 = BN_CTX_get(ctx); + BIGNUM* y2 = BN_CTX_get(ctx); + EC_GROUP *ec_group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + EC_POINT* pubkey = EC_POINT_new(ec_group); + EC_POINT* pubkey2 = EC_POINT_new(ec_group); + int rc; + + rc = BN_hex2bn(&k, "1111111111111111111111111111111111111111111111111111111111111111"); //check(rc, "set raw key"); + rc = EC_POINT_mul(ec_group, pubkey, k, NULL, NULL, ctx); check(rc, "mult public key"); + rc = EC_POINT_get_affine_coordinates(ec_group, pubkey, x1, y1, ctx); check(rc, "get affine coordinates"); + rc = EC_POINT_mul(ec_group, pubkey2, NULL, pubkey, k, ctx); check(rc, "mult public key"); + rc = EC_POINT_get_affine_coordinates(ec_group, pubkey2, x2, y2, ctx); check(rc, "get affine coordinates"); + printf("k: %s\n", BN_bn2hex(k)); + printf("x: %s\ny: %s\n", BN_bn2hex(x1), BN_bn2hex(y1)); + printf("x: %s\ny: %s\n", BN_bn2hex(x2), BN_bn2hex(y2)); + + BN_CTX_free(ctx); + return 0; +} + */ \ No newline at end of file diff --git a/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java b/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java new file mode 100644 index 00000000000..237c0408c58 --- /dev/null +++ b/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.util.Random; +import java.math.BigInteger; +import java.util.Arrays; +import sun.security.util.math.*; +import sun.security.util.math.intpoly.*; + +/* + * @test + * @key randomness + * @modules java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics + * IntegerPolynomialTest + * @summary Unit test + * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign(). + */ + +/* + * @test + * @key randomness + * @modules java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp + * -XX:-TieredCompilation -XX:+UseIntPolyIntrinsics IntegerPolynomialTest + * @summary Unit test + * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign(). + */ + +// This test case is NOT entirely deterministic, it uses a random seed for +// pseudo-random number generator. 
If a failure occurs, hardcode the seed to +// make the test case deterministic +public class IntegerPolynomialTest { + public static void main(String[] args) throws Exception { + Random rnd = new Random(); + long seed = rnd.nextLong(); + rnd.setSeed(seed); + + IntegerPolynomial testFields[] = new IntegerPolynomial[] { + IntegerPolynomial1305.ONE, IntegerPolynomial25519.ONE, + IntegerPolynomial448.ONE, IntegerPolynomialP256.ONE, + MontgomeryIntegerPolynomialP256.ONE, IntegerPolynomialP384.ONE, + IntegerPolynomialP521.ONE, + new IntegerPolynomialModBinP.Curve25519OrderField(), + new IntegerPolynomialModBinP.Curve448OrderField(), + P256OrderField.ONE, P384OrderField.ONE, P521OrderField.ONE, + Curve25519OrderField.ONE, Curve448OrderField.ONE }; + + for (IntegerPolynomial field : testFields) { + ImmutableIntegerModuloP aRef = field + .getElement(new BigInteger(32 * 64, rnd)); + MutableIntegerModuloP a = aRef.mutable(); + ImmutableIntegerModuloP bRef = field + .getElement(new BigInteger(32 * 64, rnd)); + MutableIntegerModuloP b = bRef.mutable(); + + a.conditionalSet(b, 0); // Don't assign + if (Arrays.equals(a.getLimbs(), b.getLimbs())) { + throw new RuntimeException( + "[SEED " + seed + "]: Incorrect assign for " + field); + } + a.conditionalSet(b, 1); // Assign + if (!Arrays.equals(a.getLimbs(), b.getLimbs())) { + throw new RuntimeException( + "[SEED " + seed + "]: Incorrect assign for " + field); + } + } + System.out.println("Test Success"); + } +} + +//make test TEST="test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java" \ No newline at end of file diff --git a/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java b/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java new file mode 100644 index 00000000000..da5aa33d831 --- /dev/null +++ b/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024, Intel Corporation. All rights reserved. 
+ * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.util.Random; +import sun.security.util.math.IntegerMontgomeryFieldModuloP; +import sun.security.util.math.ImmutableIntegerModuloP; +import java.math.BigInteger; +import sun.security.util.math.intpoly.*; + +/* + * @test + * @key randomness + * @modules java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics + * MontgomeryPolynomialFuzzTest + * @summary Unit test MontgomeryPolynomialFuzzTest. + */ + +/* + * @test + * @key randomness + * @modules java.base/sun.security.util java.base/sun.security.util.math + * java.base/sun.security.util.math.intpoly + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UseIntPolyIntrinsics + * MontgomeryPolynomialFuzzTest + * @summary Unit test MontgomeryPolynomialFuzzTest. 
+ */ + +// This test case is NOT entirely deterministic, it uses a random seed for pseudo-random number generator +// If a failure occurs, hardcode the seed to make the test case deterministic +public class MontgomeryPolynomialFuzzTest { + public static void main(String[] args) throws Exception { + // Note: it might be useful to increase this number during development + final int repeat = 1000000; + for (int i = 0; i < repeat; i++) { + run(); + } + System.out.println("Fuzz Success"); + } + + private static void check(BigInteger reference, + ImmutableIntegerModuloP testValue, long seed) { + if (!reference.equals(testValue.asBigInteger())) { + throw new RuntimeException("SEED: " + seed); + } + } + + public static void run() throws Exception { + Random rnd = new Random(); + long seed = rnd.nextLong(); + rnd.setSeed(seed); + + IntegerMontgomeryFieldModuloP montField = MontgomeryIntegerPolynomialP256.ONE; + BigInteger P = MontgomeryIntegerPolynomialP256.ONE.MODULUS; + BigInteger r = BigInteger.ONE.shiftLeft(260).mod(P); + BigInteger rInv = r.modInverse(P); + BigInteger aRef = (new BigInteger(P.bitLength(), rnd)).mod(P); + + // Test conversion to montgomery domain + ImmutableIntegerModuloP a = montField.getElement(aRef); + aRef = aRef.multiply(r).mod(P); + check(aRef, a, seed); + + if (rnd.nextBoolean()) { + aRef = aRef.multiply(aRef).multiply(rInv).mod(P); + a = a.multiply(a); + check(aRef, a, seed); + } + + if (rnd.nextBoolean()) { + aRef = aRef.add(aRef).mod(P); + a = a.add(a); + check(aRef, a, seed); + } + } +} + +//make test TEST="test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java" \ No newline at end of file diff --git a/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java b/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java new file mode 100644 index 00000000000..94c247c9080 --- /dev/null +++ b/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java @@ -0,0 +1,105 @@ +/* + * 
Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package org.openjdk.bench.javax.crypto.full; + +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.annotations.Benchmark; +import java.math.BigInteger; +import java.util.concurrent.TimeUnit; +import sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256; +import sun.security.util.math.intpoly.IntegerPolynomialP256; +import sun.security.util.math.MutableIntegerModuloP; +import sun.security.util.math.ImmutableIntegerModuloP; + +@Fork(jvmArgsAppend = {"-XX:+AlwaysPreTouch", + "--add-exports", "java.base/sun.security.util.math.intpoly=ALL-UNNAMED", + "--add-exports", "java.base/sun.security.util.math=ALL-UNNAMED"}, value = 1) +@Warmup(iterations = 3, time = 3) +@Measurement(iterations = 8, time = 2) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Thread) +@BenchmarkMode(Mode.Throughput) +public class PolynomialP256Bench { + final MontgomeryIntegerPolynomialP256 montField = MontgomeryIntegerPolynomialP256.ONE; + final IntegerPolynomialP256 residueField = IntegerPolynomialP256.ONE; + final BigInteger refx = + new BigInteger("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16); + final ImmutableIntegerModuloP x = residueField.getElement(refx); + final ImmutableIntegerModuloP X = montField.getElement(refx); + final ImmutableIntegerModuloP one = montField.get1(); + + @Param({"true", "false"}) + private boolean isMontBench; + + @Benchmark + public MutableIntegerModuloP benchMultiply() { + MutableIntegerModuloP test; + if (isMontBench) { + test = X.mutable(); + } else { + test = x.mutable(); + } + + for (int i = 0; i< 10000; 
i++) { + test = test.setProduct(test); + } + return test; + } + + @Benchmark + public MutableIntegerModuloP benchSquare() { + MutableIntegerModuloP test; + if (isMontBench) { + test = X.mutable(); + } else { + test = x.mutable(); + } + + for (int i = 0; i< 10000; i++) { + test = test.setSquare(); + } + return test; + } + + @Benchmark + public MutableIntegerModuloP benchAssign() { + MutableIntegerModuloP test1 = X.mutable(); + MutableIntegerModuloP test2 = one.mutable(); + for (int i = 0; i< 10000; i++) { + test1.conditionalSet(test2, 0); + test1.conditionalSet(test2, 1); + test2.conditionalSet(test1, 0); + test2.conditionalSet(test1, 1); + } + return test2; + } +}