8288047: Accelerate Poly1305 on x86_64 using AVX512 instructions

Reviewed-by: sviswanathan, vlivanov
This commit is contained in:
Volodymyr Paprotski 2022-11-21 21:01:25 +00:00 committed by Sandhya Viswanathan
parent cd6a203a3e
commit f12710e938
32 changed files with 1857 additions and 36 deletions

View file

@ -34,6 +34,8 @@ import java.util.Objects;
import sun.security.util.math.*;
import sun.security.util.math.intpoly.*;
import jdk.internal.vm.annotation.IntrinsicCandidate;
import jdk.internal.vm.annotation.ForceInline;
/**
* This class represents the Poly1305 function defined in RFC 7539.
@ -59,8 +61,10 @@ final class Poly1305 {
private IntegerModuloP s;
private MutableIntegerModuloP a;
private final MutableIntegerModuloP n = ipl1305.get1().mutable();
private final boolean checkWeakKey;
Poly1305() { }
Poly1305() { this(true); }
Poly1305(boolean checkKey) { checkWeakKey = checkKey; }
/**
* Initialize the Poly1305 object
@ -165,11 +169,15 @@ final class Poly1305 {
blockOffset = 0;
}
}
while (len >= BLOCK_LENGTH) {
processBlock(input, offset, BLOCK_LENGTH);
offset += BLOCK_LENGTH;
len -= BLOCK_LENGTH;
}
int blockMultipleLength = len & (~(BLOCK_LENGTH-1));
long[] aLimbs = a.getLimbs();
long[] rLimbs = r.getLimbs();
processMultipleBlocksCheck(input, offset, blockMultipleLength, aLimbs, rLimbs);
processMultipleBlocks(input, offset, blockMultipleLength, aLimbs, rLimbs);
offset += blockMultipleLength;
len -= blockMultipleLength;
if (len > 0) { // and len < BLOCK_LENGTH
System.arraycopy(input, offset, block, 0, len);
blockOffset = len;
@ -235,12 +243,35 @@ final class Poly1305 {
a.setProduct(r); // a = (a * r) % p
}
// This is an intrinsified method. The unused parameters aLimbs and rLimbs are used by the intrinsic.
// They correspond to this.a and this.r respectively
@ForceInline
@IntrinsicCandidate
private void processMultipleBlocks(byte[] input, int offset, int length, long[] aLimbs, long[] rLimbs) {
while (length >= BLOCK_LENGTH) {
processBlock(input, offset, BLOCK_LENGTH);
offset += BLOCK_LENGTH;
length -= BLOCK_LENGTH;
}
}
private static void processMultipleBlocksCheck(byte[] input, int offset, int length, long[] aLimbs, long[] rLimbs) {
Objects.checkFromIndexSize(offset, length, input.length);
final int numLimbs = 5; // Intrinsic expects exactly 5 limbs
if (aLimbs.length != numLimbs) {
throw new RuntimeException("invalid accumulator length: " + aLimbs.length);
}
if (rLimbs.length != numLimbs) {
throw new RuntimeException("invalid R length: " + rLimbs.length);
}
}
/**
* Partition the authentication key into the R and S components, clamp
* the R value, and instantiate IntegerModuloP objects to R and S's
* numeric values.
*/
private void setRSVals() {
private void setRSVals() throws InvalidKeyException {
// Clamp the bytes in the "r" half of the key.
keyBytes[3] &= 15;
keyBytes[7] &= 15;
@ -250,6 +281,24 @@ final class Poly1305 {
keyBytes[8] &= (byte)252;
keyBytes[12] &= (byte)252;
if (checkWeakKey) {
byte keyIsZero = 0;
for (int i = 0; i < RS_LENGTH; i++) {
keyIsZero |= keyBytes[i];
}
if (keyIsZero == 0) {
throw new InvalidKeyException("R is set to zero");
}
keyIsZero = 0;
for (int i = RS_LENGTH; i < 2*RS_LENGTH; i++) {
keyIsZero |= keyBytes[i];
}
if (keyIsZero == 0) {
throw new InvalidKeyException("S is set to zero");
}
}
// Create IntegerModuloP elements from the r and s values
r = ipl1305.getElement(keyBytes, 0, RS_LENGTH, (byte)0);
s = ipl1305.getElement(keyBytes, RS_LENGTH, RS_LENGTH, (byte)0);