8247645: ChaCha20 intrinsics

Reviewed-by: sviswanathan, ngasson, vlivanov, ascarpino
This commit is contained in:
Jamil Nimeh 2022-11-29 14:40:20 +00:00
parent 33587ffd35
commit cd6bebbf34
28 changed files with 1590 additions and 38 deletions

View file

@ -39,6 +39,9 @@ import javax.crypto.*;
import javax.crypto.spec.ChaCha20ParameterSpec;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.annotation.IntrinsicCandidate;
import sun.security.util.DerValue;
/**
@ -58,8 +61,9 @@ abstract class ChaCha20Cipher extends CipherSpi {
private static final int STATE_CONST_3 = 0x6b206574;
// The keystream block size in bytes and as integers
private static final int KEYSTREAM_SIZE = 64;
private static final int KS_SIZE_INTS = KEYSTREAM_SIZE / Integer.BYTES;
private static final int KS_MAX_LEN = 1024;
private static final int KS_BLK_SIZE = 64;
private static final int KS_SIZE_INTS = KS_BLK_SIZE / Integer.BYTES;
private static final int CIPHERBUF_BASE = 1024;
// The initialization state of the cipher
@ -85,14 +89,18 @@ abstract class ChaCha20Cipher extends CipherSpi {
private long finalCounterValue;
private long counter;
// Two arrays, both implemented as 16-element integer arrays:
// The base state, created at initialization time, and a working
// state which is a clone of the start state, and is then modified
// with the counter and the ChaCha20 block function.
// The base state is created at initialization time as a 16-int array
// and then is copied into either local variables for computations (Java) or
// into SIMD registers (intrinsics).
private final int[] startState = new int[KS_SIZE_INTS];
private final byte[] keyStream = new byte[KEYSTREAM_SIZE];
// The offset into the current keystream
// The output keystream array is sized to hold keystream output from the
// implChaCha20Block method. This can range from a single block at a time
// (Java software) up to 16 blocks on x86_64 with AVX512 support.
private final byte[] keyStream = new byte[KS_MAX_LEN];
// The keystream buffer limit and offset
private int keyStrLimit;
private int keyStrOffset;
// AEAD-related fields and constants
@ -561,12 +569,14 @@ abstract class ChaCha20Cipher extends CipherSpi {
}
}
// We can also get one block's worth of keystream created
// We can also generate the first block (or blocks if intrinsics
// are capable of doing multiple blocks at a time) of keystream.
finalCounterValue = counter + MAX_UINT32;
generateKeystream();
this.keyStrLimit = chaCha20Block(startState, counter, keyStream);
this.keyStrOffset = 0;
this.counter += (keyStrLimit / KS_BLK_SIZE);
direction = opmode;
aadDone = false;
this.keyStrOffset = 0;
initialized = true;
}
@ -831,31 +841,34 @@ abstract class ChaCha20Cipher extends CipherSpi {
}
}
/**
* Using the current state and counter create the next set of keystream
* bytes. This method will generate the next 512 bits of keystream and
* return it in the {@code keyStream} parameter. Following the
* block function the counter will be incremented.
*/
private void generateKeystream() {
chaCha20Block(startState, counter, keyStream);
counter++;
@ForceInline
private static int chaCha20Block(int[] initState, long counter,
byte[] result) {
if (initState.length != KS_SIZE_INTS || result.length != KS_MAX_LEN) {
throw new IllegalArgumentException(
"Illegal state or keystream buffer length");
}
// Set the counter value before sending into the underlying
// private block method
initState[12] = (int)counter;
return implChaCha20Block(initState, result);
}
/**
* Perform a full 20-round ChaCha20 transform on the initial state.
*
* @param initState the starting state, not including the counter
* value.
* @param counter the counter value to apply
* @param initState the starting state using the current counter value.
* @param result the array that will hold the result of the ChaCha20
* block function.
*
* @note it is the caller's responsibility to ensure that the workState
* is sized the same as the initState, no checking is performed internally.
* @return the number of keystream bytes generated. In a pure Java method
* this will always be 64 bytes, but intrinsics that make use of
* AVX2 or AVX512 registers may generate multiple blocks of keystream
* in a single call and therefore may be a larger multiple of 64.
*/
private static void chaCha20Block(int[] initState, long counter,
byte[] result) {
@IntrinsicCandidate
private static int implChaCha20Block(int[] initState, byte[] result) {
// Create an initial state and clone a working copy
int ws00 = STATE_CONST_0;
int ws01 = STATE_CONST_1;
@ -869,7 +882,7 @@ abstract class ChaCha20Cipher extends CipherSpi {
int ws09 = initState[9];
int ws10 = initState[10];
int ws11 = initState[11];
int ws12 = (int)counter;
int ws12 = initState[12];
int ws13 = initState[13];
int ws14 = initState[14];
int ws15 = initState[15];
@ -986,11 +999,12 @@ abstract class ChaCha20Cipher extends CipherSpi {
asIntLittleEndian.set(result, 36, ws09 + initState[9]);
asIntLittleEndian.set(result, 40, ws10 + initState[10]);
asIntLittleEndian.set(result, 44, ws11 + initState[11]);
// Add the counter back into workState[12]
asIntLittleEndian.set(result, 48, ws12 + (int)counter);
asIntLittleEndian.set(result, 48, ws12 + initState[12]);
asIntLittleEndian.set(result, 52, ws13 + initState[13]);
asIntLittleEndian.set(result, 56, ws14 + initState[14]);
asIntLittleEndian.set(result, 60, ws15 + initState[15]);
return KS_BLK_SIZE;
}
/**
@ -1009,12 +1023,21 @@ abstract class ChaCha20Cipher extends CipherSpi {
int remainingData = inLen;
while (remainingData > 0) {
int ksRemain = keyStream.length - keyStrOffset;
int ksRemain = keyStrLimit - keyStrOffset;
if (ksRemain <= 0) {
if (counter <= finalCounterValue) {
generateKeystream();
// Intrinsics can do multiple blocks at once. This means
// it may overrun the counter. In order to prevent key
// stream reuse, we adjust the key stream limit to only the
// key stream length that is calculated from unique
// counter values.
keyStrLimit = chaCha20Block(startState, counter, keyStream);
counter += (keyStrLimit / KS_BLK_SIZE);
if (counter > finalCounterValue) {
keyStrLimit -= (int)(counter - finalCounterValue) * 64;
}
keyStrOffset = 0;
ksRemain = keyStream.length;
ksRemain = keyStrLimit;
} else {
throw new KeyException("Counter exhausted. " +
"Reinitialize with new key and/or nonce");
@ -1060,9 +1083,10 @@ abstract class ChaCha20Cipher extends CipherSpi {
private void initAuthenticator() throws InvalidKeyException {
authenticator = new Poly1305();
// Derive the Poly1305 key from the starting state
byte[] serializedKey = new byte[KEYSTREAM_SIZE];
chaCha20Block(startState, 0, serializedKey);
// Derive the Poly1305 key from the starting state with the counter
// value forced to zero.
byte[] serializedKey = new byte[KS_MAX_LEN];
chaCha20Block(startState, 0L, serializedKey);
authenticator.engineInit(new SecretKeySpec(serializedKey, 0, 32,
authAlgName), null);