8248188: Add IntrinsicCandidate and API for Base64 decoding

8248188: Add IntrinsicCandidate and API for Base64 decoding, add Power64LE intrinsic implementation.

This patch set encompasses the following commits:

Adds a new intrinsic candidate to the java.util.Base64 class - decodeBlock(), and provides a flexible API for the intrinsic. The API is similar to the existing encodeBlock intrinsic.

Adds the code in HotSpot to check and marshal the new intrinsic's arguments to the arch-specific intrinsic implementation.

Adds a Power64LE-specific implementation of the decodeBlock intrinsic.

Adds a JMH microbenchmark for both Base64 encoding and decoding.

Enhances the JTReg hotspot intrinsic "TestBase64.java" regression test to more fully test both decoding and encoding.

Reviewed-by: rriggs, mdoerr, kvn
This commit is contained in:
Corey Ashford 2020-11-11 21:31:07 +00:00 committed by Martin Doerr
parent 59965c17b4
commit ccb48b7203
25 changed files with 1901 additions and 27 deletions

View file

@ -741,6 +741,67 @@ public class Base64 {
return 3 * (int) ((len + 3L) / 4) - paddings;
}
/**
 * Fast-path decoder: converts complete, error-free groups of four
 * base64 characters into three data bytes each, and reports how many
 * data bytes were stored into the destination array.
 *
 * An arch-specific intrinsic may replace this method. A replacement
 * is free to consume all of the src bytes, none of them, or only a
 * leading portion, so that it can work in chunk sizes that suit the
 * processor it is running on.
 *
 * When the intrinsic leaves part of src unprocessed, the amount it
 * did process must be a multiple of four bytes, which in turn makes
 * the returned destination length a multiple of three.
 *
 * When the intrinsic meets an illegal base64 byte in src, it must
 * return the number of valid data bytes it has already written to
 * dst. The '=' pad character counts as an illegal Base64 character
 * here, so a 4-byte group that contains padding is never processed
 * by decodeBlock.
 *
 * The parameters do not allow a length check on dst; dst is assumed
 * to be large enough to hold the decoded bytes.
 *
 * @param src
 *        the source byte array of Base64 encoded bytes
 * @param sp
 *        the offset into src array to begin reading
 * @param sl
 *        the offset (exclusive) past the last byte to be converted.
 * @param dst
 *        the destination byte array of decoded data bytes
 * @param dp
 *        the offset into dst array to begin writing
 * @param isURL
 *        boolean, when true decode RFC4648 URL-safe base64 characters
 * @return the number of destination data bytes produced
 */
@IntrinsicCandidate
private int decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) {
    // Select the lookup table for the requested alphabet.
    final int[] table;
    if (isURL) {
        table = fromBase64URL;
    } else {
        table = fromBase64;
    }

    // Only whole 4-byte groups are handled; any 1-3 trailing bytes are left alone.
    final int limit = sp + ((sl - sp) & ~3);
    int out = dp;

    for (int in = sp; in < limit; in += 4) {
        final int c0 = table[src[in] & 0xff];
        final int c1 = table[src[in + 1] & 0xff];
        final int c2 = table[src[in + 2] & 0xff];
        final int c3 = table[src[in + 3] & 0xff];

        // A negative table entry marks a non base64 byte; its sign bit survives the OR.
        if ((c0 | c1 | c2 | c3) < 0) {
            break;
        }

        // Pack the four 6-bit values into 24 bits, then emit them high byte first.
        final int word = (c0 << 18) | (c1 << 12) | (c2 << 6) | c3;
        dst[out] = (byte) (word >> 16);
        dst[out + 1] = (byte) (word >> 8);
        dst[out + 2] = (byte) word;
        out += 3;
    }

    return out - dp;
}
private int decode0(byte[] src, int sp, int sl, byte[] dst) {
int[] base64 = isURL ? fromBase64URL : fromBase64;
int dp = 0;
@ -748,24 +809,20 @@ public class Base64 {
int shiftto = 18; // pos of first byte of 4-byte atom
while (sp < sl) {
if (shiftto == 18 && sp + 4 < sl) { // fast path
int sl0 = sp + ((sl - sp) & ~0b11);
while (sp < sl0) {
int b1 = base64[src[sp++] & 0xff];
int b2 = base64[src[sp++] & 0xff];
int b3 = base64[src[sp++] & 0xff];
int b4 = base64[src[sp++] & 0xff];
if ((b1 | b2 | b3 | b4) < 0) { // non base64 byte
sp -= 4;
break;
}
int bits0 = b1 << 18 | b2 << 12 | b3 << 6 | b4;
dst[dp++] = (byte)(bits0 >> 16);
dst[dp++] = (byte)(bits0 >> 8);
dst[dp++] = (byte)(bits0);
}
if (sp >= sl)
break;
if (shiftto == 18 && sp < sl - 4) { // fast path
int dl = decodeBlock(src, sp, sl, dst, dp, isURL);
/*
* Calculate how many characters were processed by how many
* bytes of data were returned.
*/
int chars_decoded = (dl / 3) * 4;
sp += chars_decoded;
dp += dl;
}
if (sp >= sl) {
// we're done
break;
}
int b = src[sp++] & 0xff;
if ((b = base64[b]) < 0) {