8329623: NegativeArraySizeException encoding large String to UTF-8

Reviewed-by: naoto, rgiulietti
This commit is contained in:
Roger Riggs 2024-04-08 17:58:21 +00:00
parent dd930c573b
commit 212a253697
2 changed files with 78 additions and 2 deletions

View file

@ -1335,7 +1335,13 @@ public final class String
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
// UTF-8 encoded can be as much as 3 times the string length
// For very large estimate, (as in overflow of 32 bit int), precompute the exact size
long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3;
if (allocLen > (long)Integer.MAX_VALUE) {
throw new OutOfMemoryError("Required length exceeds implementation limit");
}
byte[] dst = new byte[(int) allocLen];
while (sp < sl) {
// ascii fast loop;
char c = StringUTF16.getChar(val, sp);
@ -1385,6 +1391,47 @@ public final class String
return Arrays.copyOf(dst, dp);
}
/**
* {@return the exact size required to UTF_8 encode this UTF16 string}
* @param val UTF16 encoded byte array
* @param doReplace true to replace unmappable characters
*/
private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
long dp = 0L;
int sp = 0;
int sl = val.length >> 1;
while (sp < sl) {
char c = StringUTF16.getChar(val, sp++);
if (c < 0x80) {
dp++;
} else if (c < 0x800) {
dp += 2;
} else if (Character.isSurrogate(c)) {
int uc = -1;
char c2;
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
if (doReplace) {
dp++;
} else {
throwUnmappable(sp - 1);
}
} else {
dp += 4;
sp++; // 2 chars
}
} else {
// 3 bytes, 16 bits
dp += 3;
}
}
return dp;
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The