8316681: Rewrite URLEncoder.encode to use small reusable buffers

Reviewed-by: dfuchs, rriggs
This commit is contained in:
Claes Redestad 2023-09-22 09:41:01 +00:00
parent bd2439f3fc
commit c24c66db97
4 changed files with 82 additions and 19 deletions

View file

@ -26,8 +26,13 @@
package java.net; package java.net;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.io.CharArrayWriter; import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException ; import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet; import java.util.BitSet;
@ -138,11 +143,6 @@ public class URLEncoder {
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding(); DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
} }
private static void encodeByte(StringBuilder out, byte b) {
out.append('%');
HexFormat.of().withUpperCase().toHexDigits(out, b);
}
/** /**
* You can't call the constructor. * You can't call the constructor.
*/ */
@ -205,6 +205,8 @@ public class URLEncoder {
} }
} }
private static final int ENCODING_CHUNK_SIZE = 8;
/** /**
* Translates a string into {@code application/x-www-form-urlencoded} * Translates a string into {@code application/x-www-form-urlencoded}
* format using a specific {@linkplain Charset Charset}. * format using a specific {@linkplain Charset Charset}.
@ -239,11 +241,16 @@ public class URLEncoder {
} }
StringBuilder out = new StringBuilder(s.length() << 1); StringBuilder out = new StringBuilder(s.length() << 1);
CharArrayWriter charArrayWriter = new CharArrayWriter();
if (i > 0) { if (i > 0) {
out.append(s, 0, i); out.append(s, 0, i);
} }
CharsetEncoder ce = charset.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));
while (i < s.length()) { while (i < s.length()) {
char c = s.charAt(i); char c = s.charAt(i);
if (DONT_NEED_ENCODING.test(c)) { if (DONT_NEED_ENCODING.test(c)) {
@ -255,7 +262,7 @@ public class URLEncoder {
} else { } else {
// convert to external encoding before hex conversion // convert to external encoding before hex conversion
do { do {
charArrayWriter.write(c); cb.put(c);
/* /*
* If this character represents the start of a Unicode * If this character represents the start of a Unicode
* surrogate pair, then pass in two characters. It's not * surrogate pair, then pass in two characters. It's not
@ -268,23 +275,63 @@ public class URLEncoder {
if ((i + 1) < s.length()) { if ((i + 1) < s.length()) {
char d = s.charAt(i + 1); char d = s.charAt(i + 1);
if (Character.isLowSurrogate(d)) { if (Character.isLowSurrogate(d)) {
charArrayWriter.write(d); cb.put(d);
i++; i++;
} }
} }
} }
// Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
// a surrogate pair on the next iteration
if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
flushToStringBuilder(out, ce, cb, bb, false);
}
i++; i++;
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i)))); } while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
flushToStringBuilder(out, ce, cb, bb, true);
String str = charArrayWriter.toString();
byte[] ba = str.getBytes(charset);
for (byte b : ba) {
encodeByte(out, b);
}
charArrayWriter.reset();
} }
} }
return out.toString(); return out.toString();
} }
/**
 * Encodes the chars accumulated in {@code cb} and appends the resulting bytes
 * to {@code out} in escaped form ({@code "%XX"}, upper-case hex digits).
 * The temporary byte buffer, {@code bb}, must be able to accept
 * {@code cb.position() * ce.maxBytesPerChar()} bytes.
 *
 * <p>When {@code endOfInput} is {@code false} the encoder may leave a trailing
 * char unconsumed (for example a high surrogate that could still be completed
 * by later input). Such chars are kept at the start of {@code cb} so that the
 * next flush encodes them, instead of being silently dropped.
 *
 * @param out the StringBuilder to output encoded and escaped bytes to
 * @param ce charset encoder. Will be flushed and reset if endOfInput is true
 * @param cb input buffer; on return it is ready for further {@code put} calls
 *           and holds only the chars the encoder has not yet consumed
 * @param bb output buffer, will be cleared
 * @param endOfInput true if this is the last flush for an encoding chunk,
 *           so that all bytes pending in ce are flushed to out and ce is reset
 */
private static void flushToStringBuilder(StringBuilder out,
                                         CharsetEncoder ce,
                                         CharBuffer cb,
                                         ByteBuffer bb,
                                         boolean endOfInput) {
    cb.flip();
    try {
        CoderResult cr = ce.encode(cb, bb, endOfInput);
        if (!cr.isUnderflow()) {
            cr.throwException();
        }
        if (endOfInput) {
            cr = ce.flush(bb);
            if (!cr.isUnderflow()) {
                cr.throwException();
            }
            ce.reset();
        }
    } catch (CharacterCodingException x) {
        throw new Error(x); // Can't happen
    }
    HexFormat hex = HexFormat.of().withUpperCase();
    byte[] bytes = bb.array();
    int len = bb.position();
    for (int i = 0; i < len; i++) {
        out.append('%');
        hex.toHexDigits(out, bytes[i]);
    }
    // compact() rather than clear(): an underflow with endOfInput == false can
    // leave an unpaired high surrogate unread in cb; it must be carried over to
    // the next flush so that it is still encoded (as the replacement).
    // When everything was consumed this is equivalent to clear().
    cb.compact();
    bb.clear();
}
} }

View file

@ -679,9 +679,8 @@ public final class HexFormat {
* @throws UncheckedIOException if an I/O exception occurs appending to the output * @throws UncheckedIOException if an I/O exception occurs appending to the output
*/ */
public <A extends Appendable> A toHexDigits(A out, byte value) { public <A extends Appendable> A toHexDigits(A out, byte value) {
Objects.requireNonNull(out, "out");
try { try {
out.append(toHighHexDigit(value)); out.append(toHighHexDigit(value)); // implicit null-check
out.append(toLowHexDigit(value)); out.append(toLowHexDigit(value));
return out; return out;
} catch (IOException ioe) { } catch (IOException ioe) {

View file

@ -58,6 +58,9 @@ public class SurrogatePairs {
{"\uDBFF\uDC001", "%F4%8F%B0%801"}, {"\uDBFF\uDC001", "%F4%8F%B0%801"},
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"}, {"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"}, {"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"}, {"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
{"\uDE0A\uD83D", "%3F%3F"}, {"\uDE0A\uD83D", "%3F%3F"},
{"1\uDE0A\uD83D", "1%3F%3F"}, {"1\uDE0A\uD83D", "1%3F%3F"},

View file

@ -190,4 +190,18 @@ public class URLEncodeDecode {
} }
/**
 * Encodes every string in {@code testStringsEncode} with
 * {@link StandardCharsets#ISO_8859_1} and hands each result to the blackhole.
 */
@Benchmark
public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String input : testStringsEncode) {
        String encoded = java.net.URLEncoder.encode(input, StandardCharsets.ISO_8859_1);
        bh.consume(encoded);
    }
}
/**
 * Decodes every string in {@code testStringsDecode} with
 * {@link StandardCharsets#ISO_8859_1} and hands each result to the blackhole.
 */
@Benchmark
public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String input : testStringsDecode) {
        String decoded = URLDecoder.decode(input, StandardCharsets.ISO_8859_1);
        bh.consume(decoded);
    }
}
} }