mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 06:45:07 +02:00
8316681: Rewrite URLEncoder.encode to use small reusable buffers
Reviewed-by: dfuchs, rriggs
This commit is contained in:
parent
bd2439f3fc
commit
c24c66db97
4 changed files with 82 additions and 19 deletions
|
@ -26,8 +26,13 @@
|
||||||
package java.net;
|
package java.net;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.io.CharArrayWriter;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharacterCodingException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.CharsetEncoder;
|
||||||
|
import java.nio.charset.CoderResult;
|
||||||
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.nio.charset.IllegalCharsetNameException;
|
import java.nio.charset.IllegalCharsetNameException;
|
||||||
import java.nio.charset.UnsupportedCharsetException ;
|
import java.nio.charset.UnsupportedCharsetException ;
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
@ -138,11 +143,6 @@ public class URLEncoder {
|
||||||
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
|
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Appends the escaped form of a single byte to {@code out}: a literal
 * {@code '%'} followed by the hex digits of {@code b} as produced by an
 * upper-case {@link HexFormat}.
 *
 * @param out the builder that receives the escaped byte
 * @param b the byte value to escape
 */
private static void encodeByte(StringBuilder out, byte b) {
|
|
||||||
out.append('%');
|
|
||||||
HexFormat.of().withUpperCase().toHexDigits(out, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* You can't call the constructor.
|
* You can't call the constructor.
|
||||||
*/
|
*/
|
||||||
|
@ -205,6 +205,8 @@ public class URLEncoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final int ENCODING_CHUNK_SIZE = 8;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates a string into {@code application/x-www-form-urlencoded}
|
* Translates a string into {@code application/x-www-form-urlencoded}
|
||||||
* format using a specific {@linkplain Charset Charset}.
|
* format using a specific {@linkplain Charset Charset}.
|
||||||
|
@ -239,11 +241,16 @@ public class URLEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuilder out = new StringBuilder(s.length() << 1);
|
StringBuilder out = new StringBuilder(s.length() << 1);
|
||||||
CharArrayWriter charArrayWriter = new CharArrayWriter();
|
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
out.append(s, 0, i);
|
out.append(s, 0, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CharsetEncoder ce = charset.newEncoder()
|
||||||
|
.onMalformedInput(CodingErrorAction.REPLACE)
|
||||||
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||||
|
CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
|
||||||
|
ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));
|
||||||
|
|
||||||
while (i < s.length()) {
|
while (i < s.length()) {
|
||||||
char c = s.charAt(i);
|
char c = s.charAt(i);
|
||||||
if (DONT_NEED_ENCODING.test(c)) {
|
if (DONT_NEED_ENCODING.test(c)) {
|
||||||
|
@ -255,7 +262,7 @@ public class URLEncoder {
|
||||||
} else {
|
} else {
|
||||||
// convert to external encoding before hex conversion
|
// convert to external encoding before hex conversion
|
||||||
do {
|
do {
|
||||||
charArrayWriter.write(c);
|
cb.put(c);
|
||||||
/*
|
/*
|
||||||
* If this character represents the start of a Unicode
|
* If this character represents the start of a Unicode
|
||||||
* surrogate pair, then pass in two characters. It's not
|
* surrogate pair, then pass in two characters. It's not
|
||||||
|
@ -268,23 +275,63 @@ public class URLEncoder {
|
||||||
if ((i + 1) < s.length()) {
|
if ((i + 1) < s.length()) {
|
||||||
char d = s.charAt(i + 1);
|
char d = s.charAt(i + 1);
|
||||||
if (Character.isLowSurrogate(d)) {
|
if (Character.isLowSurrogate(d)) {
|
||||||
charArrayWriter.write(d);
|
cb.put(d);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
|
||||||
|
// a surrogate pair on the next iteration
|
||||||
|
if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
|
||||||
|
flushToStringBuilder(out, ce, cb, bb, false);
|
||||||
|
}
|
||||||
i++;
|
i++;
|
||||||
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
|
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
|
||||||
|
flushToStringBuilder(out, ce, cb, bb, true);
|
||||||
String str = charArrayWriter.toString();
|
|
||||||
byte[] ba = str.getBytes(charset);
|
|
||||||
for (byte b : ba) {
|
|
||||||
encodeByte(out, b);
|
|
||||||
}
|
|
||||||
charArrayWriter.reset();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return out.toString();
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes input chars in {@code cb} and appends the byte values in an escaped
|
||||||
|
* format ({@code "%XX"}) to {@code out}. The temporary byte buffer, {@code bb},
|
||||||
|
* must be able to accept {@code cb.position() * ce.maxBytesPerChar()} bytes.
|
||||||
|
*
|
||||||
|
* @param out the StringBuilder to output encoded and escaped bytes to
|
||||||
|
* @param ce charset encoder. Will be reset if endOfInput is true
|
||||||
|
* @param cb input buffer, will be cleared
|
||||||
|
* @param bb output buffer, will be cleared
|
||||||
|
* @param endOfInput true if this is the last flush for an encoding chunk,
|
||||||
|
* to all bytes in ce is flushed to out and reset
|
||||||
|
*/
|
||||||
|
private static void flushToStringBuilder(StringBuilder out,
|
||||||
|
CharsetEncoder ce,
|
||||||
|
CharBuffer cb,
|
||||||
|
ByteBuffer bb,
|
||||||
|
boolean endOfInput) {
|
||||||
|
cb.flip();
|
||||||
|
try {
|
||||||
|
CoderResult cr = ce.encode(cb, bb, endOfInput);
|
||||||
|
if (!cr.isUnderflow())
|
||||||
|
cr.throwException();
|
||||||
|
if (endOfInput) {
|
||||||
|
cr = ce.flush(bb);
|
||||||
|
if (!cr.isUnderflow())
|
||||||
|
cr.throwException();
|
||||||
|
ce.reset();
|
||||||
|
}
|
||||||
|
} catch (CharacterCodingException x) {
|
||||||
|
throw new Error(x); // Can't happen
|
||||||
|
}
|
||||||
|
HexFormat hex = HexFormat.of().withUpperCase();
|
||||||
|
byte[] bytes = bb.array();
|
||||||
|
int len = bb.position();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
out.append('%');
|
||||||
|
hex.toHexDigits(out, bytes[i]);
|
||||||
|
}
|
||||||
|
cb.clear();
|
||||||
|
bb.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -679,9 +679,8 @@ public final class HexFormat {
|
||||||
* @throws UncheckedIOException if an I/O exception occurs appending to the output
|
* @throws UncheckedIOException if an I/O exception occurs appending to the output
|
||||||
*/
|
*/
|
||||||
public <A extends Appendable> A toHexDigits(A out, byte value) {
|
public <A extends Appendable> A toHexDigits(A out, byte value) {
|
||||||
Objects.requireNonNull(out, "out");
|
|
||||||
try {
|
try {
|
||||||
out.append(toHighHexDigit(value));
|
out.append(toHighHexDigit(value)); // implicit null-check
|
||||||
out.append(toLowHexDigit(value));
|
out.append(toLowHexDigit(value));
|
||||||
return out;
|
return out;
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
|
|
|
@ -58,6 +58,9 @@ public class SurrogatePairs {
|
||||||
{"\uDBFF\uDC001", "%F4%8F%B0%801"},
|
{"\uDBFF\uDC001", "%F4%8F%B0%801"},
|
||||||
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
|
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
|
||||||
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
|
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
|
||||||
|
{"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||||
|
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||||
|
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||||
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
|
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
|
||||||
{"\uDE0A\uD83D", "%3F%3F"},
|
{"\uDE0A\uD83D", "%3F%3F"},
|
||||||
{"1\uDE0A\uD83D", "1%3F%3F"},
|
{"1\uDE0A\uD83D", "1%3F%3F"},
|
||||||
|
|
|
@ -190,4 +190,18 @@ public class URLEncodeDecode {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Benchmark: calls {@code java.net.URLEncoder.encode} with the ISO-8859-1
 * charset on every string in {@code testStringsEncode}, passing each
 * result to {@code bh.consume}.
 */
@Benchmark
|
||||||
|
public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
|
||||||
|
for (String s : testStringsEncode) {
|
||||||
|
bh.consume(java.net.URLEncoder.encode(s, StandardCharsets.ISO_8859_1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Benchmark: calls {@code URLDecoder.decode} with the ISO-8859-1 charset
 * on every string in {@code testStringsDecode}, passing each result to
 * {@code bh.consume}.
 */
@Benchmark
|
||||||
|
public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
|
||||||
|
for (String s : testStringsDecode) {
|
||||||
|
bh.consume(URLDecoder.decode(s, StandardCharsets.ISO_8859_1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue