8316681: Rewrite URLEncoder.encode to use small reusable buffers

Reviewed-by: dfuchs, rriggs
2025-08-27 06:45:07 +02:00 · 2023-09-22 09:41:01 +00:00 · 2023-09-22 09:41:01 +00:00 · c24c66db97
commit c24c66db97
parent bd2439f3fc
4 changed files with 82 additions and 19 deletions
--- a/src/java.base/share/classes/java/net/URLEncoder.java
+++ b/src/java.base/share/classes/java/net/URLEncoder.java
@ -26,8 +26,13 @@
 package java.net;
 import java.io.UnsupportedEncodingException;
-import java.io.CharArrayWriter;
+import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException ;
 import java.util.BitSet;
@ -138,11 +143,6 @@ public class URLEncoder {
        DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
    }
    /**
     * Appends the percent-escaped form of a single byte to {@code out}:
     * a {@code '%'} followed by the byte's two upper-case hex digits.
     *
     * @param out the builder receiving the escaped byte
     * @param b the byte value to escape
     */
    private static void encodeByte(StringBuilder out, byte b) {
        HexFormat upperCaseHex = HexFormat.of().withUpperCase();
        out.append('%');
        upperCaseHex.toHexDigits(out, b);
    }
    /**
     * You can't call the constructor.
     */
@ -205,6 +205,8 @@ public class URLEncoder {
        }
    }
    // Capacity, in chars, of the reusable CharBuffer that encode() fills before
    // flushing; the matching ByteBuffer is sized as this value multiplied by the
    // encoder's maxBytesPerChar().
    private static final int ENCODING_CHUNK_SIZE = 8;
    /**
     * Translates a string into {@code application/x-www-form-urlencoded}
     * format using a specific {@linkplain Charset Charset}.
@ -239,11 +241,16 @@ public class URLEncoder {
        }
        StringBuilder out = new StringBuilder(s.length() << 1);
        CharArrayWriter charArrayWriter = new CharArrayWriter();
        if (i > 0) {
            out.append(s, 0, i);
        }
        CharsetEncoder ce = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
        ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));
        while (i < s.length()) {
            char c = s.charAt(i);
            if (DONT_NEED_ENCODING.test(c)) {
@ -255,7 +262,7 @@ public class URLEncoder {
            } else {
                // convert to external encoding before hex conversion
                do {
-                    charArrayWriter.write(c);
+                    cb.put(c);
                    /*
                     * If this character represents the start of a Unicode
                     * surrogate pair, then pass in two characters. It's not
@ -268,23 +275,63 @@ public class URLEncoder {
                        if ((i + 1) < s.length()) {
                            char d = s.charAt(i + 1);
                            if (Character.isLowSurrogate(d)) {
-                                charArrayWriter.write(d);
+                                cb.put(d);
                                i++;
                            }
                        }
                    }
                    // Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
                    // a surrogate pair on the next iteration
                    if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
                        flushToStringBuilder(out, ce, cb, bb, false);
                    }
                    i++;
                } while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
-
+                flushToStringBuilder(out, ce, cb, bb, true);
                String str = charArrayWriter.toString();
                byte[] ba = str.getBytes(charset);
                for (byte b : ba) {
                    encodeByte(out, b);
                }
                charArrayWriter.reset();
            }
        }
        return out.toString();
    }
    /**
     * Encodes input chars in {@code cb} and appends the byte values in an escaped
     * format ({@code "%XX"}, upper-case hex) to {@code out}. The temporary byte
     * buffer, {@code bb}, must be array-backed and able to accept
     * {@code cb.position() * ce.maxBytesPerChar()} bytes.
     *
     * <p>When {@code endOfInput} is false the encoder is allowed to leave trailing
     * input unconsumed (for example a high surrogate whose partner might still
     * arrive). Such chars are retained at the start of {@code cb} so that they are
     * encoded, or replaced, by a later flush instead of being silently dropped.
     *
     * @param out the StringBuilder to output encoded and escaped bytes to
     * @param ce charset encoder. Will be reset if endOfInput is true
     * @param cb input buffer, will be compacted: consumed chars are discarded and
     *           any chars the encoder did not consume are kept for the next flush
     * @param bb output buffer, will be cleared
     * @param endOfInput true if this is the last flush for an encoding chunk,
     *                   so that all bytes pending in ce are flushed to out and
     *                   ce is reset
     */
    private static void flushToStringBuilder(StringBuilder out,
                                             CharsetEncoder ce,
                                             CharBuffer cb,
                                             ByteBuffer bb,
                                             boolean endOfInput) {
        cb.flip();
        try {
            CoderResult cr = ce.encode(cb, bb, endOfInput);
            if (!cr.isUnderflow()) {
                cr.throwException();
            }
            if (endOfInput) {
                cr = ce.flush(bb);
                if (!cr.isUnderflow()) {
                    cr.throwException();
                }
                ce.reset();
            }
        } catch (CharacterCodingException x) {
            throw new Error(x); // Can't happen
        }
        HexFormat hex = HexFormat.of().withUpperCase();
        byte[] bytes = bb.array();
        int len = bb.position();
        for (int i = 0; i < len; i++) {
            out.append('%');
            hex.toHexDigits(out, bytes[i]);
        }
        // compact() rather than clear(): with endOfInput == false the encoder may
        // report underflow while leaving a trailing char (e.g. an unpaired high
        // surrogate) unconsumed; it must stay available to the next invocation.
        // When everything was consumed this is equivalent to clear().
        cb.compact();
        bb.clear();
    }
 }
--- a/src/java.base/share/classes/java/util/HexFormat.java
+++ b/src/java.base/share/classes/java/util/HexFormat.java
@ -679,9 +679,8 @@ public final class HexFormat {
     * @throws UncheckedIOException if an I/O exception occurs appending to the output
     */
    public <A extends Appendable> A toHexDigits(A out, byte value) {
        Objects.requireNonNull(out, "out");
        try {
-            out.append(toHighHexDigit(value));
+            out.append(toHighHexDigit(value)); // implicit null-check
            out.append(toLowHexDigit(value));
            return out;
        } catch (IOException ioe) {
--- a/test/jdk/java/net/URLEncoder/SurrogatePairs.java
+++ b/test/jdk/java/net/URLEncoder/SurrogatePairs.java
@ -58,6 +58,9 @@ public class SurrogatePairs {
                {"\uDBFF\uDC001", "%F4%8F%B0%801"},
                {"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
                {"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
                {"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
                {"\uDE0A\uD83D", "%3F%3F"},
                {"1\uDE0A\uD83D", "1%3F%3F"},
--- a/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java
+++ b/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java
@ -190,4 +190,18 @@ public class URLEncodeDecode {
    }
    /**
     * Benchmark: encodes every string in {@code testStringsEncode} with
     * ISO-8859-1 and hands each result to the blackhole.
     */
    @Benchmark
    public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
        for (String input : testStringsEncode) {
            String encoded = java.net.URLEncoder.encode(input, StandardCharsets.ISO_8859_1);
            bh.consume(encoded);
        }
    }
    /**
     * Benchmark: decodes every string in {@code testStringsDecode} with
     * ISO-8859-1 and hands each result to the blackhole.
     */
    @Benchmark
    public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
        for (String input : testStringsDecode) {
            String decoded = URLDecoder.decode(input, StandardCharsets.ISO_8859_1);
            bh.consume(decoded);
        }
    }
 }