8325340: Add ASCII fast-path to Data-/ObjectInputStream.readUTF

Reviewed-by: rgiulietti, bpb, rriggs
2025-08-26 14:24:46 +02:00 · 2024-02-26 16:05:18 +00:00 · 2024-02-26 16:05:18 +00:00 · 9a9cfbe0ba
commit 9a9cfbe0ba
parent 3780ad3133
4 changed files with 362 additions and 24 deletions
--- a/src/java.base/share/classes/java/io/DataInputStream.java
+++ b/src/java.base/share/classes/java/io/DataInputStream.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -571,10 +571,10 @@ loop:   while (true) {
     *               valid modified UTF-8 encoding of a Unicode string.
     * @see        java.io.DataInputStream#readUnsignedShort()
     */
-    public static final String readUTF(DataInput in) throws IOException {
+    public static String readUTF(DataInput in) throws IOException {
        int utflen = in.readUnsignedShort();
-        byte[] bytearr = null;
-        char[] chararr = null;
+        byte[] bytearr;
+        char[] chararr;
        if (in instanceof DataInputStream dis) {
            if (dis.bytearr.length < utflen) {
                dis.bytearr = new byte[utflen*2];
--- a/src/java.base/share/classes/java/io/ObjectInputStream.java
+++ b/src/java.base/share/classes/java/io/ObjectInputStream.java
@ -33,6 +33,7 @@ import java.lang.reflect.Array;
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Proxy;
+import java.nio.charset.StandardCharsets;
 import java.security.AccessControlContext;
 import java.security.AccessController;
 import java.security.PrivilegedAction;
@ -42,6 +43,7 @@ import java.util.Arrays;
 import java.util.Map;
 import java.util.Objects;

+import jdk.internal.access.JavaLangAccess;
 import jdk.internal.access.SharedSecrets;
 import jdk.internal.event.DeserializationEvent;
 import jdk.internal.misc.Unsafe;
@ -2995,6 +2997,8 @@ public class ObjectInputStream
        private static final int CHAR_BUF_SIZE = 256;
        /** readBlockHeader() return value indicating header read may block */
        private static final int HEADER_BLOCKED = -2;
+        /** access to internal methods to count ASCII and inflate latin1/ASCII bytes to char */
+        private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

        /** buffer for reading general/block data */
        private final byte[] buf = new byte[MAX_BLOCK_SIZE];
@ -3671,8 +3675,32 @@ public class ObjectInputStream
         * utflen bytes.
         */
        private String readUTFBody(long utflen) throws IOException {
+            if (!blkmode) {
+                end = pos = 0;
+            }
+
            StringBuilder sbuf;
            if (utflen > 0 && utflen < Integer.MAX_VALUE) {
+                // Scan for leading ASCII chars
+                int avail = end - pos;
+                int ascii = JLA.countPositives(buf, pos, Math.min(avail, (int)utflen));
+                if (ascii == utflen) {
+                    // Complete match, consume the buf[pos ... pos + ascii] range and return.
+                    // Modified UTF-8 and ISO-8859-1 are both ASCII-compatible encodings bytes
+                    // thus we can treat the range as ISO-8859-1 and avoid a redundant scan
+                    // in the String constructor
+                    String utf = new String(buf, pos, ascii, StandardCharsets.ISO_8859_1);
+                    pos += ascii;
+                    return utf;
+                }
+                // Avoid allocating a StringBuilder if there's enough data in buf and
+                // cbuf is large enough
+                if (avail >= utflen && utflen <= CHAR_BUF_SIZE) {
+                    JLA.inflateBytesToChars(buf, pos, cbuf, 0, ascii);
+                    pos += ascii;
+                    int cbufPos = readUTFSpan(ascii, utflen - ascii);
+                    return new String(cbuf, 0, cbufPos);
+                }
                // a reasonable initial capacity based on the UTF length
                int initialCapacity = Math.min((int)utflen, 0xFFFF);
                sbuf = new StringBuilder(initialCapacity);
@ -3680,14 +3708,14 @@ public class ObjectInputStream
                sbuf = new StringBuilder();
            }

-            if (!blkmode) {
-                end = pos = 0;
-            }
-
            while (utflen > 0) {
                int avail = end - pos;
                if (avail >= 3 || (long) avail == utflen) {
-                    utflen -= readUTFSpan(sbuf, utflen);
+                    int cbufPos = readUTFSpan(0, utflen);
+                    // pos has advanced: adjust utflen by the difference in
+                    // available bytes
+                    utflen -= avail - (end - pos);
+                    sbuf.append(cbuf, 0, cbufPos);
                } else {
                    if (blkmode) {
                        // near block boundary, read one byte at a time
@ -3709,18 +3737,17 @@ public class ObjectInputStream

        /**
         * Reads span of UTF-encoded characters out of internal buffer
-         * (starting at offset pos and ending at or before offset end),
-         * consuming no more than utflen bytes.  Appends read characters to
-         * sbuf.  Returns the number of bytes consumed.
+         * (starting at offset pos), consuming no more than utflen bytes.
+         * Appends read characters to cbuf. Returns the current position
+         * in cbuf.
         */
-        private long readUTFSpan(StringBuilder sbuf, long utflen)
+        private int readUTFSpan(int cpos, long utflen)
            throws IOException
        {
-            int cpos = 0;
            int start = pos;
            int avail = Math.min(end - pos, CHAR_BUF_SIZE);
            // stop short of last char unless all of utf bytes in buffer
-            int stop = pos + ((utflen > avail) ? avail - 2 : (int) utflen);
+            int stop = start + ((utflen > avail) ? avail - 2 : (int) utflen);
            boolean outOfBounds = false;

            try {
@ -3765,9 +3792,7 @@ public class ObjectInputStream
                    throw new UTFDataFormatException();
                }
            }
-
-            sbuf.append(cbuf, 0, cpos);
-            return pos - start;
+            return cpos;
        }

        /**