8184947: ZipCoder performance improvements

Reviewed-by: martin, redestad
2025-08-27 14:54:52 +02:00 · 2017-12-13 07:51:57 -08:00 · 2017-12-13 07:51:57 -08:00 · 4f0ea9242f
commit 4f0ea9242f
parent 1ae8f54835
9 changed files with 620 additions and 881 deletions
--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@ -28,72 +28,60 @@ package java.util.zip;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CodingErrorAction;
-import java.util.Arrays;
-import sun.nio.cs.ArrayDecoder;
-import sun.nio.cs.ArrayEncoder;
+
+import static java.nio.charset.StandardCharsets.UTF_8;

 /**
 * Utility class for zipfile name and comment decoding and encoding
 */

-final class ZipCoder {
+class ZipCoder {

-    private static boolean isASCII(byte[] ba, int off, int len) {
-        for (int i = off; i < off + len; i++) {
-            if (ba[i] < 0)
-                return false;
+    private static final jdk.internal.misc.JavaLangAccess JLA =
+        jdk.internal.misc.SharedSecrets.getJavaLangAccess();
+
+    static final class UTF8 extends ZipCoder {
+
+        UTF8(Charset utf8) {
+            super(utf8);
+        }
+
+        @Override
+        boolean isUTF8() {
+            return true;
+        }
+
+        @Override
+        String toString(byte[] ba, int off, int length) {
+            return JLA.newStringUTF8NoRepl(ba, off, length);
+        }
+
+        @Override
+        byte[] getBytes(String s) {
+            return JLA.getBytesUTF8NoRepl(s);
        }
-        return true;
    }

-    private static boolean hasReplaceChar(byte[] ba) {
-        for (int i = 0; i < ba.length; i++) {
-            if (ba[i] == (byte)'?')
-                return true;
-        }
-        return false;
+    // UTF8 encodes/decodes via JLA without a per-instance coder, so share one instance.
+    private static ZipCoder utf8 = new UTF8(UTF_8);
+
+    public static ZipCoder get(Charset charset) {
+        if (charset == UTF_8)
+            return utf8;
+        return new ZipCoder(charset);
    }

    String toString(byte[] ba, int off, int length) {
+        try {
+              return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();

-        // fastpath for UTF-8 cs and ascii only name, leverage the
-        // compact string impl to avoid the unnecessary char[] copy/
-        // paste. A temporary workaround before we have better approach,
-        // such as a String constructor that throws exception for
-        // malformed and/or unmappable characters, instead of silently
-        // replacing with repl char
-        if (isUTF8 && isASCII(ba, off, length)) {
-            return new String(ba, off, length, cs);
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);
        }
-
-        CharsetDecoder cd = decoder().reset();
-        int len = (int)(length * cd.maxCharsPerByte());
-        char[] ca = new char[len];
-        if (len == 0)
-            return new String(ca);
-        // UTF-8 only for now. Other ArrayDeocder only handles
-        // CodingErrorAction.REPLACE mode. ZipCoder uses
-        // REPORT mode.
-        if (isUTF8 && cd instanceof ArrayDecoder) {
-            int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
-            if (clen == -1)    // malformed
-                throw new IllegalArgumentException("MALFORMED");
-            return new String(ca, 0, clen);
-        }
-        ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        CoderResult cr = cd.decode(bb, cb, true);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        cr = cd.flush(cb);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        return new String(ca, 0, cb.position());
    }

    String toString(byte[] ba, int length) {
@ -105,84 +93,47 @@ final class ZipCoder {
    }

    byte[] getBytes(String s) {
-        if (isUTF8) {
-            // fastpath for UTF8. should only occur when the string
-            // has malformed surrogates. A postscan should still be
-            // faster and use less memory.
-            byte[] ba = s.getBytes(cs);
-            if (!hasReplaceChar(ba)) {
-                return ba;
+        try {
+            ByteBuffer bb = encoder().encode(CharBuffer.wrap(s));
+            int pos = bb.position();
+            int limit = bb.limit();
+            if (bb.hasArray() && pos == 0 && limit == bb.capacity()) {
+                return bb.array();
            }
+            byte[] bytes = new byte[bb.limit() - bb.position()];
+            bb.get(bytes);
+            return bytes;
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);
        }
-        CharsetEncoder ce = encoder().reset();
-        char[] ca = s.toCharArray();
-        int len = (int)(ca.length * ce.maxBytesPerChar());
-        byte[] ba = new byte[len];
-        if (len == 0)
-            return ba;
-        // UTF-8 only for now. Other ArrayDeocder only handles
-        // CodingErrorAction.REPLACE mode.
-        if (isUTF8 && ce instanceof ArrayEncoder) {
-            int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
-            if (blen == -1)    // malformed
-                throw new IllegalArgumentException("MALFORMED");
-            return Arrays.copyOf(ba, blen);
-        }
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        CoderResult cr = ce.encode(cb, bb, true);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        cr = ce.flush(bb);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        if (bb.position() == ba.length)  // defensive copy?
-            return ba;
-        else
-            return Arrays.copyOf(ba, bb.position());
    }

    // assume invoked only if "this" is not utf8
    byte[] getBytesUTF8(String s) {
-        if (isUTF8)
-            return getBytes(s);
-        if (utf8 == null)
-            utf8 = new ZipCoder(StandardCharsets.UTF_8);
        return utf8.getBytes(s);
    }

    String toStringUTF8(byte[] ba, int len) {
-        return toStringUTF8(ba, 0, len);
+        return utf8.toString(ba, 0, len);
    }

    String toStringUTF8(byte[] ba, int off, int len) {
-        if (isUTF8)
-            return toString(ba, off, len);
-        if (utf8 == null)
-            utf8 = new ZipCoder(StandardCharsets.UTF_8);
        return utf8.toString(ba, off, len);
    }

    boolean isUTF8() {
-        return isUTF8;
+        return false;
    }

    private Charset cs;
    private CharsetDecoder dec;
    private CharsetEncoder enc;
-    private boolean isUTF8;
-    private ZipCoder utf8;

    private ZipCoder(Charset cs) {
        this.cs = cs;
-        this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
    }

-    static ZipCoder get(Charset charset) {
-        return new ZipCoder(charset);
-    }
-
-    private CharsetDecoder decoder() {
+    protected CharsetDecoder decoder() {
        if (dec == null) {
            dec = cs.newDecoder()
              .onMalformedInput(CodingErrorAction.REPORT)
@ -191,7 +142,7 @@ final class ZipCoder {
        return dec;
    }

-    private CharsetEncoder encoder() {
+    protected CharsetEncoder encoder() {
        if (enc == null) {
            enc = cs.newEncoder()
              .onMalformedInput(CodingErrorAction.REPORT)