8260617: Merge ZipFile encoding check with the initial hash calculation

Reviewed-by: lancea
2025-08-28 15:24:43 +02:00 · 2021-02-03 11:43:17 +00:00 · 2021-02-03 11:43:17 +00:00 · c8de943c1f
commit c8de943c1f
parent ae2c5f07ce
2 changed files with 109 additions and 125 deletions
--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@ -54,14 +54,6 @@ class ZipCoder {
        return new ZipCoder(charset);
    }

-    void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
-        try {
-            toString(a, pos, nlen);
-        } catch(Exception e) {
-            throw new ZipException("invalid CEN header (bad entry name)");
-        }
-    }
-
    String toString(byte[] ba, int off, int length) {
        try {
            return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
@ -98,10 +90,6 @@ class ZipCoder {
        return UTF8.toString(ba, 0, len);
    }

-    static String toStringUTF8(byte[] ba, int off, int len) {
-        return UTF8.toString(ba, off, len);
-    }
-
    boolean isUTF8() {
        return false;
    }
@ -110,15 +98,33 @@ class ZipCoder {
    // we first decoded the byte sequence to a String, then appended '/' if no
    // trailing slash was found, then called String.hashCode(). This
    // normalization ensures we can simplify and speed up lookups.
-    int normalizedHash(byte[] a, int off, int len) {
+    //
+    // Does encoding error checking and hashing in a single pass for efficiency.
+    // On an error, this function will throw CharacterCodingException while the
+    // UTF8ZipCoder override will throw IllegalArgumentException, so we declare
+    // throws Exception to keep things simple.
+    int checkedHash(byte[] a, int off, int len) throws Exception {
        if (len == 0) {
            return 0;
        }
-        return normalizedHashDecode(0, a, off, off + len);
+
+        int h = 0;
+        // cb will be a newly allocated CharBuffer with pos == 0,
+        // arrayOffset == 0, backed by an array.
+        CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len));
+        int limit = cb.limit();
+        char[] decoded = cb.array();
+        for (int i = 0; i < limit; i++) {
+            h = 31 * h + decoded[i];
+        }
+        if (limit > 0 && decoded[limit - 1] != '/') {
+            h = 31 * h + '/';
+        }
+        return h;
    }

-    // Matching normalized hash code function for Strings
-    static int normalizedHash(String name) {
+    // Hash function equivalent of checkedHash for String inputs
+    static int hash(String name) {
        int hsh = name.hashCode();
        int len = name.length();
        if (len > 0 && name.charAt(len - 1) != '/') {
@ -133,29 +139,6 @@ class ZipCoder {
            Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
    }

-    // Implements normalizedHash by decoding byte[] to char[] and then computing
-    // the hash. This is a slow-path used for non-UTF8 charsets and also when
-    // aborting the ASCII fast-path in the UTF8 implementation, so {@code h}
-    // might be a partially calculated hash code
-    int normalizedHashDecode(int h, byte[] a, int off, int end) {
-        try {
-            // cb will be a newly allocated CharBuffer with pos == 0,
-            // arrayOffset == 0, backed by an array.
-            CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, end - off));
-            int limit = cb.limit();
-            char[] decoded = cb.array();
-            for (int i = 0; i < limit; i++) {
-                h = 31 * h + decoded[i];
-            }
-            if (limit > 0 && decoded[limit - 1] != '/') {
-                h = 31 * h + '/';
-            }
-        } catch (CharacterCodingException cce) {
-            // Ignore - return the hash code generated so far.
-        }
-        return h;
-    }
-
    private byte[] slashBytes;
    private final Charset cs;
    protected CharsetDecoder dec;
@ -211,25 +194,6 @@ class ZipCoder {
            return true;
        }

-        @Override
-        void checkEncoding(byte[] a, int pos, int len) throws ZipException {
-            try {
-                int end = pos + len;
-                while (pos < end) {
-                    // ASCII fast-path: When checking that a range of bytes is
-                    // valid UTF-8, we can avoid some allocation by skipping
-                    // past bytes in the 0-127 range
-                    if (a[pos] < 0) {
-                        ZipCoder.toStringUTF8(a, pos, end - pos);
-                        break;
-                    }
-                    pos++;
-                }
-            } catch(Exception e) {
-                throw new ZipException("invalid CEN header (bad entry name)");
-            }
-        }
-
        @Override
        String toString(byte[] ba, int off, int length) {
            return JLA.newStringUTF8NoRepl(ba, off, length);
@ -241,7 +205,7 @@ class ZipCoder {
        }

        @Override
-        int normalizedHash(byte[] a, int off, int len) {
+        int checkedHash(byte[] a, int off, int len) throws Exception {
            if (len == 0) {
                return 0;
            }
@ -250,18 +214,17 @@ class ZipCoder {
            int h = 0;
            while (off < end) {
                byte b = a[off];
-                if (b < 0) {
+                if (b >= 0) {
+                    // ASCII, keep going
+                    h = 31 * h + b;
+                    off++;
+                } else {
                    // Non-ASCII, fall back to decoding a String
                    // We avoid using decoder() here since the UTF8ZipCoder is
                    // shared and that decoder is not thread safe.
-                    // We also avoid the JLA.newStringUTF8NoRepl variant at
-                    // this point to avoid throwing exceptions eagerly when
-                    // opening ZipFiles (exceptions are expected when accessing
-                    // malformed entries.)
-                    return normalizedHash(new String(a, end - len, len, UTF_8.INSTANCE));
-                } else {
-                    h = 31 * h + b;
-                    off++;
+                    // We use the JLA.newStringUTF8NoRepl variant to throw
+                    // exceptions eagerly when opening ZipFiles
+                    return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
                }
            }