8243469: Lazily encode name in ZipFile.getEntryPos

Co-authored-by: Eirik Bjørsnøs <eirbjo@gmail.com>
Reviewed-by: lancea, simonis
This commit is contained in:
Claes Redestad 2020-04-27 17:26:05 +02:00
parent c55e7d5a4a
commit d2e0d0e06a
3 changed files with 337 additions and 136 deletions

View file

@ -32,6 +32,7 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import sun.nio.cs.UTF_8;
@ -43,51 +44,13 @@ class ZipCoder {
private static final jdk.internal.access.JavaLangAccess JLA =
jdk.internal.access.SharedSecrets.getJavaLangAccess();
static final class UTF8ZipCoder extends ZipCoder {
// Encoding/decoding is stateless, so make it singleton.
static final ZipCoder INSTANCE = new UTF8ZipCoder(UTF_8.INSTANCE);
private UTF8ZipCoder(Charset utf8) {
super(utf8);
}
@Override
boolean isUTF8() {
return true;
}
@Override
String toString(byte[] ba, int off, int length) {
return JLA.newStringUTF8NoRepl(ba, off, length);
}
@Override
byte[] getBytes(String s) {
return JLA.getBytesUTF8NoRepl(s);
}
@Override
int hashN(byte[] a, int off, int len) {
// Performance optimization: when UTF8-encoded, ZipFile.getEntryPos
// assume that the hash of a name remains unchanged when appending a
// trailing '/', which allows lookups to avoid rehashing
int end = off + len;
if (len > 0 && a[end - 1] == '/') {
end--;
}
int h = 1;
for (int i = off; i < end; i++) {
h = 31 * h + a[i];
}
return h;
}
}
// Encoding/decoding is stateless, so make it singleton.
static final UTF8ZipCoder UTF8 = new UTF8ZipCoder(UTF_8.INSTANCE);
public static ZipCoder get(Charset charset) {
if (charset == UTF_8.INSTANCE)
return UTF8ZipCoder.INSTANCE;
if (charset == UTF_8.INSTANCE) {
return UTF8;
}
return new ZipCoder(charset);
}
@ -123,40 +86,74 @@ class ZipCoder {
}
}
// assume invoked only if "this" is not utf8
byte[] getBytesUTF8(String s) {
return UTF8ZipCoder.INSTANCE.getBytes(s);
}
String toStringUTF8(byte[] ba, int len) {
return UTF8ZipCoder.INSTANCE.toString(ba, 0, len);
}
String toStringUTF8(byte[] ba, int off, int len) {
return UTF8ZipCoder.INSTANCE.toString(ba, off, len);
return UTF8.toString(ba, 0, len);
}
boolean isUTF8() {
return false;
}
int hashN(byte[] a, int off, int len) {
int h = 1;
while (len-- > 0) {
h = 31 * h + a[off++];
// Hash code functions for ZipFile entry names.
//
// The value produced is the one you would get by decoding the byte
// sequence to a String, appending '/' when the decoded name does not
// already end with one, and then calling String.hashCode(). Hashing the
// normalized form lets "name" and "name/" land in the same bucket, which
// simplifies and speeds up lookups.
//
// a   - array holding the encoded name
// off - index of the first byte of the name
// len - number of bytes in the name; an empty name hashes to 0
int normalizedHash(byte[] a, int off, int len) {
    return (len == 0) ? 0 : normalizedHashDecode(0, a, off, off + len);
}
// String counterpart of the byte[] based normalized hash: returns
// name.hashCode() when the name is empty or already ends with '/',
// otherwise the hash the name would have with a single '/' appended.
static int normalizedHash(String name) {
    if (name.isEmpty() || name.endsWith("/")) {
        return name.hashCode();
    }
    // Extend the String hash by one more character, the trailing slash.
    return 31 * name.hashCode() + '/';
}
// Tells whether the bytes of a that end (exclusively) at index end finish
// with this coder's encoding of '/', as produced by slashBytes().
boolean hasTrailingSlash(byte[] a, int end) {
    byte[] suffix = slashBytes();
    int start = end - suffix.length;
    if (start < 0) {
        // Fewer bytes available than an encoded slash occupies.
        return false;
    }
    return Arrays.mismatch(a, start, end, suffix, 0, suffix.length) == -1;
}
// Slow path of normalizedHash: decodes a[off, end) to chars with this
// coder's decoder() and folds them into the hash, adding a final '/' when
// the decoded text does not end with one.
//
// Used for non-UTF8 charsets, and by the UTF8 implementation when it
// abandons its ASCII fast path, which is why the caller may pass in a
// partially computed hash through h.
//
// If the bytes cannot be decoded, h is returned as it was passed in.
int normalizedHashDecode(int h, byte[] a, int off, int end) {
    final CharBuffer chars;
    try {
        // decode() hands back a freshly allocated, array-backed CharBuffer
        // with position 0 and arrayOffset 0, so array()[0..limit) holds
        // exactly the decoded characters.
        chars = decoder().decode(ByteBuffer.wrap(a, off, end - off));
    } catch (CharacterCodingException ignored) {
        // Deliberately ignored - keep the hash code generated so far.
        return h;
    }
    int count = chars.limit();
    if (count == 0) {
        return h;
    }
    char[] text = chars.array();
    int i = 0;
    while (i < count) {
        h = 31 * h + text[i++];
    }
    if (text[count - 1] != '/') {
        h = 31 * h + '/';
    }
    return h;
}
private Charset cs;
private CharsetDecoder dec;
private byte[] slashBytes;
private final Charset cs;
protected CharsetDecoder dec;
private CharsetEncoder enc;
private ZipCoder(Charset cs) {
this.cs = cs;
}
private CharsetDecoder decoder() {
protected CharsetDecoder decoder() {
if (dec == null) {
dec = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
@ -173,4 +170,73 @@ class ZipCoder {
}
return enc;
}
// Returns the byte sequence that a trailing '/' is encoded to in this
// coder's charset, computing it on first use and caching it afterwards.
//
// Most charsets encode '/' as the single byte value of '/', but that is
// not true in general: UTF-16 and UTF-32, for example, use 2 and 4 bytes
// respectively.
private byte[] slashBytes() {
    byte[] encodedSlash = slashBytes;
    if (encodedSlash == null) {
        // Some charsets (e.g., UTF-16) emit a BOM at the start of the
        // output. Encode "//" and drop as many leading bytes as "/" alone
        // occupies; what is left is the encoding of a slash that is not
        // at the start of the text.
        int skip = "/".getBytes(cs).length;
        byte[] twoSlashes = "//".getBytes(cs);
        encodedSlash = Arrays.copyOfRange(twoSlashes, skip, twoSlashes.length);
        slashBytes = encodedSlash;
    }
    return encodedSlash;
}
// ZipCoder specialization for UTF-8. A single instance is shared by every
// zip file that uses UTF-8, so nothing in here may rely on per-instance
// mutable state such as the cached CharsetDecoder of the base class.
static final class UTF8ZipCoder extends ZipCoder {

    private UTF8ZipCoder(Charset utf8) {
        super(utf8);
    }

    @Override
    boolean isUTF8() {
        return true;
    }

    // Decodes length bytes of ba starting at off as UTF-8.
    @Override
    String toString(byte[] ba, int off, int length) {
        return JLA.newStringUTF8NoRepl(ba, off, length);
    }

    // Encodes s as UTF-8.
    @Override
    byte[] getBytes(String s) {
        return JLA.getBytesUTF8NoRepl(s);
    }

    // Computes the normalized hash (String.hashCode() of the decoded name
    // with a trailing '/' appended when missing) of a[off, off + len).
    //
    // ASCII bytes map one-to-one to chars, so they are hashed directly
    // without decoding. The first non-ASCII byte hands the remainder, and
    // the hash accumulated so far, over to normalizedHashDecode.
    @Override
    int normalizedHash(byte[] a, int off, int len) {
        if (len == 0) {
            return 0;
        }

        int end = off + len;
        int h = 0;
        while (off < end) {
            byte b = a[off];
            if (b < 0) {
                // Non-ASCII, fall back to decoding the rest of the name
                return normalizedHashDecode(h, a, off, end);
            } else {
                h = 31 * h + b;
                off++;
            }
        }

        if (a[end - 1] != '/') {
            h = 31 * h + '/';
        }
        return h;
    }

    // Stateless replacement for the base class decoder loop.
    //
    // The inherited implementation decodes through decoder(), which lazily
    // creates and caches one CharsetDecoder per coder. This coder is a
    // shared singleton and a CharsetDecoder must not be used by several
    // threads at once, so decode through the stateless String factory
    // instead.
    //
    // As in the base class, input that cannot be decoded leaves h as it
    // was passed in.
    @Override
    int normalizedHashDecode(int h, byte[] a, int off, int end) {
        final String decoded;
        try {
            decoded = JLA.newStringUTF8NoRepl(a, off, end - off);
        } catch (IllegalArgumentException ignored) {
            // Malformed UTF-8 - return the hash code generated so far.
            return h;
        }
        int count = decoded.length();
        for (int i = 0; i < count; i++) {
            h = 31 * h + decoded.charAt(i);
        }
        if (count > 0 && decoded.charAt(count - 1) != '/') {
            h = 31 * h + '/';
        }
        return h;
    }

    // In UTF-8 a '/' is always the single byte '/', so no need to go
    // through the generic encoded-slash comparison.
    @Override
    boolean hasTrailingSlash(byte[] a, int end) {
        return end > 0 && a[end - 1] == '/';
    }
}
}

View file

@ -92,7 +92,6 @@ public class ZipFile implements ZipConstants, Closeable {
private final String name; // zip file name
private volatile boolean closeRequested;
private final @Stable ZipCoder zc;
// The "resource" used by this zip file that needs to be
// cleaned after use.
@ -232,11 +231,10 @@ public class ZipFile implements ZipConstants, Closeable {
}
Objects.requireNonNull(charset, "charset");
this.zc = ZipCoder.get(charset);
this.name = name;
long t0 = System.nanoTime();
this.res = new CleanableResource(this, file, mode);
this.res = new CleanableResource(this, ZipCoder.get(charset), file, mode);
PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0);
PerfCounter.getZipFileCount().increment();
@ -307,7 +305,7 @@ public class ZipFile implements ZipConstants, Closeable {
if (res.zsrc.comment == null) {
return null;
}
return zc.toString(res.zsrc.comment);
return res.zsrc.zc.toString(res.zsrc.comment);
}
}
@ -338,18 +336,9 @@ public class ZipFile implements ZipConstants, Closeable {
ZipEntry entry = null;
synchronized (this) {
ensureOpen();
byte[] bname = zc.getBytes(name);
int pos = res.zsrc.getEntryPos(bname, true);
int pos = res.zsrc.getEntryPos(name, true);
if (pos != -1) {
entry = getZipEntry(name, bname, pos, func);
} else if (!zc.isUTF8() && !name.isEmpty() && !name.endsWith("/")) {
// non-UTF-8 charsets need to lookup again with added slash
name = name + '/';
bname = zc.getBytes(name);
pos = res.zsrc.getEntryPos(bname, false);
if (pos != -1) {
entry = getZipEntry(name, bname, pos, func);
}
entry = getZipEntry(name, pos, func);
}
}
return entry;
@ -371,7 +360,7 @@ public class ZipFile implements ZipConstants, Closeable {
*/
public InputStream getInputStream(ZipEntry entry) throws IOException {
Objects.requireNonNull(entry, "entry");
int pos = -1;
int pos;
ZipFileInputStream in;
Source zsrc = res.zsrc;
Set<InputStream> istreams = res.istreams;
@ -379,10 +368,8 @@ public class ZipFile implements ZipConstants, Closeable {
ensureOpen();
if (Objects.equals(lastEntryName, entry.name)) {
pos = lastEntryPos;
} else if (!zc.isUTF8() && (entry.flag & USE_UTF8) != 0) {
pos = zsrc.getEntryPos(zc.getBytesUTF8(entry.name), false);
} else {
pos = zsrc.getEntryPos(zc.getBytes(entry.name), false);
pos = zsrc.getEntryPos(entry.name, false);
}
if (pos == -1) {
return null;
@ -528,7 +515,7 @@ public class ZipFile implements ZipConstants, Closeable {
throw new NoSuchElementException();
}
// each "entry" has 3 ints in table entries
return (T)getZipEntry(null, null, res.zsrc.getEntryPos(i++ * 3), gen);
return (T)getZipEntry(null, res.zsrc.getEntryPos(i++ * 3), gen);
}
}
@ -600,18 +587,15 @@ public class ZipFile implements ZipConstants, Closeable {
synchronized (this) {
ensureOpen();
return StreamSupport.stream(new EntrySpliterator<>(0, res.zsrc.total,
pos -> getZipEntry(null, null, pos, ZipEntry::new)), false);
pos -> getZipEntry(null, pos, ZipEntry::new)), false);
}
}
private String getEntryName(int pos) {
byte[] cen = res.zsrc.cen;
int nlen = CENNAM(cen, pos);
if (!zc.isUTF8() && (CENFLG(cen, pos) & USE_UTF8) != 0) {
return zc.toStringUTF8(cen, pos + CENHDR, nlen);
} else {
return zc.toString(cen, pos + CENHDR, nlen);
}
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
return zc.toString(cen, pos + CENHDR, nlen);
}
/*
@ -647,34 +631,37 @@ public class ZipFile implements ZipConstants, Closeable {
synchronized (this) {
ensureOpen();
return StreamSupport.stream(new EntrySpliterator<>(0, res.zsrc.total,
pos -> (JarEntry)getZipEntry(null, null, pos, func)), false);
pos -> (JarEntry)getZipEntry(null, pos, func)), false);
}
}
private String lastEntryName;
private int lastEntryPos;
/* Checks ensureOpen() before invoke this method */
private ZipEntry getZipEntry(String name, byte[] bname, int pos,
/* Check ensureOpen() before invoking this method */
private ZipEntry getZipEntry(String name, int pos,
Function<String, ? extends ZipEntry> func) {
byte[] cen = res.zsrc.cen;
int nlen = CENNAM(cen, pos);
int elen = CENEXT(cen, pos);
int clen = CENCOM(cen, pos);
int flag = CENFLG(cen, pos);
if (name == null || bname.length != nlen) {
// to use the entry name stored in cen, if the passed in name is
// (1) null, invoked from iterator, or
// (2) not equal to the name stored, a slash is appended during
// getEntryPos() search.
if (!zc.isUTF8() && (flag & USE_UTF8) != 0) {
name = zc.toStringUTF8(cen, pos + CENHDR, nlen);
} else {
name = zc.toString(cen, pos + CENHDR, nlen);
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
if (name != null) {
// only need to check for mismatch of trailing slash
if (nlen > 0 &&
!name.isEmpty() &&
zc.hasTrailingSlash(cen, pos + CENHDR + nlen) &&
!name.endsWith("/"))
{
name += '/';
}
} else {
// invoked from iterator, use the entry name stored in cen
name = zc.toString(cen, pos + CENHDR, nlen);
}
ZipEntry e = func.apply(name); //ZipEntry e = new ZipEntry(name);
e.flag = flag;
e.flag = CENFLG(cen, pos);
e.xdostime = CENTIM(cen, pos);
e.crc = CENCRC(cen, pos);
e.size = CENLEN(cen, pos);
@ -686,11 +673,7 @@ public class ZipFile implements ZipConstants, Closeable {
}
if (clen != 0) {
int start = pos + CENHDR + nlen + elen;
if (!zc.isUTF8() && (flag & USE_UTF8) != 0) {
e.comment = zc.toStringUTF8(cen, start, clen);
} else {
e.comment = zc.toString(cen, start, clen);
}
e.comment = zc.toString(cen, start, clen);
}
lastEntryName = e.name;
lastEntryPos = pos;
@ -721,11 +704,11 @@ public class ZipFile implements ZipConstants, Closeable {
Source zsrc;
CleanableResource(ZipFile zf, File file, int mode) throws IOException {
CleanableResource(ZipFile zf, ZipCoder zc, File file, int mode) throws IOException {
this.cleanable = CleanerFactory.cleaner().register(zf, this);
this.istreams = Collections.newSetFromMap(new WeakHashMap<>());
this.inflaterCache = new ArrayDeque<>();
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zf.zc);
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zc);
}
void clean() {
@ -1113,7 +1096,7 @@ public class ZipFile implements ZipConstants, Closeable {
private static final int[] EMPTY_META_VERSIONS = new int[0];
private final Key key; // the key in files
private final ZipCoder zc; // zip coder used to decode/encode
private final @Stable ZipCoder zc; // zip coder used to decode/encode
private int refs = 1;
@ -1412,8 +1395,6 @@ public class ZipFile implements ZipConstants, Closeable {
private void initCEN(int knownTotal) throws IOException {
// Prefer locals for better performance during startup
byte[] cen;
ZipCoder zc = this.zc;
if (knownTotal == -1) {
End end = findEND();
if (end.endpos == 0) {
@ -1488,7 +1469,7 @@ public class ZipFile implements ZipConstants, Closeable {
if (entryPos + nlen > limit)
zerror("invalid CEN header (bad header size)");
// Record the CEN offset and the name hash in our hash cell.
hash = zc.hashN(cen, entryPos, nlen);
hash = zipCoderForPos(pos).normalizedHash(cen, entryPos, nlen);
hsh = (hash & 0x7fffffff) % tablelen;
next = table[hsh];
table[hsh] = idx;
@ -1544,11 +1525,12 @@ public class ZipFile implements ZipConstants, Closeable {
* Returns the {@code pos} of the zip cen entry corresponding to the
* specified entry name, or -1 if not found.
*/
private int getEntryPos(byte[] name, boolean addSlash) {
private int getEntryPos(String name, boolean addSlash) {
if (total == 0) {
return -1;
}
int hsh = zc.hashN(name, 0, name.length);
int hsh = ZipCoder.normalizedHash(name);
int idx = table[(hsh & 0x7fffffff) % tablelen];
// Search down the target hash chain for an entry whose
@ -1557,31 +1539,25 @@ public class ZipFile implements ZipConstants, Closeable {
if (getEntryHash(idx) == hsh) {
// The CEN name must match the specified one
int pos = getEntryPos(idx);
byte[] cen = this.cen;
final int nlen = CENNAM(cen, pos);
int nameoff = pos + CENHDR;
// If addSlash is true and we're using the UTF-8 zip coder,
// we'll directly test for name+/ in addition to name,
// unless name is the empty string or already ends with a
// slash
if (name.length == nlen ||
(addSlash &&
zc.isUTF8() &&
name.length > 0 &&
name.length + 1 == nlen &&
cen[nameoff + nlen - 1] == '/' &&
name[name.length - 1] != '/')) {
boolean matched = true;
for (int i = 0; i < name.length; i++) {
if (name[i] != cen[nameoff++]) {
matched = false;
break;
}
}
if (matched) {
return pos;
try {
ZipCoder zc = zipCoderForPos(pos);
String entry = zc.toString(cen, pos + CENHDR, CENNAM(cen, pos));
// If addSlash is true we'll test for name+/ in addition to
// name, unless name is the empty string or already ends with a
// slash
int entryLen = entry.length();
int nameLen = name.length();
if ((entryLen == nameLen && entry.equals(name)) ||
(addSlash &&
nameLen + 1 == entryLen &&
entry.startsWith(name) &&
entry.charAt(entryLen - 1) == '/')) {
return pos;
}
} catch (IllegalArgumentException iae) {
// Ignore
}
}
idx = getEntryNext(idx);
@ -1589,6 +1565,16 @@ public class ZipFile implements ZipConstants, Closeable {
return -1;
}
// Picks the coder for the CEN entry at pos: the shared UTF-8 coder when
// this source's own coder is not UTF-8 but the entry's flags have the
// USE_UTF8 bit set, and this source's coder in every other case.
private ZipCoder zipCoderForPos(int pos) {
    // The flag is only consulted when the source's coder is not UTF-8.
    boolean entryOverridesToUtf8 =
        !zc.isUTF8() && (CENFLG(cen, pos) & USE_UTF8) != 0;
    return entryOverridesToUtf8 ? ZipCoder.UTF8 : zc;
}
/**
* Returns true if the bytes represent a non-directory name
* beginning with "META-INF/", disregarding ASCII case.