mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8243469: Lazily encode name in ZipFile.getEntryPos
Co-authored-by: Eirik Bjørsnøs <eirbjo@gmail.com> Reviewed-by: lancea, simonis
This commit is contained in:
parent
c55e7d5a4a
commit
d2e0d0e06a
3 changed files with 337 additions and 136 deletions
|
@ -32,6 +32,7 @@ import java.nio.charset.CharsetDecoder;
|
|||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.Arrays;
|
||||
|
||||
import sun.nio.cs.UTF_8;
|
||||
|
||||
|
@ -43,51 +44,13 @@ class ZipCoder {
|
|||
private static final jdk.internal.access.JavaLangAccess JLA =
|
||||
jdk.internal.access.SharedSecrets.getJavaLangAccess();
|
||||
|
||||
static final class UTF8ZipCoder extends ZipCoder {
|
||||
|
||||
// Encoding/decoding is stateless, so make it singleton.
|
||||
static final ZipCoder INSTANCE = new UTF8ZipCoder(UTF_8.INSTANCE);
|
||||
|
||||
private UTF8ZipCoder(Charset utf8) {
|
||||
super(utf8);
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean isUTF8() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
return JLA.newStringUTF8NoRepl(ba, off, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
byte[] getBytes(String s) {
|
||||
return JLA.getBytesUTF8NoRepl(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
int hashN(byte[] a, int off, int len) {
|
||||
// Performance optimization: when UTF8-encoded, ZipFile.getEntryPos
|
||||
// assume that the hash of a name remains unchanged when appending a
|
||||
// trailing '/', which allows lookups to avoid rehashing
|
||||
int end = off + len;
|
||||
if (len > 0 && a[end - 1] == '/') {
|
||||
end--;
|
||||
}
|
||||
|
||||
int h = 1;
|
||||
for (int i = off; i < end; i++) {
|
||||
h = 31 * h + a[i];
|
||||
}
|
||||
return h;
|
||||
}
|
||||
}
|
||||
// Encoding/decoding is stateless, so make it singleton.
|
||||
static final UTF8ZipCoder UTF8 = new UTF8ZipCoder(UTF_8.INSTANCE);
|
||||
|
||||
public static ZipCoder get(Charset charset) {
|
||||
if (charset == UTF_8.INSTANCE)
|
||||
return UTF8ZipCoder.INSTANCE;
|
||||
if (charset == UTF_8.INSTANCE) {
|
||||
return UTF8;
|
||||
}
|
||||
return new ZipCoder(charset);
|
||||
}
|
||||
|
||||
|
@ -123,40 +86,74 @@ class ZipCoder {
|
|||
}
|
||||
}
|
||||
|
||||
// assume invoked only if "this" is not utf8
|
||||
byte[] getBytesUTF8(String s) {
|
||||
return UTF8ZipCoder.INSTANCE.getBytes(s);
|
||||
}
|
||||
|
||||
String toStringUTF8(byte[] ba, int len) {
|
||||
return UTF8ZipCoder.INSTANCE.toString(ba, 0, len);
|
||||
}
|
||||
|
||||
String toStringUTF8(byte[] ba, int off, int len) {
|
||||
return UTF8ZipCoder.INSTANCE.toString(ba, off, len);
|
||||
return UTF8.toString(ba, 0, len);
|
||||
}
|
||||
|
||||
boolean isUTF8() {
|
||||
return false;
|
||||
}
|
||||
|
||||
int hashN(byte[] a, int off, int len) {
|
||||
int h = 1;
|
||||
while (len-- > 0) {
|
||||
h = 31 * h + a[off++];
|
||||
// Hash code functions for ZipFile entry names. We generate the hash as-if
|
||||
// we first decoded the byte sequence to a String, then appended '/' if no
|
||||
// trailing slash was found, then called String.hashCode(). This
|
||||
// normalization ensures we can simplify and speed up lookups.
|
||||
int normalizedHash(byte[] a, int off, int len) {
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
return normalizedHashDecode(0, a, off, off + len);
|
||||
}
|
||||
|
||||
// Matching normalized hash code function for Strings
|
||||
static int normalizedHash(String name) {
|
||||
int hsh = name.hashCode();
|
||||
int len = name.length();
|
||||
if (len > 0 && name.charAt(len - 1) != '/') {
|
||||
hsh = hsh * 31 + '/';
|
||||
}
|
||||
return hsh;
|
||||
}
|
||||
|
||||
boolean hasTrailingSlash(byte[] a, int end) {
|
||||
byte[] slashBytes = slashBytes();
|
||||
return end >= slashBytes.length &&
|
||||
Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
|
||||
}
|
||||
|
||||
// Implements normalizedHash by decoding byte[] to char[] and then computing
|
||||
// the hash. This is a slow-path used for non-UTF8 charsets and also when
|
||||
// aborting the ASCII fast-path in the UTF8 implementation, so {@code h}
|
||||
// might be a partially calculated hash code
|
||||
int normalizedHashDecode(int h, byte[] a, int off, int end) {
|
||||
try {
|
||||
// cb will be a newly allocated CharBuffer with pos == 0,
|
||||
// arrayOffset == 0, backed by an array.
|
||||
CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, end - off));
|
||||
int limit = cb.limit();
|
||||
char[] decoded = cb.array();
|
||||
for (int i = 0; i < limit; i++) {
|
||||
h = 31 * h + decoded[i];
|
||||
}
|
||||
if (limit > 0 && decoded[limit - 1] != '/') {
|
||||
h = 31 * h + '/';
|
||||
}
|
||||
} catch (CharacterCodingException cce) {
|
||||
// Ignore - return the hash code generated so far.
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
private Charset cs;
|
||||
private CharsetDecoder dec;
|
||||
private byte[] slashBytes;
|
||||
private final Charset cs;
|
||||
protected CharsetDecoder dec;
|
||||
private CharsetEncoder enc;
|
||||
|
||||
private ZipCoder(Charset cs) {
|
||||
this.cs = cs;
|
||||
}
|
||||
|
||||
private CharsetDecoder decoder() {
|
||||
protected CharsetDecoder decoder() {
|
||||
if (dec == null) {
|
||||
dec = cs.newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
|
@ -173,4 +170,73 @@ class ZipCoder {
|
|||
}
|
||||
return enc;
|
||||
}
|
||||
|
||||
// This method produces an array with the bytes that will correspond to a
|
||||
// trailing '/' in the chosen character encoding.
|
||||
//
|
||||
// While in most charsets a trailing slash will be encoded as the byte
|
||||
// value of '/', this does not hold in the general case. E.g., in charsets
|
||||
// such as UTF-16 and UTF-32 it will be represented by a sequence of 2 or 4
|
||||
// bytes, respectively.
|
||||
private byte[] slashBytes() {
|
||||
if (slashBytes == null) {
|
||||
// Take into account charsets that produce a BOM, e.g., UTF-16
|
||||
byte[] slash = "/".getBytes(cs);
|
||||
byte[] doubleSlash = "//".getBytes(cs);
|
||||
slashBytes = Arrays.copyOfRange(doubleSlash, slash.length, doubleSlash.length);
|
||||
}
|
||||
return slashBytes;
|
||||
}
|
||||
|
||||
static final class UTF8ZipCoder extends ZipCoder {
|
||||
|
||||
private UTF8ZipCoder(Charset utf8) {
|
||||
super(utf8);
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean isUTF8() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
return JLA.newStringUTF8NoRepl(ba, off, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
byte[] getBytes(String s) {
|
||||
return JLA.getBytesUTF8NoRepl(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
int normalizedHash(byte[] a, int off, int len) {
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int end = off + len;
|
||||
int h = 0;
|
||||
while (off < end) {
|
||||
byte b = a[off];
|
||||
if (b < 0) {
|
||||
// Non-ASCII, fall back to decoder loop
|
||||
return normalizedHashDecode(h, a, off, end);
|
||||
} else {
|
||||
h = 31 * h + b;
|
||||
off++;
|
||||
}
|
||||
}
|
||||
|
||||
if (a[end - 1] != '/') {
|
||||
h = 31 * h + '/';
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasTrailingSlash(byte[] a, int end) {
|
||||
return end > 0 && a[end - 1] == '/';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,7 +92,6 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
|
||||
private final String name; // zip file name
|
||||
private volatile boolean closeRequested;
|
||||
private final @Stable ZipCoder zc;
|
||||
|
||||
// The "resource" used by this zip file that needs to be
|
||||
// cleaned after use.
|
||||
|
@ -232,11 +231,10 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
}
|
||||
Objects.requireNonNull(charset, "charset");
|
||||
|
||||
this.zc = ZipCoder.get(charset);
|
||||
this.name = name;
|
||||
long t0 = System.nanoTime();
|
||||
|
||||
this.res = new CleanableResource(this, file, mode);
|
||||
this.res = new CleanableResource(this, ZipCoder.get(charset), file, mode);
|
||||
|
||||
PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0);
|
||||
PerfCounter.getZipFileCount().increment();
|
||||
|
@ -307,7 +305,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
if (res.zsrc.comment == null) {
|
||||
return null;
|
||||
}
|
||||
return zc.toString(res.zsrc.comment);
|
||||
return res.zsrc.zc.toString(res.zsrc.comment);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,18 +336,9 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
ZipEntry entry = null;
|
||||
synchronized (this) {
|
||||
ensureOpen();
|
||||
byte[] bname = zc.getBytes(name);
|
||||
int pos = res.zsrc.getEntryPos(bname, true);
|
||||
int pos = res.zsrc.getEntryPos(name, true);
|
||||
if (pos != -1) {
|
||||
entry = getZipEntry(name, bname, pos, func);
|
||||
} else if (!zc.isUTF8() && !name.isEmpty() && !name.endsWith("/")) {
|
||||
// non-UTF-8 charsets need to lookup again with added slash
|
||||
name = name + '/';
|
||||
bname = zc.getBytes(name);
|
||||
pos = res.zsrc.getEntryPos(bname, false);
|
||||
if (pos != -1) {
|
||||
entry = getZipEntry(name, bname, pos, func);
|
||||
}
|
||||
entry = getZipEntry(name, pos, func);
|
||||
}
|
||||
}
|
||||
return entry;
|
||||
|
@ -371,7 +360,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
*/
|
||||
public InputStream getInputStream(ZipEntry entry) throws IOException {
|
||||
Objects.requireNonNull(entry, "entry");
|
||||
int pos = -1;
|
||||
int pos;
|
||||
ZipFileInputStream in;
|
||||
Source zsrc = res.zsrc;
|
||||
Set<InputStream> istreams = res.istreams;
|
||||
|
@ -379,10 +368,8 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
ensureOpen();
|
||||
if (Objects.equals(lastEntryName, entry.name)) {
|
||||
pos = lastEntryPos;
|
||||
} else if (!zc.isUTF8() && (entry.flag & USE_UTF8) != 0) {
|
||||
pos = zsrc.getEntryPos(zc.getBytesUTF8(entry.name), false);
|
||||
} else {
|
||||
pos = zsrc.getEntryPos(zc.getBytes(entry.name), false);
|
||||
pos = zsrc.getEntryPos(entry.name, false);
|
||||
}
|
||||
if (pos == -1) {
|
||||
return null;
|
||||
|
@ -528,7 +515,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
throw new NoSuchElementException();
|
||||
}
|
||||
// each "entry" has 3 ints in table entries
|
||||
return (T)getZipEntry(null, null, res.zsrc.getEntryPos(i++ * 3), gen);
|
||||
return (T)getZipEntry(null, res.zsrc.getEntryPos(i++ * 3), gen);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -600,18 +587,15 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
synchronized (this) {
|
||||
ensureOpen();
|
||||
return StreamSupport.stream(new EntrySpliterator<>(0, res.zsrc.total,
|
||||
pos -> getZipEntry(null, null, pos, ZipEntry::new)), false);
|
||||
pos -> getZipEntry(null, pos, ZipEntry::new)), false);
|
||||
}
|
||||
}
|
||||
|
||||
private String getEntryName(int pos) {
|
||||
byte[] cen = res.zsrc.cen;
|
||||
int nlen = CENNAM(cen, pos);
|
||||
if (!zc.isUTF8() && (CENFLG(cen, pos) & USE_UTF8) != 0) {
|
||||
return zc.toStringUTF8(cen, pos + CENHDR, nlen);
|
||||
} else {
|
||||
return zc.toString(cen, pos + CENHDR, nlen);
|
||||
}
|
||||
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
|
||||
return zc.toString(cen, pos + CENHDR, nlen);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -647,34 +631,37 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
synchronized (this) {
|
||||
ensureOpen();
|
||||
return StreamSupport.stream(new EntrySpliterator<>(0, res.zsrc.total,
|
||||
pos -> (JarEntry)getZipEntry(null, null, pos, func)), false);
|
||||
pos -> (JarEntry)getZipEntry(null, pos, func)), false);
|
||||
}
|
||||
}
|
||||
|
||||
private String lastEntryName;
|
||||
private int lastEntryPos;
|
||||
|
||||
/* Checks ensureOpen() before invoke this method */
|
||||
private ZipEntry getZipEntry(String name, byte[] bname, int pos,
|
||||
/* Check ensureOpen() before invoking this method */
|
||||
private ZipEntry getZipEntry(String name, int pos,
|
||||
Function<String, ? extends ZipEntry> func) {
|
||||
byte[] cen = res.zsrc.cen;
|
||||
int nlen = CENNAM(cen, pos);
|
||||
int elen = CENEXT(cen, pos);
|
||||
int clen = CENCOM(cen, pos);
|
||||
int flag = CENFLG(cen, pos);
|
||||
if (name == null || bname.length != nlen) {
|
||||
// to use the entry name stored in cen, if the passed in name is
|
||||
// (1) null, invoked from iterator, or
|
||||
// (2) not equal to the name stored, a slash is appended during
|
||||
// getEntryPos() search.
|
||||
if (!zc.isUTF8() && (flag & USE_UTF8) != 0) {
|
||||
name = zc.toStringUTF8(cen, pos + CENHDR, nlen);
|
||||
} else {
|
||||
name = zc.toString(cen, pos + CENHDR, nlen);
|
||||
|
||||
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
|
||||
if (name != null) {
|
||||
// only need to check for mismatch of trailing slash
|
||||
if (nlen > 0 &&
|
||||
!name.isEmpty() &&
|
||||
zc.hasTrailingSlash(cen, pos + CENHDR + nlen) &&
|
||||
!name.endsWith("/"))
|
||||
{
|
||||
name += '/';
|
||||
}
|
||||
} else {
|
||||
// invoked from iterator, use the entry name stored in cen
|
||||
name = zc.toString(cen, pos + CENHDR, nlen);
|
||||
}
|
||||
ZipEntry e = func.apply(name); //ZipEntry e = new ZipEntry(name);
|
||||
e.flag = flag;
|
||||
e.flag = CENFLG(cen, pos);
|
||||
e.xdostime = CENTIM(cen, pos);
|
||||
e.crc = CENCRC(cen, pos);
|
||||
e.size = CENLEN(cen, pos);
|
||||
|
@ -686,11 +673,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
}
|
||||
if (clen != 0) {
|
||||
int start = pos + CENHDR + nlen + elen;
|
||||
if (!zc.isUTF8() && (flag & USE_UTF8) != 0) {
|
||||
e.comment = zc.toStringUTF8(cen, start, clen);
|
||||
} else {
|
||||
e.comment = zc.toString(cen, start, clen);
|
||||
}
|
||||
e.comment = zc.toString(cen, start, clen);
|
||||
}
|
||||
lastEntryName = e.name;
|
||||
lastEntryPos = pos;
|
||||
|
@ -721,11 +704,11 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
|
||||
Source zsrc;
|
||||
|
||||
CleanableResource(ZipFile zf, File file, int mode) throws IOException {
|
||||
CleanableResource(ZipFile zf, ZipCoder zc, File file, int mode) throws IOException {
|
||||
this.cleanable = CleanerFactory.cleaner().register(zf, this);
|
||||
this.istreams = Collections.newSetFromMap(new WeakHashMap<>());
|
||||
this.inflaterCache = new ArrayDeque<>();
|
||||
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zf.zc);
|
||||
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zc);
|
||||
}
|
||||
|
||||
void clean() {
|
||||
|
@ -1113,7 +1096,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
private static final int[] EMPTY_META_VERSIONS = new int[0];
|
||||
|
||||
private final Key key; // the key in files
|
||||
private final ZipCoder zc; // zip coder used to decode/encode
|
||||
private final @Stable ZipCoder zc; // zip coder used to decode/encode
|
||||
|
||||
private int refs = 1;
|
||||
|
||||
|
@ -1412,8 +1395,6 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
private void initCEN(int knownTotal) throws IOException {
|
||||
// Prefer locals for better performance during startup
|
||||
byte[] cen;
|
||||
ZipCoder zc = this.zc;
|
||||
|
||||
if (knownTotal == -1) {
|
||||
End end = findEND();
|
||||
if (end.endpos == 0) {
|
||||
|
@ -1488,7 +1469,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
if (entryPos + nlen > limit)
|
||||
zerror("invalid CEN header (bad header size)");
|
||||
// Record the CEN offset and the name hash in our hash cell.
|
||||
hash = zc.hashN(cen, entryPos, nlen);
|
||||
hash = zipCoderForPos(pos).normalizedHash(cen, entryPos, nlen);
|
||||
hsh = (hash & 0x7fffffff) % tablelen;
|
||||
next = table[hsh];
|
||||
table[hsh] = idx;
|
||||
|
@ -1544,11 +1525,12 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
* Returns the {@code pos} of the zip cen entry corresponding to the
|
||||
* specified entry name, or -1 if not found.
|
||||
*/
|
||||
private int getEntryPos(byte[] name, boolean addSlash) {
|
||||
private int getEntryPos(String name, boolean addSlash) {
|
||||
if (total == 0) {
|
||||
return -1;
|
||||
}
|
||||
int hsh = zc.hashN(name, 0, name.length);
|
||||
|
||||
int hsh = ZipCoder.normalizedHash(name);
|
||||
int idx = table[(hsh & 0x7fffffff) % tablelen];
|
||||
|
||||
// Search down the target hash chain for an entry whose
|
||||
|
@ -1557,31 +1539,25 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
if (getEntryHash(idx) == hsh) {
|
||||
// The CEN name must match the specified one
|
||||
int pos = getEntryPos(idx);
|
||||
byte[] cen = this.cen;
|
||||
final int nlen = CENNAM(cen, pos);
|
||||
int nameoff = pos + CENHDR;
|
||||
|
||||
// If addSlash is true and we're using the UTF-8 zip coder,
|
||||
// we'll directly test for name+/ in addition to name,
|
||||
// unless name is the empty string or already ends with a
|
||||
// slash
|
||||
if (name.length == nlen ||
|
||||
(addSlash &&
|
||||
zc.isUTF8() &&
|
||||
name.length > 0 &&
|
||||
name.length + 1 == nlen &&
|
||||
cen[nameoff + nlen - 1] == '/' &&
|
||||
name[name.length - 1] != '/')) {
|
||||
boolean matched = true;
|
||||
for (int i = 0; i < name.length; i++) {
|
||||
if (name[i] != cen[nameoff++]) {
|
||||
matched = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matched) {
|
||||
return pos;
|
||||
try {
|
||||
ZipCoder zc = zipCoderForPos(pos);
|
||||
String entry = zc.toString(cen, pos + CENHDR, CENNAM(cen, pos));
|
||||
|
||||
// If addSlash is true we'll test for name+/ in addition to
|
||||
// name, unless name is the empty string or already ends with a
|
||||
// slash
|
||||
int entryLen = entry.length();
|
||||
int nameLen = name.length();
|
||||
if ((entryLen == nameLen && entry.equals(name)) ||
|
||||
(addSlash &&
|
||||
nameLen + 1 == entryLen &&
|
||||
entry.startsWith(name) &&
|
||||
entry.charAt(entryLen - 1) == '/')) {
|
||||
return pos;
|
||||
}
|
||||
} catch (IllegalArgumentException iae) {
|
||||
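// Ignore: presumably the CEN name could not be decoded, so it cannot match; continue with the next entry in the chain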
|
||||
}
|
||||
}
|
||||
idx = getEntryNext(idx);
|
||||
|
@ -1589,6 +1565,16 @@ public class ZipFile implements ZipConstants, Closeable {
|
|||
return -1;
|
||||
}
|
||||
|
||||
private ZipCoder zipCoderForPos(int pos) {
|
||||
if (zc.isUTF8()) {
|
||||
return zc;
|
||||
}
|
||||
if ((CENFLG(cen, pos) & USE_UTF8) != 0) {
|
||||
return ZipCoder.UTF8;
|
||||
}
|
||||
return zc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the bytes represent a non-directory name
|
||||
* beginning with "META-INF/", disregarding ASCII case.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue