8184947: ZipCoder performance improvements

Reviewed-by: martin, redestad
This commit is contained in:
Xueming Shen 2017-12-13 07:51:57 -08:00
parent 1ae8f54835
commit 4f0ea9242f
9 changed files with 620 additions and 881 deletions

View file

@ -28,72 +28,60 @@ package java.util.zip;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Utility class for zipfile name and comment decoding and encoding
*/
final class ZipCoder {
class ZipCoder {
private static boolean isASCII(byte[] ba, int off, int len) {
for (int i = off; i < off + len; i++) {
if (ba[i] < 0)
return false;
private static final jdk.internal.misc.JavaLangAccess JLA =
jdk.internal.misc.SharedSecrets.getJavaLangAccess();
static final class UTF8 extends ZipCoder {
/**
 * Creates the UTF8 coder by passing the given charset to the
 * {@code ZipCoder} superclass constructor.
 *
 * @param utf8 the charset handed to the superclass
 */
UTF8(Charset utf8) {
super(utf8);
}
/**
 * Always reports {@code true} for this UTF8 coder subclass.
 *
 * @return {@code true}
 */
@Override
boolean isUTF8() {
return true;
}
/**
 * Decodes {@code length} bytes of {@code ba}, starting at {@code off},
 * by delegating to {@code JLA.newStringUTF8NoRepl}.
 *
 * NOTE(review): the "NoRepl" suffix suggests malformed input is reported
 * rather than replaced with a substitution character -- confirm against
 * JavaLangAccess, including which exception type is thrown.
 *
 * @param ba     the bytes to decode
 * @param off    index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded string
 */
@Override
String toString(byte[] ba, int off, int length) {
return JLA.newStringUTF8NoRepl(ba, off, length);
}
/**
 * Encodes {@code s} by delegating to {@code JLA.getBytesUTF8NoRepl}.
 *
 * NOTE(review): the "NoRepl" suffix suggests unmappable input (for example
 * unpaired surrogates) is reported rather than replaced -- confirm against
 * JavaLangAccess, including which exception type is thrown.
 *
 * @param s the string to encode
 * @return the encoded bytes
 */
@Override
byte[] getBytes(String s) {
return JLA.getBytesUTF8NoRepl(s);
}
return true;
}
private static boolean hasReplaceChar(byte[] ba) {
for (int i = 0; i < ba.length; i++) {
if (ba[i] == (byte)'?')
return true;
}
return false;
// UTF_8.ArrayEn/Decoder is stateless, so make it singleton.
private static ZipCoder utf8 = new UTF8(UTF_8);
/**
 * Returns a coder for the given charset.
 *
 * The shared {@code utf8} instance is returned when {@code charset} is the
 * very same object as the {@code UTF_8} constant (reference comparison,
 * not {@code equals}); any other charset gets a newly created
 * {@code ZipCoder}.
 *
 * @param charset the charset used for encoding and decoding
 * @return the shared UTF-8 coder or a new coder for {@code charset}
 */
public static ZipCoder get(Charset charset) {
if (charset == UTF_8)
return utf8;
return new ZipCoder(charset);
}
String toString(byte[] ba, int off, int length) {
try {
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
// fastpath for UTF-8 cs and ascii only name, leverage the
// compact string impl to avoid the unnecessary char[] copy/
// paste. A temporary workaround before we have better approach,
// such as a String constructor that throws exception for
// malformed and/or unmappable characters, instead of silently
// replacing with repl char
if (isUTF8 && isASCII(ba, off, length)) {
return new String(ba, off, length, cs);
} catch (CharacterCodingException x) {
throw new IllegalArgumentException(x);
}
CharsetDecoder cd = decoder().reset();
int len = (int)(length * cd.maxCharsPerByte());
char[] ca = new char[len];
if (len == 0)
return new String(ca);
// UTF-8 only for now. Other ArrayDecoder only handles
// CodingErrorAction.REPLACE mode. ZipCoder uses
// REPORT mode.
if (isUTF8 && cd instanceof ArrayDecoder) {
int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
if (clen == -1) // malformed
throw new IllegalArgumentException("MALFORMED");
return new String(ca, 0, clen);
}
ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
CharBuffer cb = CharBuffer.wrap(ca);
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
throw new IllegalArgumentException(cr.toString());
cr = cd.flush(cb);
if (!cr.isUnderflow())
throw new IllegalArgumentException(cr.toString());
return new String(ca, 0, cb.position());
}
String toString(byte[] ba, int length) {
@ -105,84 +93,47 @@ final class ZipCoder {
}
byte[] getBytes(String s) {
if (isUTF8) {
// fastpath for UTF8. should only occur when the string
// has malformed surrogates. A postscan should still be
// faster and use less memory.
byte[] ba = s.getBytes(cs);
if (!hasReplaceChar(ba)) {
return ba;
try {
ByteBuffer bb = encoder().encode(CharBuffer.wrap(s));
int pos = bb.position();
int limit = bb.limit();
if (bb.hasArray() && pos == 0 && limit == bb.capacity()) {
return bb.array();
}
byte[] bytes = new byte[bb.limit() - bb.position()];
bb.get(bytes);
return bytes;
} catch (CharacterCodingException x) {
throw new IllegalArgumentException(x);
}
CharsetEncoder ce = encoder().reset();
char[] ca = s.toCharArray();
int len = (int)(ca.length * ce.maxBytesPerChar());
byte[] ba = new byte[len];
if (len == 0)
return ba;
// UTF-8 only for now. Other ArrayEncoder only handles
// CodingErrorAction.REPLACE mode.
if (isUTF8 && ce instanceof ArrayEncoder) {
int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
if (blen == -1) // malformed
throw new IllegalArgumentException("MALFORMED");
return Arrays.copyOf(ba, blen);
}
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca);
CoderResult cr = ce.encode(cb, bb, true);
if (!cr.isUnderflow())
throw new IllegalArgumentException(cr.toString());
cr = ce.flush(bb);
if (!cr.isUnderflow())
throw new IllegalArgumentException(cr.toString());
if (bb.position() == ba.length) // defensive copy?
return ba;
else
return Arrays.copyOf(ba, bb.position());
}
// assume invoked only if "this" is not utf8
byte[] getBytesUTF8(String s) {
if (isUTF8)
return getBytes(s);
if (utf8 == null)
utf8 = new ZipCoder(StandardCharsets.UTF_8);
return utf8.getBytes(s);
}
String toStringUTF8(byte[] ba, int len) {
return toStringUTF8(ba, 0, len);
return utf8.toString(ba, 0, len);
}
/**
 * Decodes {@code len} bytes of {@code ba}, starting at {@code off},
 * using a UTF-8 coder.
 *
 * When {@code isUTF8} is set, this coder's own {@code toString} is used.
 * Otherwise the {@code utf8} coder is used, and is created first if it is
 * still {@code null}.
 *
 * @param ba  the bytes to decode
 * @param off index of the first byte to decode
 * @param len number of bytes to decode
 * @return the decoded string
 */
String toStringUTF8(byte[] ba, int off, int len) {
if (isUTF8)
return toString(ba, off, len);
// create the UTF-8 coder on first use
if (utf8 == null)
utf8 = new ZipCoder(StandardCharsets.UTF_8);
return utf8.toString(ba, off, len);
}
boolean isUTF8() {
return isUTF8;
return false;
}
private Charset cs;
private CharsetDecoder dec;
private CharsetEncoder enc;
private boolean isUTF8;
private ZipCoder utf8;
/**
 * Stores the charset and records in {@code isUTF8} whether its name equals
 * the name of {@code StandardCharsets.UTF_8}.
 *
 * @param cs the charset this coder uses
 */
private ZipCoder(Charset cs) {
this.cs = cs;
// compared by charset name, not by object identity
this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
}
/**
 * Creates a new {@code ZipCoder} for the given charset on every call.
 *
 * @param charset the charset the new coder uses
 * @return a newly created coder
 */
static ZipCoder get(Charset charset) {
return new ZipCoder(charset);
}
private CharsetDecoder decoder() {
protected CharsetDecoder decoder() {
if (dec == null) {
dec = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
@ -191,7 +142,7 @@ final class ZipCoder {
return dec;
}
private CharsetEncoder encoder() {
protected CharsetEncoder encoder() {
if (enc == null) {
enc = cs.newEncoder()
.onMalformedInput(CodingErrorAction.REPORT)