mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8184947: ZipCoder performance improvements
Reviewed-by: martin, redestad
This commit is contained in:
parent
1ae8f54835
commit
4f0ea9242f
9 changed files with 620 additions and 881 deletions
|
@ -28,72 +28,60 @@ package java.util.zip;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.Arrays;
|
||||
import sun.nio.cs.ArrayDecoder;
|
||||
import sun.nio.cs.ArrayEncoder;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
/**
|
||||
* Utility class for zipfile name and comment decoding and encoding
|
||||
*/
|
||||
|
||||
final class ZipCoder {
|
||||
class ZipCoder {
|
||||
|
||||
private static boolean isASCII(byte[] ba, int off, int len) {
|
||||
for (int i = off; i < off + len; i++) {
|
||||
if (ba[i] < 0)
|
||||
return false;
|
||||
private static final jdk.internal.misc.JavaLangAccess JLA =
|
||||
jdk.internal.misc.SharedSecrets.getJavaLangAccess();
|
||||
|
||||
static final class UTF8 extends ZipCoder {
|
||||
|
||||
UTF8(Charset utf8) {
|
||||
super(utf8);
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean isUTF8() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
return JLA.newStringUTF8NoRepl(ba, off, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
byte[] getBytes(String s) {
|
||||
return JLA.getBytesUTF8NoRepl(s);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean hasReplaceChar(byte[] ba) {
|
||||
for (int i = 0; i < ba.length; i++) {
|
||||
if (ba[i] == (byte)'?')
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
// UTF_8.ArrayEn/Decoder is stateless, so make it a singleton.
|
||||
private static ZipCoder utf8 = new UTF8(UTF_8);
|
||||
|
||||
public static ZipCoder get(Charset charset) {
|
||||
if (charset == UTF_8)
|
||||
return utf8;
|
||||
return new ZipCoder(charset);
|
||||
}
|
||||
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
try {
|
||||
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
|
||||
|
||||
// fastpath for UTF-8 cs and ascii only name, leverage the
|
||||
// compact string impl to avoid the unnecessary char[] copy/
|
||||
// paste. A temporary workaround before we have better approach,
|
||||
// such as a String constructor that throws exception for
|
||||
// malformed and/or unmappable characters, instead of silently
|
||||
// replacing with repl char
|
||||
if (isUTF8 && isASCII(ba, off, length)) {
|
||||
return new String(ba, off, length, cs);
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new IllegalArgumentException(x);
|
||||
}
|
||||
|
||||
CharsetDecoder cd = decoder().reset();
|
||||
int len = (int)(length * cd.maxCharsPerByte());
|
||||
char[] ca = new char[len];
|
||||
if (len == 0)
|
||||
return new String(ca);
|
||||
// UTF-8 only for now. Other ArrayDecoders only handle
|
||||
// CodingErrorAction.REPLACE mode. ZipCoder uses
|
||||
// REPORT mode.
|
||||
if (isUTF8 && cd instanceof ArrayDecoder) {
|
||||
int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
|
||||
if (clen == -1) // malformed
|
||||
throw new IllegalArgumentException("MALFORMED");
|
||||
return new String(ca, 0, clen);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
throw new IllegalArgumentException(cr.toString());
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
throw new IllegalArgumentException(cr.toString());
|
||||
return new String(ca, 0, cb.position());
|
||||
}
|
||||
|
||||
String toString(byte[] ba, int length) {
|
||||
|
@ -105,84 +93,47 @@ final class ZipCoder {
|
|||
}
|
||||
|
||||
byte[] getBytes(String s) {
|
||||
if (isUTF8) {
|
||||
// fastpath for UTF8. should only occur when the string
|
||||
// has malformed surrogates. A postscan should still be
|
||||
// faster and use less memory.
|
||||
byte[] ba = s.getBytes(cs);
|
||||
if (!hasReplaceChar(ba)) {
|
||||
return ba;
|
||||
try {
|
||||
ByteBuffer bb = encoder().encode(CharBuffer.wrap(s));
|
||||
int pos = bb.position();
|
||||
int limit = bb.limit();
|
||||
if (bb.hasArray() && pos == 0 && limit == bb.capacity()) {
|
||||
return bb.array();
|
||||
}
|
||||
byte[] bytes = new byte[bb.limit() - bb.position()];
|
||||
bb.get(bytes);
|
||||
return bytes;
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new IllegalArgumentException(x);
|
||||
}
|
||||
CharsetEncoder ce = encoder().reset();
|
||||
char[] ca = s.toCharArray();
|
||||
int len = (int)(ca.length * ce.maxBytesPerChar());
|
||||
byte[] ba = new byte[len];
|
||||
if (len == 0)
|
||||
return ba;
|
||||
// UTF-8 only for now. Other ArrayEncoders only handle
|
||||
// CodingErrorAction.REPLACE mode.
|
||||
if (isUTF8 && ce instanceof ArrayEncoder) {
|
||||
int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
|
||||
if (blen == -1) // malformed
|
||||
throw new IllegalArgumentException("MALFORMED");
|
||||
return Arrays.copyOf(ba, blen);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
throw new IllegalArgumentException(cr.toString());
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
throw new IllegalArgumentException(cr.toString());
|
||||
if (bb.position() == ba.length) // defensive copy?
|
||||
return ba;
|
||||
else
|
||||
return Arrays.copyOf(ba, bb.position());
|
||||
}
|
||||
|
||||
// assume invoked only if "this" is not utf8
|
||||
byte[] getBytesUTF8(String s) {
|
||||
if (isUTF8)
|
||||
return getBytes(s);
|
||||
if (utf8 == null)
|
||||
utf8 = new ZipCoder(StandardCharsets.UTF_8);
|
||||
return utf8.getBytes(s);
|
||||
}
|
||||
|
||||
String toStringUTF8(byte[] ba, int len) {
|
||||
return toStringUTF8(ba, 0, len);
|
||||
return utf8.toString(ba, 0, len);
|
||||
}
|
||||
|
||||
String toStringUTF8(byte[] ba, int off, int len) {
|
||||
if (isUTF8)
|
||||
return toString(ba, off, len);
|
||||
if (utf8 == null)
|
||||
utf8 = new ZipCoder(StandardCharsets.UTF_8);
|
||||
return utf8.toString(ba, off, len);
|
||||
}
|
||||
|
||||
boolean isUTF8() {
|
||||
return isUTF8;
|
||||
return false;
|
||||
}
|
||||
|
||||
private Charset cs;
|
||||
private CharsetDecoder dec;
|
||||
private CharsetEncoder enc;
|
||||
private boolean isUTF8;
|
||||
private ZipCoder utf8;
|
||||
|
||||
private ZipCoder(Charset cs) {
|
||||
this.cs = cs;
|
||||
this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
|
||||
}
|
||||
|
||||
static ZipCoder get(Charset charset) {
|
||||
return new ZipCoder(charset);
|
||||
}
|
||||
|
||||
private CharsetDecoder decoder() {
|
||||
protected CharsetDecoder decoder() {
|
||||
if (dec == null) {
|
||||
dec = cs.newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
|
@ -191,7 +142,7 @@ final class ZipCoder {
|
|||
return dec;
|
||||
}
|
||||
|
||||
private CharsetEncoder encoder() {
|
||||
protected CharsetEncoder encoder() {
|
||||
if (enc == null) {
|
||||
enc = cs.newEncoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue