mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8141132: JEP 254: Compact Strings
Adopt a more space-efficient internal representation for strings. Co-authored-by: Brent Christian <brent.christian@oracle.com> Co-authored-by: Vivek Deshpande <vivek.r.deshpande@intel.com> Co-authored-by: Charlie Hunt <charlie.hunt@oracle.com> Co-authored-by: Vladimir Kozlov <vladimir.kozlov@oracle.com> Co-authored-by: Roger Riggs <roger.riggs@oracle.com> Co-authored-by: Xueming Shen <xueming.shen@oracle.com> Co-authored-by: Aleksey Shipilev <aleksey.shipilev@oracle.com> Co-authored-by: Sandhya Viswanathan <sandhya.viswanathan@intel.com> Reviewed-by: alanb, bdelsart, coleenp, iklam, jiangli, jrose, kevinw, naoto, pliden, roland, smarks, twisti
This commit is contained in:
parent
b7dca6971d
commit
4ed5b73f3d
76 changed files with 8755 additions and 1288 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -38,11 +38,19 @@ import java.nio.charset.CodingErrorAction;
|
|||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.Arrays;
|
||||
import jdk.internal.HotSpotIntrinsicCandidate;
|
||||
import sun.misc.MessageUtils;
|
||||
import sun.nio.cs.HistoricallyNamedCharset;
|
||||
import sun.nio.cs.ArrayDecoder;
|
||||
import sun.nio.cs.ArrayEncoder;
|
||||
|
||||
import static java.lang.String.LATIN1;
|
||||
import static java.lang.String.UTF16;
|
||||
import static java.lang.String.COMPACT_STRINGS;
|
||||
import static java.nio.charset.StandardCharsets.ISO_8859_1;
|
||||
import static java.nio.charset.StandardCharsets.US_ASCII;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
/**
|
||||
* Utility class for string encoding and decoding.
|
||||
*/
|
||||
|
@ -72,23 +80,13 @@ class StringCoding {
|
|||
|
||||
// Trim the given byte array to the given length
|
||||
//
|
||||
private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
|
||||
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
|
||||
if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
|
||||
return ba;
|
||||
else
|
||||
return Arrays.copyOf(ba, len);
|
||||
}
|
||||
|
||||
// Trim the given char array to the given length
|
||||
//
|
||||
private static char[] safeTrim(char[] ca, int len,
|
||||
Charset cs, boolean isTrusted) {
|
||||
if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
|
||||
return ca;
|
||||
else
|
||||
return Arrays.copyOf(ca, len);
|
||||
}
|
||||
|
||||
private static int scale(int len, float expansionFactor) {
|
||||
// We need to perform double, not float, arithmetic; otherwise
|
||||
// we lose low order bits when len is larger than 2**24.
|
||||
|
@ -117,21 +115,64 @@ class StringCoding {
|
|||
}
|
||||
}
|
||||
|
||||
static class Result {
|
||||
byte[] value;
|
||||
byte coder;
|
||||
|
||||
Result with() {
|
||||
coder = COMPACT_STRINGS ? LATIN1 : UTF16;
|
||||
value = new byte[0];
|
||||
return this;
|
||||
}
|
||||
|
||||
Result with(char[] val, int off, int len) {
|
||||
if (String.COMPACT_STRINGS) {
|
||||
byte[] bs = StringUTF16.compress(val, off, len);
|
||||
if (bs != null) {
|
||||
value = bs;
|
||||
coder = LATIN1;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
coder = UTF16;
|
||||
value = StringUTF16.toBytes(val, off, len);
|
||||
return this;
|
||||
}
|
||||
|
||||
Result with(byte[] val, byte coder) {
|
||||
this.coder = coder;
|
||||
value = val;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@HotSpotIntrinsicCandidate
|
||||
private static boolean hasNegatives(byte[] ba, int off, int len) {
|
||||
for (int i = off; i < off + len; i++) {
|
||||
if (ba[i] < 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// -- Decoding --
|
||||
private static class StringDecoder {
|
||||
static class StringDecoder {
|
||||
private final String requestedCharsetName;
|
||||
private final Charset cs;
|
||||
private final boolean isASCIICompatible;
|
||||
private final CharsetDecoder cd;
|
||||
private final boolean isTrusted;
|
||||
protected final Result result;
|
||||
|
||||
private StringDecoder(Charset cs, String rcn) {
|
||||
StringDecoder(Charset cs, String rcn) {
|
||||
this.requestedCharsetName = rcn;
|
||||
this.cs = cs;
|
||||
this.cd = cs.newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
this.isTrusted = (cs.getClass().getClassLoader0() == null);
|
||||
this.result = new Result();
|
||||
this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
|
||||
((ArrayDecoder)cd).isASCIICompatible();
|
||||
}
|
||||
|
||||
String charsetName() {
|
||||
|
@ -144,36 +185,58 @@ class StringCoding {
|
|||
return requestedCharsetName;
|
||||
}
|
||||
|
||||
char[] decode(byte[] ba, int off, int len) {
|
||||
Result decode(byte[] ba, int off, int len) {
|
||||
if (len == 0) {
|
||||
return result.with();
|
||||
}
|
||||
// fastpath for ascii compatible
|
||||
if (isASCIICompatible && !hasNegatives(ba, off, len)) {
|
||||
if (COMPACT_STRINGS) {
|
||||
return result.with(Arrays.copyOfRange(ba, off, off + len),
|
||||
LATIN1);
|
||||
} else {
|
||||
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
|
||||
}
|
||||
}
|
||||
int en = scale(len, cd.maxCharsPerByte());
|
||||
char[] ca = new char[en];
|
||||
if (len == 0)
|
||||
return ca;
|
||||
if (cd instanceof ArrayDecoder) {
|
||||
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
|
||||
return safeTrim(ca, clen, cs, isTrusted);
|
||||
return result.with(ca, 0, clen);
|
||||
}
|
||||
cd.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return result.with(ca, 0, cb.position());
|
||||
}
|
||||
}
|
||||
|
||||
private static class StringDecoder8859_1 extends StringDecoder {
|
||||
StringDecoder8859_1(Charset cs, String rcn) {
|
||||
super(cs, rcn);
|
||||
}
|
||||
Result decode(byte[] ba, int off, int len) {
|
||||
if (COMPACT_STRINGS) {
|
||||
return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
|
||||
} else {
|
||||
cd.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ca, cb.position(), cs, isTrusted);
|
||||
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static char[] decode(String charsetName, byte[] ba, int off, int len)
|
||||
static Result decode(String charsetName, byte[] ba, int off, int len)
|
||||
throws UnsupportedEncodingException
|
||||
{
|
||||
StringDecoder sd = deref(decoder);
|
||||
|
@ -183,8 +246,15 @@ class StringCoding {
|
|||
sd = null;
|
||||
try {
|
||||
Charset cs = lookupCharset(csn);
|
||||
if (cs != null)
|
||||
sd = new StringDecoder(cs, csn);
|
||||
if (cs != null) {
|
||||
if (cs == UTF_8) {
|
||||
sd = new StringDecoderUTF8(cs, csn);
|
||||
} else if (cs == ISO_8859_1) {
|
||||
sd = new StringDecoder8859_1(cs, csn);
|
||||
} else {
|
||||
sd = new StringDecoder(cs, csn);
|
||||
}
|
||||
}
|
||||
} catch (IllegalCharsetNameException x) {}
|
||||
if (sd == null)
|
||||
throw new UnsupportedEncodingException(csn);
|
||||
|
@ -193,7 +263,7 @@ class StringCoding {
|
|||
return sd.decode(ba, off, len);
|
||||
}
|
||||
|
||||
static char[] decode(Charset cs, byte[] ba, int off, int len) {
|
||||
static Result decode(Charset cs, byte[] ba, int off, int len) {
|
||||
// (1) We never cache the "external" cs; the only benefit of creating
|
||||
// an additional StringDe/Encoder object to wrap it is to share the
|
||||
// de/encode() method. These SD/E objects are short-lived, the young-gen
|
||||
|
@ -210,44 +280,57 @@ class StringCoding {
|
|||
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
|
||||
// but it then can be argued that the SM is null when the operation
|
||||
// is started...
|
||||
if (cs == UTF_8) {
|
||||
return StringDecoderUTF8.decode(ba, off, len, new Result());
|
||||
}
|
||||
CharsetDecoder cd = cs.newDecoder();
|
||||
// ascii fastpath
|
||||
if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) &&
|
||||
((ArrayDecoder)cd).isASCIICompatible() &&
|
||||
!hasNegatives(ba, off, len))) {
|
||||
if (COMPACT_STRINGS) {
|
||||
return new Result().with(Arrays.copyOfRange(ba, off, off + len),
|
||||
LATIN1);
|
||||
} else {
|
||||
return new Result().with(StringLatin1.inflate(ba, off, len), UTF16);
|
||||
}
|
||||
}
|
||||
int en = scale(len, cd.maxCharsPerByte());
|
||||
char[] ca = new char[en];
|
||||
if (len == 0)
|
||||
return ca;
|
||||
boolean isTrusted = false;
|
||||
if (System.getSecurityManager() != null) {
|
||||
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
|
||||
ba = Arrays.copyOfRange(ba, off, off + len);
|
||||
off = 0;
|
||||
}
|
||||
if (len == 0) {
|
||||
return new Result().with();
|
||||
}
|
||||
if (System.getSecurityManager() != null &&
|
||||
cs.getClass().getClassLoader0() != null) {
|
||||
ba = Arrays.copyOfRange(ba, off, off + len);
|
||||
off = 0;
|
||||
}
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
|
||||
char[] ca = new char[en];
|
||||
if (cd instanceof ArrayDecoder) {
|
||||
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
|
||||
return safeTrim(ca, clen, cs, isTrusted);
|
||||
} else {
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ca, cb.position(), cs, isTrusted);
|
||||
return new Result().with(ca, 0, clen);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return new Result().with(ca, 0, cb.position());
|
||||
}
|
||||
|
||||
static char[] decode(byte[] ba, int off, int len) {
|
||||
static Result decode(byte[] ba, int off, int len) {
|
||||
String csn = Charset.defaultCharset().name();
|
||||
try {
|
||||
// use charset name decode() variant which provides caching.
|
||||
|
@ -273,6 +356,7 @@ class StringCoding {
|
|||
private static class StringEncoder {
|
||||
private Charset cs;
|
||||
private CharsetEncoder ce;
|
||||
private final boolean isASCIICompatible;
|
||||
private final String requestedCharsetName;
|
||||
private final boolean isTrusted;
|
||||
|
||||
|
@ -283,6 +367,8 @@ class StringCoding {
|
|||
.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
this.isTrusted = (cs.getClass().getClassLoader0() == null);
|
||||
this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
|
||||
((ArrayEncoder)ce).isASCIICompatible();
|
||||
}
|
||||
|
||||
String charsetName() {
|
||||
|
@ -295,36 +381,186 @@ class StringCoding {
|
|||
return requestedCharsetName;
|
||||
}
|
||||
|
||||
byte[] encode(char[] ca, int off, int len) {
|
||||
byte[] encode(byte coder, byte[] val) {
|
||||
// fastpath for ascii compatible
|
||||
if (coder == LATIN1 && isASCIICompatible &&
|
||||
!hasNegatives(val, 0, val.length)) {
|
||||
return Arrays.copyOf(val, val.length);
|
||||
}
|
||||
int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
|
||||
int en = scale(len, ce.maxBytesPerChar());
|
||||
byte[] ba = new byte[en];
|
||||
if (len == 0)
|
||||
if (len == 0) {
|
||||
return ba;
|
||||
if (ce instanceof ArrayEncoder) {
|
||||
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
|
||||
return safeTrim(ba, blen, cs, isTrusted);
|
||||
} else {
|
||||
ce.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, off, len);
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ba, bb.position(), cs, isTrusted);
|
||||
}
|
||||
if (ce instanceof ArrayEncoder) {
|
||||
if (!isTrusted) {
|
||||
val = Arrays.copyOf(val, val.length);
|
||||
}
|
||||
int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
|
||||
: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
|
||||
if (blen != -1) {
|
||||
return safeTrim(ba, blen, isTrusted);
|
||||
}
|
||||
}
|
||||
char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
|
||||
: StringUTF16.toChars(val);
|
||||
ce.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, 0, len);
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
// Substitution is always enabled,
|
||||
// so this shouldn't happen
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ba, bb.position(), isTrusted);
|
||||
}
|
||||
}
|
||||
|
||||
static byte[] encode(String charsetName, char[] ca, int off, int len)
|
||||
@HotSpotIntrinsicCandidate
|
||||
private static int implEncodeISOArray(byte[] sa, int sp,
|
||||
byte[] da, int dp, int len) {
|
||||
int i = 0;
|
||||
for (; i < len; i++) {
|
||||
char c = StringUTF16.getChar(sa, sp++);
|
||||
if (c > '\u00FF')
|
||||
break;
|
||||
da[dp++] = (byte)c;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
static byte[] encode8859_1(byte coder, byte[] val) {
|
||||
if (coder == LATIN1) {
|
||||
return Arrays.copyOf(val, val.length);
|
||||
}
|
||||
int len = val.length >> 1;
|
||||
byte[] dst = new byte[len];
|
||||
int dp = 0;
|
||||
int sp = 0;
|
||||
int sl = len;
|
||||
while (sp < sl) {
|
||||
int ret = implEncodeISOArray(val, sp, dst, dp, len);
|
||||
sp = sp + ret;
|
||||
dp = dp + ret;
|
||||
if (ret != len) {
|
||||
char c = StringUTF16.getChar(val, sp++);
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
|
||||
sp++;
|
||||
}
|
||||
dst[dp++] = '?';
|
||||
len = sl - sp;
|
||||
}
|
||||
}
|
||||
if (dp == dst.length) {
|
||||
return dst;
|
||||
}
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
static byte[] encodeASCII(byte coder, byte[] val) {
|
||||
if (coder == LATIN1) {
|
||||
byte[] dst = new byte[val.length];
|
||||
for (int i = 0; i < val.length; i++) {
|
||||
if (val[i] < 0) {
|
||||
dst[i] = '?';
|
||||
} else {
|
||||
dst[i] = val[i];
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
int len = val.length >> 1;
|
||||
byte[] dst = new byte[len];
|
||||
int dp = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = StringUTF16.getChar(val, i);
|
||||
if (c < 0x80) {
|
||||
dst[dp++] = (byte)c;
|
||||
continue;
|
||||
}
|
||||
if (Character.isHighSurrogate(c) && i + 1 < len &&
|
||||
Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
|
||||
i++;
|
||||
}
|
||||
dst[dp++] = '?';
|
||||
}
|
||||
if (len == dp) {
|
||||
return dst;
|
||||
}
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
static byte[] encodeUTF8(byte coder, byte[] val) {
|
||||
int dp = 0;
|
||||
byte[] dst;
|
||||
if (coder == LATIN1) {
|
||||
dst = new byte[val.length << 1];
|
||||
for (int sp = 0; sp < val.length; sp++) {
|
||||
byte c = val[sp];
|
||||
if (c < 0) {
|
||||
dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else {
|
||||
dst[dp++] = c;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int sp = 0;
|
||||
int sl = val.length >> 1;
|
||||
dst = new byte[sl * 3];
|
||||
char c;
|
||||
while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
|
||||
// ascii fast loop;
|
||||
dst[dp++] = (byte)c;
|
||||
sp++;
|
||||
}
|
||||
while (sp < sl) {
|
||||
c = StringUTF16.getChar(val, sp++);
|
||||
if (c < 0x80) {
|
||||
dst[dp++] = (byte)c;
|
||||
} else if (c < 0x800) {
|
||||
dst[dp++] = (byte)(0xc0 | (c >> 6));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
int uc = -1;
|
||||
char c2;
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
|
||||
uc = Character.toCodePoint(c, c2);
|
||||
}
|
||||
if (uc < 0) {
|
||||
dst[dp++] = '?';
|
||||
} else {
|
||||
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
|
||||
sp++; // 2 chars
|
||||
}
|
||||
} else {
|
||||
// 3 bytes, 16 bits
|
||||
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
|
||||
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
|
||||
dst[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dp == dst.length) {
|
||||
return dst;
|
||||
}
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
static byte[] encode(String charsetName, byte coder, byte[] val)
|
||||
throws UnsupportedEncodingException
|
||||
{
|
||||
StringEncoder se = deref(encoder);
|
||||
|
@ -334,62 +570,88 @@ class StringCoding {
|
|||
se = null;
|
||||
try {
|
||||
Charset cs = lookupCharset(csn);
|
||||
if (cs != null)
|
||||
if (cs != null) {
|
||||
if (cs == UTF_8) {
|
||||
return encodeUTF8(coder, val);
|
||||
} else if (cs == ISO_8859_1) {
|
||||
return encode8859_1(coder, val);
|
||||
} else if (cs == US_ASCII) {
|
||||
return encodeASCII(coder, val);
|
||||
}
|
||||
se = new StringEncoder(cs, csn);
|
||||
}
|
||||
} catch (IllegalCharsetNameException x) {}
|
||||
if (se == null)
|
||||
if (se == null) {
|
||||
throw new UnsupportedEncodingException (csn);
|
||||
}
|
||||
set(encoder, se);
|
||||
}
|
||||
return se.encode(ca, off, len);
|
||||
return se.encode(coder, val);
|
||||
}
|
||||
|
||||
static byte[] encode(Charset cs, char[] ca, int off, int len) {
|
||||
static byte[] encode(Charset cs, byte coder, byte[] val) {
|
||||
if (cs == UTF_8) {
|
||||
return encodeUTF8(coder, val);
|
||||
} else if (cs == ISO_8859_1) {
|
||||
return encode8859_1(coder, val);
|
||||
} else if (cs == US_ASCII) {
|
||||
return encodeASCII(coder, val);
|
||||
}
|
||||
CharsetEncoder ce = cs.newEncoder();
|
||||
// fastpath for ascii compatible
|
||||
if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
|
||||
((ArrayEncoder)ce).isASCIICompatible() &&
|
||||
!hasNegatives(val, 0, val.length)))) {
|
||||
return Arrays.copyOf(val, val.length);
|
||||
}
|
||||
int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
|
||||
int en = scale(len, ce.maxBytesPerChar());
|
||||
byte[] ba = new byte[en];
|
||||
if (len == 0)
|
||||
if (len == 0) {
|
||||
return ba;
|
||||
boolean isTrusted = false;
|
||||
if (System.getSecurityManager() != null) {
|
||||
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
|
||||
ca = Arrays.copyOfRange(ca, off, off + len);
|
||||
off = 0;
|
||||
}
|
||||
}
|
||||
boolean isTrusted = System.getSecurityManager() == null ||
|
||||
cs.getClass().getClassLoader0() == null;
|
||||
ce.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
if (ce instanceof ArrayEncoder) {
|
||||
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
|
||||
return safeTrim(ba, blen, cs, isTrusted);
|
||||
} else {
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, off, len);
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new Error(x);
|
||||
if (!isTrusted) {
|
||||
val = Arrays.copyOf(val, val.length);
|
||||
}
|
||||
int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
|
||||
: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
|
||||
if (blen != -1) {
|
||||
return safeTrim(ba, blen, isTrusted);
|
||||
}
|
||||
return safeTrim(ba, bb.position(), cs, isTrusted);
|
||||
}
|
||||
char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
|
||||
: StringUTF16.toChars(val);
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, 0, len);
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ba, bb.position(), isTrusted);
|
||||
}
|
||||
|
||||
static byte[] encode(char[] ca, int off, int len) {
|
||||
static byte[] encode(byte coder, byte[] val) {
|
||||
String csn = Charset.defaultCharset().name();
|
||||
try {
|
||||
// use charset name encode() variant which provides caching.
|
||||
return encode(csn, ca, off, len);
|
||||
return encode(csn, coder, val);
|
||||
} catch (UnsupportedEncodingException x) {
|
||||
warnUnsupportedCharset(csn);
|
||||
}
|
||||
try {
|
||||
return encode("ISO-8859-1", ca, off, len);
|
||||
return encode("ISO-8859-1", coder, val);
|
||||
} catch (UnsupportedEncodingException x) {
|
||||
// If this code is hit during VM initialization, MessageUtils is
|
||||
// the only way we will be able to get any kind of error message.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue