mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-15 08:34:30 +02:00
8282429: StringBuilder/StringBuffer.toString() skip compressing for UTF16 strings
Reviewed-by: djelinski, redestad
This commit is contained in:
parent
8eff80682a
commit
bab431cc12
6 changed files with 122 additions and 11 deletions
|
@ -68,6 +68,14 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
*/
|
||||
byte coder;
|
||||
|
||||
/**
|
||||
* The attribute indicates {@code value} might be compressible to LATIN1 if it is UTF16-encoded.
|
||||
* An inflated byte array becomes compressible only when all of its non-latin1 chars are deleted or overwritten.
|
||||
* We simply set this attribute in all methods which may delete or overwrite chars. Therefore, there are
|
||||
* false positives. Subclasses and String need to handle it properly.
|
||||
*/
|
||||
boolean maybeLatin1;
|
||||
|
||||
/**
|
||||
* The count is the number of characters used.
|
||||
*/
|
||||
|
@ -132,10 +140,11 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
|
||||
final byte initCoder;
|
||||
if (COMPACT_STRINGS) {
|
||||
if (seq instanceof AbstractStringBuilder) {
|
||||
initCoder = ((AbstractStringBuilder)seq).getCoder();
|
||||
} else if (seq instanceof String) {
|
||||
initCoder = ((String)seq).coder();
|
||||
if (seq instanceof AbstractStringBuilder asb) {
|
||||
initCoder = asb.getCoder();
|
||||
maybeLatin1 |= asb.maybeLatin1;
|
||||
} else if (seq instanceof String s) {
|
||||
initCoder = s.coder();
|
||||
} else {
|
||||
initCoder = LATIN1;
|
||||
}
|
||||
|
@ -319,6 +328,8 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
} else {
|
||||
StringUTF16.fillNull(value, count, newLength);
|
||||
}
|
||||
} else if (count > newLength) {
|
||||
maybeLatin1 = true;
|
||||
}
|
||||
count = newLength;
|
||||
}
|
||||
|
@ -528,6 +539,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
inflate();
|
||||
}
|
||||
StringUTF16.putCharSB(value, index, ch);
|
||||
maybeLatin1 = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -597,6 +609,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
inflateIfNeededFor(asb);
|
||||
asb.getBytes(value, count, coder);
|
||||
count += len;
|
||||
maybeLatin1 |= asb.maybeLatin1;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -907,6 +920,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
if (len > 0) {
|
||||
shift(end, -len);
|
||||
this.count = count - len;
|
||||
maybeLatin1 = true;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -958,6 +972,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
checkIndex(index, count);
|
||||
shift(index + 1, -1);
|
||||
count--;
|
||||
maybeLatin1 = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -992,6 +1007,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
|||
shift(end, newCount - count);
|
||||
this.count = newCount;
|
||||
putStringAt(start, str);
|
||||
maybeLatin1 = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -4521,7 +4521,8 @@ public final class String
|
|||
this.coder = LATIN1;
|
||||
this.value = Arrays.copyOfRange(val, 0, length);
|
||||
} else {
|
||||
if (COMPACT_STRINGS) {
|
||||
// Only try to compress val if chars may have been deleted or overwritten (asb.maybeLatin1).
|
||||
if (COMPACT_STRINGS && asb.maybeLatin1) {
|
||||
byte[] buf = StringUTF16.compress(val, 0, length);
|
||||
if (buf != null) {
|
||||
this.coder = LATIN1;
|
||||
|
|
|
@ -712,9 +712,7 @@ import jdk.internal.vm.annotation.IntrinsicCandidate;
|
|||
@IntrinsicCandidate
|
||||
public synchronized String toString() {
|
||||
if (toStringCache == null) {
|
||||
return toStringCache =
|
||||
isLatin1() ? StringLatin1.newString(value, 0, count)
|
||||
: StringUTF16.newString(value, 0, count);
|
||||
return toStringCache = new String(this, null);
|
||||
}
|
||||
return new String(toStringCache);
|
||||
}
|
||||
|
|
|
@ -450,8 +450,7 @@ public final class StringBuilder
|
|||
@IntrinsicCandidate
|
||||
public String toString() {
|
||||
// Create a copy, don't share the array
|
||||
return isLatin1() ? StringLatin1.newString(value, 0, count)
|
||||
: StringUTF16.newString(value, 0, count);
|
||||
return new String(this);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -367,6 +367,43 @@ public class CompactStringBuilder {
|
|||
check(new StringBuilder(ORIGIN).reverse(), "\uD801\uFF21\uDC01A");
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests for maybeLatin1 attribute
|
||||
*/
|
||||
@Test
|
||||
public void testCompactStringForMaybeLatin1() {
|
||||
// Every scenario below starts from a builder that holds at least one
// non-Latin1 char, removes or overwrites all such chars with a single
// mutating call, and then checks three things: the builder itself, a new
// builder constructed from it, and an empty builder it is appended to.
// NOTE(review): check(...) is defined elsewhere in this test class;
// presumably it verifies both the content and the resulting String - confirm.
StringBuilder sb = new StringBuilder("A\uDC01");
|
||||
|
||||
sb.setLength(0); // truncation: maybeLatin1 becomes true
|
||||
check(sb, "");
|
||||
check(new StringBuilder(sb).append('A'), "A");
|
||||
check(new StringBuilder().append(sb), "");
|
||||
|
||||
sb = new StringBuilder("A\uDC01");
|
||||
sb.setCharAt(1, 'B'); // overwrite of the non-Latin1 char: maybeLatin1 becomes true
|
||||
check(sb, "AB");
|
||||
check(new StringBuilder(sb).append('A'), "ABA");
|
||||
check(new StringBuilder().append(sb), "AB");
|
||||
|
||||
sb = new StringBuilder("A\uDC01");
|
||||
sb.deleteCharAt(1); // single-char deletion: maybeLatin1 becomes true
|
||||
check(sb, "A");
|
||||
check(new StringBuilder(sb).append('B'), "AB");
|
||||
check(new StringBuilder().append(sb), "A");
|
||||
|
||||
sb = new StringBuilder("A\uDC01\uFF21\uD801");
|
||||
sb.delete(1, 4); // range deletion of all three non-Latin1 chars: maybeLatin1 becomes true
|
||||
check(sb, "A");
|
||||
check(new StringBuilder(sb).append('B'), "AB");
|
||||
check(new StringBuilder().append(sb), "A");
|
||||
|
||||
sb = new StringBuilder("A\uDC01\uFF21\uD801");
|
||||
sb.replace(1, 4, "B"); // range replaced by a Latin1 string: maybeLatin1 becomes true
|
||||
check(sb, "AB");
|
||||
check(new StringBuilder(sb).append('A'), "ABA");
|
||||
check(new StringBuilder().append(sb), "AB");
|
||||
}
|
||||
|
||||
private void checkGetChars(StringBuilder sb, int srcBegin, int srcEnd,
|
||||
char expected[]) {
|
||||
char[] dst = new char[srcEnd - srcBegin];
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.lang;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * JMH microbenchmark for {@code StringBuilder.toString()} on a builder whose
 * content is mostly Latin1 chars plus one char that cannot be encoded in Latin1.
 * Results are reported as average time per invocation, in nanoseconds.
 */
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(3)
|
||||
public class StringBuilderToString {
|
||||
// Initial capacity of the builder; the benchmark appends MIXED_SIZE - 3 chars
// in total (MIXED_SIZE - 4 Latin1 chars plus one non-Latin1 char).
@Param({"128", "256", "1024"})
|
||||
public int MIXED_SIZE;
|
||||
|
||||
/**
|
||||
* This microbench simulates how java.io.BufferedReader uses StringBuilder.
|
||||
*/
|
||||
@Benchmark
|
||||
public String toStringWithMixedChars() {
|
||||
StringBuilder sb = new StringBuilder(MIXED_SIZE);
|
||||
for (int i = 0; i < MIXED_SIZE - 4; ++i) {
|
||||
sb.append('a');
|
||||
}
|
||||
// Only appends are performed - nothing is deleted or overwritten - so this is
// the append-only case in which toString() should not need to try compressing.
sb.append('\u3042'); // can't be encoded in latin-1
|
||||
// The String is returned so the benchmark's result is consumed by the harness.
return sb.toString();
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue