8282429: StringBuilder/StringBuffer.toString() skip compressing for UTF16 strings

Reviewed-by: djelinski, redestad
This commit is contained in:
Xin Liu 2022-04-01 04:42:03 +00:00
parent 8eff80682a
commit bab431cc12
6 changed files with 122 additions and 11 deletions

View file

@ -68,6 +68,14 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
*/
byte coder;
/**
* The attribute indicates {@code value} might be compressible to LATIN1 if it is UTF16-encoded.
* An inflated byte array becomes compressible only when all of its non-latin1 chars are deleted
* or overwritten. We simply set this attribute in all methods which may delete or overwrite chars.
* Therefore, there are false positives. Subclasses and String need to handle it properly.
*/
boolean maybeLatin1;
/**
* The count is the number of characters used.
*/
@ -132,10 +140,11 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
final byte initCoder;
if (COMPACT_STRINGS) {
if (seq instanceof AbstractStringBuilder) {
initCoder = ((AbstractStringBuilder)seq).getCoder();
} else if (seq instanceof String) {
initCoder = ((String)seq).coder();
if (seq instanceof AbstractStringBuilder asb) {
initCoder = asb.getCoder();
maybeLatin1 |= asb.maybeLatin1;
} else if (seq instanceof String s) {
initCoder = s.coder();
} else {
initCoder = LATIN1;
}
@ -319,6 +328,8 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
} else {
StringUTF16.fillNull(value, count, newLength);
}
} else if (count > newLength) {
maybeLatin1 = true;
}
count = newLength;
}
@ -528,6 +539,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
inflate();
}
StringUTF16.putCharSB(value, index, ch);
maybeLatin1 = true;
}
}
@ -597,6 +609,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
inflateIfNeededFor(asb);
asb.getBytes(value, count, coder);
count += len;
maybeLatin1 |= asb.maybeLatin1;
return this;
}
@ -907,6 +920,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
if (len > 0) {
shift(end, -len);
this.count = count - len;
maybeLatin1 = true;
}
return this;
}
@ -958,6 +972,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
checkIndex(index, count);
shift(index + 1, -1);
count--;
maybeLatin1 = true;
return this;
}
@ -992,6 +1007,7 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
shift(end, newCount - count);
this.count = newCount;
putStringAt(start, str);
maybeLatin1 = true;
return this;
}

View file

@ -4521,7 +4521,8 @@ public final class String
this.coder = LATIN1;
this.value = Arrays.copyOfRange(val, 0, length);
} else {
if (COMPACT_STRINGS) {
// only try to compress val if some characters may have been deleted or overwritten.
if (COMPACT_STRINGS && asb.maybeLatin1) {
byte[] buf = StringUTF16.compress(val, 0, length);
if (buf != null) {
this.coder = LATIN1;

View file

@ -712,9 +712,7 @@ import jdk.internal.vm.annotation.IntrinsicCandidate;
@IntrinsicCandidate
public synchronized String toString() {
if (toStringCache == null) {
return toStringCache =
isLatin1() ? StringLatin1.newString(value, 0, count)
: StringUTF16.newString(value, 0, count);
return toStringCache = new String(this, null);
}
return new String(toStringCache);
}

View file

@ -450,8 +450,7 @@ public final class StringBuilder
@IntrinsicCandidate
public String toString() {
// Create a copy, don't share the array
return isLatin1() ? StringLatin1.newString(value, 0, count)
: StringUTF16.newString(value, 0, count);
return new String(this);
}
/**

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -367,6 +367,43 @@ public class CompactStringBuilder {
check(new StringBuilder(ORIGIN).reverse(), "\uD801\uFF21\uDC01A");
}
/*
* Tests for the maybeLatin1 attribute.
*
* Each scenario starts from a builder that holds chars outside Latin-1,
* removes or overwrites those chars, and then checks three builders:
* the mutated builder itself, a new builder constructed from it, and an
* empty builder to which it is appended.
*/
@Test
public void testCompactStringForMaybeLatin1() {
// Scenario 1: truncate with setLength().
StringBuilder sb = new StringBuilder("A\uDC01");
sb.setLength(0); // maybeLatin1 becomes true
check(sb, "");
check(new StringBuilder(sb).append('A'), "A");
check(new StringBuilder().append(sb), "");
// Scenario 2: overwrite the non-Latin-1 char with setCharAt().
sb = new StringBuilder("A\uDC01");
sb.setCharAt(1, 'B'); // maybeLatin1 becomes true
check(sb, "AB");
check(new StringBuilder(sb).append('A'), "ABA");
check(new StringBuilder().append(sb), "AB");
// Scenario 3: remove the non-Latin-1 char with deleteCharAt().
sb = new StringBuilder("A\uDC01");
sb.deleteCharAt(1); // maybeLatin1 becomes true
check(sb, "A");
check(new StringBuilder(sb).append('B'), "AB");
check(new StringBuilder().append(sb), "A");
// Scenario 4: remove a range of non-Latin-1 chars with delete().
sb = new StringBuilder("A\uDC01\uFF21\uD801");
sb.delete(1, 4); // maybeLatin1 becomes true
check(sb, "A");
check(new StringBuilder(sb).append('B'), "AB");
check(new StringBuilder().append(sb), "A");
// Scenario 5: replace a range of non-Latin-1 chars with a Latin-1 string.
sb = new StringBuilder("A\uDC01\uFF21\uD801");
sb.replace(1, 4, "B"); // maybeLatin1 becomes true
check(sb, "AB");
check(new StringBuilder(sb).append('A'), "ABA");
check(new StringBuilder().append(sb), "AB");
}
private void checkGetChars(StringBuilder sb, int srcBegin, int srcEnd,
char expected[]) {
char[] dst = new char[srcEnd - srcBegin];

View file

@ -0,0 +1,60 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
/**
* JMH microbenchmark for {@code StringBuilder.toString()} on a builder whose
* content is mostly Latin-1 chars plus a single char that is outside Latin-1.
* Results are reported as average time per invocation, in nanoseconds.
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Fork(3)
public class StringBuilderToString {
/**
* Capacity requested for the builder. The benchmark appends
* {@code MIXED_SIZE - 4} Latin-1 chars and one non-Latin-1 char, so the
* final length is {@code MIXED_SIZE - 3}.
*/
@Param({"128", "256", "1024"})
public int MIXED_SIZE;
/**
* This microbench simulates how java.io.BufferedReader uses StringBuilder.
*
* @return the string built from the mixed Latin-1 / non-Latin-1 content;
*         returned so that the result of the measured call is consumed
*/
@Benchmark
public String toStringWithMixedChars() {
StringBuilder sb = new StringBuilder(MIXED_SIZE);
// Fill with Latin-1 chars, staying below the requested capacity.
for (int i = 0; i < MIXED_SIZE - 4; ++i) {
sb.append('a');
}
sb.append('\u3042'); // cannot be encoded in Latin-1, so the result is not compressible
return sb.toString();
}
}