8222955: Optimize String.replace(CharSequence, CharSequence) for common cases

Reviewed-by: redestad, tvaleev
This commit is contained in:
Ivan Gerasimov 2019-05-06 18:07:55 -07:00
parent 0c6f7e5516
commit 4f02d011b0
5 changed files with 408 additions and 34 deletions

View file

@ -75,7 +75,7 @@ import static java.util.function.Predicate.not;
* System.out.println("abc");
* String cde = "cde";
* System.out.println("abc" + cde);
* String c = "abc".substring(2,3);
* String c = "abc".substring(2, 3);
* String d = cde.substring(1, 2);
* </pre></blockquote>
* <p>
@ -2160,27 +2160,48 @@ public final class String
* @since 1.5
*/
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// pre-change and post-change bodies of this method are interleaved below.
// Lines that use tgtStr, tgtLen, tgtLen1, newLenHint and j look like the
// removed implementation; lines that use trgtStr, trgtLen and replLen look
// like the added one. Confirm against the actual file before editing.
public String replace(CharSequence target, CharSequence replacement) {
String tgtStr = target.toString();
String trgtStr = target.toString();
String replStr = replacement.toString();
// presumably pre-change: bail out early when the target does not occur
int j = indexOf(tgtStr);
if (j < 0) {
return this;
}
int tgtLen = tgtStr.length();
int tgtLen1 = Math.max(tgtLen, 1);
int thisLen = length();
int trgtLen = trgtStr.length();
int replLen = replStr.length();
int newLenHint = thisLen - tgtLen + replStr.length();
if (newLenHint < 0) {
throw new OutOfMemoryError();
// Non-empty target: handled by the char overload or by the
// encoding-specific helpers.
if (trgtLen > 0) {
// One char replaced by one char: delegate to replace(char, char).
if (trgtLen == 1 && replLen == 1) {
return replace(trgtStr.charAt(0), replStr.charAt(0));
}
boolean thisIsLatin1 = this.isLatin1();
boolean trgtIsLatin1 = trgtStr.isLatin1();
boolean replIsLatin1 = replStr.isLatin1();
// StringLatin1.replace is used only when this string, the target and the
// replacement are all Latin1; every other combination goes to
// StringUTF16.replace, which is told the encoding of each argument.
String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
? StringLatin1.replace(value, thisLen,
trgtStr.value, trgtLen,
replStr.value, replLen)
: StringUTF16.replace(value, thisLen, thisIsLatin1,
trgtStr.value, trgtLen, trgtIsLatin1,
replStr.value, replLen, replIsLatin1);
// A null result from the helper means nothing was replaced, so this
// string itself is returned.
if (ret != null) {
return ret;
}
return this;
} else { // trgtLen == 0
// Empty target: the replacement is emitted once up front and once after
// every char, i.e. (thisLen + 1) copies, so the result length is
// thisLen + (thisLen + 1) * replLen. Overflow of that int computation is
// reported as OutOfMemoryError.
int resultLen;
try {
resultLen = Math.addExact(thisLen, Math.multiplyExact(
Math.addExact(thisLen, 1), replLen));
} catch (ArithmeticException ignored) {
throw new OutOfMemoryError();
}
// Builder is created with the exact final length as its capacity.
StringBuilder sb = new StringBuilder(resultLen);
sb.append(replStr);
for (int i = 0; i < thisLen; ++i) {
sb.append(charAt(i)).append(replStr);
}
return sb.toString();
}
// presumably pre-change: StringBuilder-based loop that copies the text
// between matches and appends the replacement after each match
StringBuilder sb = new StringBuilder(newLenHint);
int i = 0;
do {
sb.append(this, i, j).append(replStr);
i = j + tgtLen;
} while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0);
return sb.append(this, i, thisLen).toString();
}
/**

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,6 +42,14 @@ import static java.lang.String.checkOffset;
final class StringLatin1 {
/**
* The maximum size of array to allocate (unless necessary).
* Some VMs reserve some header words in an array.
* Attempts to allocate larger arrays may result in
* OutOfMemoryError: Requested array size exceeds VM limit
*/
private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
public static char charAt(byte[] value, int index) {
if (index < 0 || index >= value.length) {
throw new StringIndexOutOfBoundsException(index);
@ -304,7 +312,7 @@ final class StringLatin1 {
}
if (i < len) {
if (canEncode(newChar)) {
byte buf[] = new byte[len];
byte[] buf = StringConcatHelper.newArray(len);
for (int j = 0; j < i; j++) { // TBD arraycopy?
buf[j] = value[j];
}
@ -330,6 +338,64 @@ final class StringLatin1 {
return null; // for string to return this;
}
/**
 * Replaces every occurrence of a Latin1 target in a Latin1 value with a
 * Latin1 replacement.
 *
 * @param value   bytes of the string being searched
 * @param valLen  number of bytes of {@code value} to process
 * @param targ    bytes of the target; must be non-empty
 * @param targLen length of the target, asserted to be positive
 * @param repl    bytes of the replacement
 * @param replLen length of the replacement
 * @return {@code null} when {@code value} is empty or contains no match
 *         (the caller then returns the original string), the empty
 *         string when the result has length zero, otherwise a new
 *         Latin1 string
 * @throws OutOfMemoryError if the match index array cannot grow any
 *         further or the result length does not fit in an {@code int}
 */
public static String replace(byte[] value, int valLen, byte[] targ,
                             int targLen, byte[] repl, int replLen)
{
    assert targLen > 0;
    if (valLen == 0) {
        return null; // for string to return this;
    }
    int firstMatch = indexOf(value, valLen, targ, targLen, 0);
    if (firstMatch < 0) {
        return null; // for string to return this;
    }

    // Pass 1: record the start index of every non-overlapping match.
    int[] matchStarts = new int[16];
    matchStarts[0] = firstMatch;
    int matchCount = 1;
    int searchFrom = firstMatch + targLen;
    while (true) {
        int nextMatch = indexOf(value, valLen, targ, targLen, searchFrom);
        // searchFrom is at least targLen (>= 1) here, so the original
        // "> 0" continuation test is kept as-is.
        if (nextMatch <= 0) {
            break;
        }
        if (matchCount == matchStarts.length) {
            // grow by half, capped at MAX_ARRAY_SIZE
            int grown = matchCount + (matchCount >> 1);
            // overflow-conscious code
            if (grown - MAX_ARRAY_SIZE > 0) {
                if (matchCount == MAX_ARRAY_SIZE) {
                    throw new OutOfMemoryError();
                }
                grown = MAX_ARRAY_SIZE;
            }
            matchStarts = Arrays.copyOf(matchStarts, grown);
        }
        matchStarts[matchCount] = nextMatch;
        matchCount++;
        searchFrom = nextMatch + targLen;
    }

    // Result length = valLen + matchCount * (replLen - targLen).
    int resultLen;
    try {
        int delta = Math.multiplyExact(matchCount, replLen - targLen);
        resultLen = Math.addExact(valLen, delta);
    } catch (ArithmeticException ignored) {
        throw new OutOfMemoryError();
    }
    if (resultLen == 0) {
        return "";
    }

    // Pass 2: copy the unmatched stretches and splice in the replacement.
    byte[] result = StringConcatHelper.newArray(resultLen);
    int src = 0;
    int dst = 0;
    for (int m = 0; m < matchCount; m++) {
        int matchStart = matchStarts[m];
        for (; src < matchStart; src++, dst++) {
            result[dst] = value[src];
        }
        src += targLen; // skip over the matched target
        for (int k = 0; k < replLen; k++, dst++) {
            result[dst] = repl[k];
        }
    }
    // tail after the last match
    for (; src < valLen; src++, dst++) {
        result[dst] = value[src];
    }
    return new String(result, LATIN1);
}
// case insensitive
public static boolean regionMatchesCI(byte[] value, int toffset,
byte[] other, int ooffset, int len) {

View file

@ -574,7 +574,7 @@ final class StringUTF16 {
}
}
if (i < len) {
byte buf[] = new byte[value.length];
byte[] buf = new byte[value.length];
for (int j = 0; j < i; j++) {
putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
}
@ -582,21 +582,145 @@ final class StringUTF16 {
char c = getChar(value, i);
putChar(buf, i, c == oldChar ? newChar : c);
i++;
}
// Check if we should try to compress to latin1
if (String.COMPACT_STRINGS &&
!StringLatin1.canEncode(oldChar) &&
StringLatin1.canEncode(newChar)) {
byte[] val = compress(buf, 0, len);
if (val != null) {
return new String(val, LATIN1);
}
}
return new String(buf, UTF16);
}
// Check if we should try to compress to latin1
if (String.COMPACT_STRINGS &&
!StringLatin1.canEncode(oldChar) &&
StringLatin1.canEncode(newChar)) {
byte[] val = compress(buf, 0, len);
if (val != null) {
return new String(val, LATIN1);
}
}
return new String(buf, UTF16);
}
return null;
}
/**
 * Replaces every occurrence of a non-empty target within a value when at
 * least one of value, target and replacement is not Latin1 (see the
 * second assert). Each array comes with a length and a flag saying
 * whether it is Latin1-encoded.
 *
 * Returns null when nothing is replaced (the caller then returns the
 * original string), the empty string when the computed result length is
 * zero, otherwise a new string that is UTF16, or Latin1 when the
 * combination 6 compression succeeds.
 *
 * Throws OutOfMemoryError when the array of match positions cannot grow
 * any further or when the result length overflows an int.
 */
public static String replace(byte[] value, int valLen, boolean valLat1,
byte[] targ, int targLen, boolean targLat1,
byte[] repl, int replLen, boolean replLat1)
{
assert targLen > 0;
assert !valLat1 || !targLat1 || !replLat1;
// Possible combinations of the arguments/result encodings:
// +---+--------+--------+--------+-----------------------+
// | # | VALUE | TARGET | REPL | RESULT |
// +===+========+========+========+=======================+
// | 1 | Latin1 | Latin1 | UTF16 | null or UTF16 |
// +---+--------+--------+--------+-----------------------+
// | 2 | Latin1 | UTF16 | Latin1 | null |
// +---+--------+--------+--------+-----------------------+
// | 3 | Latin1 | UTF16 | UTF16 | null |
// +---+--------+--------+--------+-----------------------+
// | 4 | UTF16 | Latin1 | Latin1 | null or UTF16 |
// +---+--------+--------+--------+-----------------------+
// | 5 | UTF16 | Latin1 | UTF16 | null or UTF16 |
// +---+--------+--------+--------+-----------------------+
// | 6 | UTF16 | UTF16 | Latin1 | null, Latin1 or UTF16 |
// +---+--------+--------+--------+-----------------------+
// | 7 | UTF16 | UTF16 | UTF16 | null or UTF16 |
// +---+--------+--------+--------+-----------------------+
if (String.COMPACT_STRINGS && valLat1 && !targLat1) {
// combinations 2 or 3
return null; // for string to return this;
}
// Find the first match with the search routine that fits the encodings:
// Latin1 value (target is Latin1 too after the check above), UTF16 value
// with Latin1 target, or UTF16 value with UTF16 target.
int i = (String.COMPACT_STRINGS && valLat1)
? StringLatin1.indexOf(value, targ) :
(String.COMPACT_STRINGS && targLat1)
? indexOfLatin1(value, targ)
: indexOf(value, targ);
if (i < 0) {
return null; // for string to return this;
}
// find and store indices of substrings to replace
int j, p = 0;
int[] pos = new int[16];
pos[0] = i;
i += targLen;
// From here i is the offset at which the next search starts and p is the
// index of the last position stored in pos.
// NOTE(review): the "> 0" test presumably works as a "found" check because
// i >= targLen >= 1, assuming the indexOf variants never return an index
// below the start offset - confirm against their definitions.
while ((j = ((String.COMPACT_STRINGS && valLat1)
? StringLatin1.indexOf(value, valLen, targ, targLen, i) :
(String.COMPACT_STRINGS && targLat1)
? indexOfLatin1(value, valLen, targ, targLen, i)
: indexOf(value, valLen, targ, targLen, i))) > 0)
{
if (++p == pos.length) {
// pos is full: grow it by half of its current size, capped at
// MAX_ARRAY_SIZE
int cap = p + (p >> 1);
// overflow-conscious code
if (cap - MAX_ARRAY_SIZE > 0) {
if (p == MAX_ARRAY_SIZE) {
throw new OutOfMemoryError();
}
cap = MAX_ARRAY_SIZE;
}
pos = Arrays.copyOf(pos, cap);
}
pos[p] = j;
i = j + targLen;
}
// ++p turns p into the number of matches; the result length is
// valLen + p * (replLen - targLen), computed with overflow checks.
int resultLen;
try {
resultLen = Math.addExact(valLen,
Math.multiplyExact(++p, replLen - targLen));
} catch (ArithmeticException ignored) {
throw new OutOfMemoryError();
}
if (resultLen == 0) {
return "";
}
// The result buffer is always filled through putChar, i.e. as UTF16.
byte[] result = newBytesFor(resultLen);
// posFrom indexes chars of value, posTo indexes chars of result
int posFrom = 0, posTo = 0;
for (int q = 0; q < p; ++q) {
int nextPos = pos[q];
// Copy the chars preceding this match; a Latin1 value is widened one
// byte at a time.
if (String.COMPACT_STRINGS && valLat1) {
while (posFrom < nextPos) {
char c = (char)(value[posFrom++] & 0xff);
putChar(result, posTo++, c);
}
} else {
while (posFrom < nextPos) {
putChar(result, posTo++, getChar(value, posFrom++));
}
}
// skip over the matched target in value
posFrom += targLen;
// Emit the replacement, widening it when it is Latin1.
if (String.COMPACT_STRINGS && replLat1) {
for (int k = 0; k < replLen; ++k) {
char c = (char)(repl[k] & 0xff);
putChar(result, posTo++, c);
}
} else {
for (int k = 0; k < replLen; ++k) {
putChar(result, posTo++, getChar(repl, k));
}
}
}
// Copy the chars that follow the last match.
if (String.COMPACT_STRINGS && valLat1) {
while (posFrom < valLen) {
char c = (char)(value[posFrom++] & 0xff);
putChar(result, posTo++, c);
}
} else {
while (posFrom < valLen) {
putChar(result, posTo++, getChar(value, posFrom++));
}
}
// Only a UTF16 target swapped for a Latin1 replacement gets a compression
// attempt; a null from compress falls through to the UTF16 result.
if (String.COMPACT_STRINGS && replLat1 && !targLat1) {
// combination 6
byte[] lat1Result = compress(result, 0, resultLen);
if (lat1Result != null) {
return new String(lat1Result, LATIN1);
}
}
return new String(result, UTF16);
}
public static boolean regionMatchesCI(byte[] value, int toffset,
byte[] other, int ooffset, int len) {
int last = toffset + len;
@ -1430,6 +1554,15 @@ final class StringUTF16 {
static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
/**
* The maximum size of array to allocate (unless necessary).
* Some VMs reserve some header words in an array.
* Attempts to allocate larger arrays may result in
* OutOfMemoryError: Requested array size exceeds VM limit
*/
private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
// Used by trusted callers. Assumes all necessary bounds checks have
// been done by the caller.