8158168: Missing bounds checks for some String intrinsics

Reviewed-by: vlivanov, thartmann, sherman
This commit is contained in:
Dean Long 2017-04-12 16:37:33 -04:00
parent 456c8e0846
commit 73551c45ef
7 changed files with 417 additions and 254 deletions

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -307,6 +307,8 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* sequence. * sequence.
*/ */
public int codePointAt(int index) { public int codePointAt(int index) {
int count = this.count;
byte[] value = this.value;
checkIndex(index, count); checkIndex(index, count);
if (isLatin1()) { if (isLatin1()) {
return value[index] & 0xff; return value[index] & 0xff;
@ -560,11 +562,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
val[count++] = 'l'; val[count++] = 'l';
val[count++] = 'l'; val[count++] = 'l';
} else { } else {
checkOffset(count + 4, val.length >> 1); count = StringUTF16.putCharsAt(val, count, 'n', 'u', 'l', 'l');
StringUTF16.putChar(val, count++, 'n');
StringUTF16.putChar(val, count++, 'u');
StringUTF16.putChar(val, count++, 'l');
StringUTF16.putChar(val, count++, 'l');
} }
this.count = count; this.count = count;
return this; return this;
@ -695,18 +693,9 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
} }
} else { } else {
if (b) { if (b) {
checkOffset(count + 4, val.length >> 1); count = StringUTF16.putCharsAt(val, count, 't', 'r', 'u', 'e');
StringUTF16.putChar(val, count++, 't');
StringUTF16.putChar(val, count++, 'r');
StringUTF16.putChar(val, count++, 'u');
StringUTF16.putChar(val, count++, 'e');
} else { } else {
checkOffset(count + 5, val.length >> 1); count = StringUTF16.putCharsAt(val, count, 'f', 'a', 'l', 's', 'e');
StringUTF16.putChar(val, count++, 'f');
StringUTF16.putChar(val, count++, 'a');
StringUTF16.putChar(val, count++, 'l');
StringUTF16.putChar(val, count++, 's');
StringUTF16.putChar(val, count++, 'e');
} }
} }
this.count = count; this.count = count;
@ -755,16 +744,15 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* @return a reference to this object. * @return a reference to this object.
*/ */
public AbstractStringBuilder append(int i) { public AbstractStringBuilder append(int i) {
int count = this.count;
int spaceNeeded = count + Integer.stringSize(i); int spaceNeeded = count + Integer.stringSize(i);
ensureCapacityInternal(spaceNeeded); ensureCapacityInternal(spaceNeeded);
if (isLatin1()) { if (isLatin1()) {
Integer.getChars(i, spaceNeeded, value); Integer.getChars(i, spaceNeeded, value);
} else { } else {
byte[] val = this.value; StringUTF16.getChars(i, count, spaceNeeded, value);
checkOffset(spaceNeeded, val.length >> 1);
Integer.getCharsUTF16(i, spaceNeeded, val);
} }
count = spaceNeeded; this.count = spaceNeeded;
return this; return this;
} }
@ -781,16 +769,15 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* @return a reference to this object. * @return a reference to this object.
*/ */
public AbstractStringBuilder append(long l) { public AbstractStringBuilder append(long l) {
int count = this.count;
int spaceNeeded = count + Long.stringSize(l); int spaceNeeded = count + Long.stringSize(l);
ensureCapacityInternal(spaceNeeded); ensureCapacityInternal(spaceNeeded);
if (isLatin1()) { if (isLatin1()) {
Long.getChars(l, spaceNeeded, value); Long.getChars(l, spaceNeeded, value);
} else { } else {
byte[] val = this.value; StringUTF16.getChars(l, count, spaceNeeded, value);
checkOffset(spaceNeeded, val.length >> 1);
Long.getCharsUTF16(l, spaceNeeded, val);
} }
count = spaceNeeded; this.count = spaceNeeded;
return this; return this;
} }
@ -843,6 +830,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* greater than {@code end}. * greater than {@code end}.
*/ */
public AbstractStringBuilder delete(int start, int end) { public AbstractStringBuilder delete(int start, int end) {
int count = this.count;
if (end > count) { if (end > count) {
end = count; end = count;
} }
@ -850,7 +838,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
int len = end - start; int len = end - start;
if (len > 0) { if (len > 0) {
shift(end, -len); shift(end, -len);
count -= len; this.count = count - len;
} }
return this; return this;
} }
@ -925,6 +913,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* greater than {@code end}. * greater than {@code end}.
*/ */
public AbstractStringBuilder replace(int start, int end, String str) { public AbstractStringBuilder replace(int start, int end, String str) {
int count = this.count;
if (end > count) { if (end > count) {
end = count; end = count;
} }
@ -933,7 +922,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
int newCount = count + len - (end - start); int newCount = count + len - (end - start);
ensureCapacityInternal(newCount); ensureCapacityInternal(newCount);
shift(end, newCount - count); shift(end, newCount - count);
count = newCount; this.count = newCount;
putStringAt(start, str); putStringAt(start, str);
return this; return this;
} }
@ -1500,40 +1489,11 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
val[k] = cj; val[k] = cj;
} }
} else { } else {
checkOffset(count, val.length >> 1); StringUTF16.reverse(val, count);
boolean hasSurrogates = false;
for (int j = (n-1) >> 1; j >= 0; j--) {
int k = n - j;
char cj = StringUTF16.getChar(val, j);
char ck = StringUTF16.getChar(val, k);
StringUTF16.putChar(val, j, ck);
StringUTF16.putChar(val, k, cj);
if (Character.isSurrogate(cj) ||
Character.isSurrogate(ck)) {
hasSurrogates = true;
}
}
if (hasSurrogates) {
reverseAllValidSurrogatePairs(val, count);
}
} }
return this; return this;
} }
/** Outlined helper method for reverse() */
private void reverseAllValidSurrogatePairs(byte[] val, int count) {
for (int i = 0; i < count - 1; i++) {
char c2 = StringUTF16.getChar(val, i);
if (Character.isLowSurrogate(c2)) {
char c1 = StringUTF16.getChar(val, i + 1);
if (Character.isHighSurrogate(c1)) {
StringUTF16.putChar(val, i++, c1);
StringUTF16.putChar(val, i, c2);
}
}
}
}
/** /**
* Returns a string representing the data in this sequence. * Returns a string representing the data in this sequence.
* A new {@code String} object is allocated and initialized to * A new {@code String} object is allocated and initialized to
@ -1682,6 +1642,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
} }
private final void appendChars(char[] s, int off, int end) { private final void appendChars(char[] s, int off, int end) {
int count = this.count;
if (isLatin1()) { if (isLatin1()) {
byte[] val = this.value; byte[] val = this.value;
for (int i = off, j = count; i < end; i++) { for (int i = off, j = count; i < end; i++) {
@ -1689,17 +1650,17 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
if (StringLatin1.canEncode(c)) { if (StringLatin1.canEncode(c)) {
val[j++] = (byte)c; val[j++] = (byte)c;
} else { } else {
count = j; this.count = count = j;
inflate(); inflate();
StringUTF16.putCharsSB(this.value, j, s, i, end); StringUTF16.putCharsSB(this.value, j, s, i, end);
count += end - i; this.count = count + end - i;
return; return;
} }
} }
} else { } else {
StringUTF16.putCharsSB(this.value, count, s, off, end); StringUTF16.putCharsSB(this.value, count, s, off, end);
} }
count += end - off; this.count = count + end - off;
} }
private final void appendChars(CharSequence s, int off, int end) { private final void appendChars(CharSequence s, int off, int end) {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1994, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1994, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -386,7 +386,7 @@ public final class Integer extends Number implements Comparable<Integer> {
} }
/** byte[]/UTF16 version */ /** byte[]/UTF16 version */
static void formatUnsignedIntUTF16(int val, int shift, byte[] buf, int offset, int len) { private static void formatUnsignedIntUTF16(int val, int shift, byte[] buf, int offset, int len) {
int charPos = offset + len; int charPos = offset + len;
int radix = 1 << shift; int radix = 1 << shift;
int mask = radix - 1; int mask = radix - 1;
@ -442,7 +442,7 @@ public final class Integer extends Number implements Comparable<Integer> {
return new String(buf, LATIN1); return new String(buf, LATIN1);
} else { } else {
byte[] buf = new byte[size * 2]; byte[] buf = new byte[size * 2];
getCharsUTF16(i, size, buf); StringUTF16.getChars(i, size, buf);
return new String(buf, UTF16); return new String(buf, UTF16);
} }
} }
@ -516,49 +516,6 @@ public final class Integer extends Number implements Comparable<Integer> {
return charPos; return charPos;
} }
/**
* This is a variant of {@link #getChars(int, int, byte[])}, but for
* UTF-16 coder.
*
* @param i value to convert
* @param index next index, after the least significant digit
* @param buf target buffer, UTF16-coded.
* @return index of the most significant digit or minus sign, if present
*/
static int getCharsUTF16(int i, int index, byte[] buf) {
int q, r;
int charPos = index;
boolean negative = (i < 0);
if (!negative) {
i = -i;
}
// Get 2 digits/iteration using ints
while (i <= -100) {
q = i / 100;
r = (q * 100) - i;
i = q;
StringUTF16.putChar(buf, --charPos, DigitOnes[r]);
StringUTF16.putChar(buf, --charPos, DigitTens[r]);
}
// We know there are at most two digits left at this point.
q = i / 10;
r = (q * 10) - i;
StringUTF16.putChar(buf, --charPos, '0' + r);
// Whatever left is the remaining digit.
if (q < 0) {
StringUTF16.putChar(buf, --charPos, '0' - q);
}
if (negative) {
StringUTF16.putChar(buf, --charPos, '-');
}
return charPos;
}
// Left here for compatibility reasons, see JDK-8143900. // Left here for compatibility reasons, see JDK-8143900.
static final int [] sizeTable = { 9, 99, 999, 9999, 99999, 999999, 9999999, static final int [] sizeTable = { 9, 99, 999, 9999, 99999, 999999, 9999999,
99999999, 999999999, Integer.MAX_VALUE }; 99999999, 999999999, Integer.MAX_VALUE };

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1994, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1994, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -414,7 +414,7 @@ public final class Long extends Number implements Comparable<Long> {
} }
/** byte[]/UTF16 version */ /** byte[]/UTF16 version */
static void formatUnsignedLong0UTF16(long val, int shift, byte[] buf, int offset, int len) { private static void formatUnsignedLong0UTF16(long val, int shift, byte[] buf, int offset, int len) {
int charPos = offset + len; int charPos = offset + len;
int radix = 1 << shift; int radix = 1 << shift;
int mask = radix - 1; int mask = radix - 1;
@ -475,7 +475,7 @@ public final class Long extends Number implements Comparable<Long> {
return new String(buf, LATIN1); return new String(buf, LATIN1);
} else { } else {
byte[] buf = new byte[size * 2]; byte[] buf = new byte[size * 2];
getCharsUTF16(i, size, buf); StringUTF16.getChars(i, size, buf);
return new String(buf, UTF16); return new String(buf, UTF16);
} }
} }
@ -561,61 +561,6 @@ public final class Long extends Number implements Comparable<Long> {
return charPos; return charPos;
} }
/**
* This is a variant of {@link #getChars(long, int, byte[])}, but for
* UTF-16 coder.
*
* @param i value to convert
* @param index next index, after the least significant digit
* @param buf target buffer, UTF16-coded.
* @return index of the most significant digit or minus sign, if present
*/
static int getCharsUTF16(long i, int index, byte[] buf) {
long q;
int r;
int charPos = index;
boolean negative = (i < 0);
if (!negative) {
i = -i;
}
// Get 2 digits/iteration using longs until quotient fits into an int
while (i <= Integer.MIN_VALUE) {
q = i / 100;
r = (int)((q * 100) - i);
i = q;
StringUTF16.putChar(buf, --charPos, Integer.DigitOnes[r]);
StringUTF16.putChar(buf, --charPos, Integer.DigitTens[r]);
}
// Get 2 digits/iteration using ints
int q2;
int i2 = (int)i;
while (i2 <= -100) {
q2 = i2 / 100;
r = (q2 * 100) - i2;
i2 = q2;
StringUTF16.putChar(buf, --charPos, Integer.DigitOnes[r]);
StringUTF16.putChar(buf, --charPos, Integer.DigitTens[r]);
}
// We know there are at most two digits left at this point.
q2 = i2 / 10;
r = (q2 * 10) - i2;
StringUTF16.putChar(buf, --charPos, '0' + r);
// Whatever left is the remaining digit.
if (q2 < 0) {
StringUTF16.putChar(buf, --charPos, '0' - q2);
}
if (negative) {
StringUTF16.putChar(buf, --charPos, '-');
}
return charPos;
}
/** /**
* Returns the string representation size for a given long value. * Returns the string representation size for a given long value.
* *

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1994, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1994, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1064,11 +1064,7 @@ public final class String
if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal" if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal"
return false; return false;
} }
for (int i = 0; i < len; i++) { return StringUTF16.contentEquals(v1, v2, len);
if ((char)(v1[i] & 0xff) != StringUTF16.getChar(v2, i)) {
return false;
}
}
} }
return true; return true;
} }
@ -1120,10 +1116,8 @@ public final class String
} }
} }
} else { } else {
for (int i = 0; i < n; i++) { if (!StringUTF16.contentEquals(val, cs, n)) {
if (StringUTF16.getChar(val, i) != cs.charAt(i)) { return false;
return false;
}
} }
} }
return true; return true;
@ -1734,6 +1728,9 @@ public final class String
if (tgtCount == 0) { if (tgtCount == 0) {
return fromIndex; return fromIndex;
} }
if (tgtCount > srcCount) {
return -1;
}
if (srcCoder == tgtCoder) { if (srcCoder == tgtCoder) {
return srcCoder == LATIN1 return srcCoder == LATIN1
? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex) ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
@ -1792,7 +1789,7 @@ public final class String
* is the string being searched for. * is the string being searched for.
* *
* @param src the characters being searched. * @param src the characters being searched.
* @param srcCoder coder handles the mapping between bytes/chars * @param srcCoder coder handles the mapping between bytes/chars
* @param srcCount count of the source string. * @param srcCount count of the source string.
* @param tgt the characters being searched for. * @param tgt the characters being searched for.
* @param fromIndex the index to begin searching from. * @param fromIndex the index to begin searching from.
@ -1807,12 +1804,12 @@ public final class String
* consistency, don't check for null str. * consistency, don't check for null str.
*/ */
int rightIndex = srcCount - tgtCount; int rightIndex = srcCount - tgtCount;
if (fromIndex < 0) {
return -1;
}
if (fromIndex > rightIndex) { if (fromIndex > rightIndex) {
fromIndex = rightIndex; fromIndex = rightIndex;
} }
if (fromIndex < 0) {
return -1;
}
/* Empty string always matches. */ /* Empty string always matches. */
if (tgtCount == 0) { if (tgtCount == 0) {
return fromIndex; return fromIndex;
@ -1825,31 +1822,8 @@ public final class String
if (srcCoder == LATIN1) { // && tgtCoder == UTF16 if (srcCoder == LATIN1) { // && tgtCoder == UTF16
return -1; return -1;
} }
// srcCoder == UTF16 && tgtCoder == LATIN1 // srcCoder == UTF16 && tgtCoder == LATIN1
int min = tgtCount - 1; return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
int i = min + fromIndex;
int strLastIndex = tgtCount - 1;
char strLastChar = (char)(tgt[strLastIndex] & 0xff);
startSearchForLastChar:
while (true) {
while (i >= min && StringUTF16.getChar(src, i) != strLastChar) {
i--;
}
if (i < min) {
return -1;
}
int j = i - 1;
int start = j - strLastIndex;
int k = strLastIndex - 1;
while (j > start) {
if (StringUTF16.getChar(src, j--) != (tgt[k--] & 0xff)) {
i--;
continue startSearchForLastChar;
}
}
return start + 1;
}
} }
/** /**
@ -3078,7 +3052,8 @@ public final class String
*/ */
static void checkIndex(int index, int length) { static void checkIndex(int index, int length) {
if (index < 0 || index >= length) { if (index < 0 || index >= length) {
throw new StringIndexOutOfBoundsException("index " + index); throw new StringIndexOutOfBoundsException("index " + index +
",length " + length);
} }
} }
@ -3116,7 +3091,7 @@ public final class String
* If {@code begin} is negative, {@code begin} is greater than * If {@code begin} is negative, {@code begin} is greater than
* {@code end}, or {@code end} is greater than {@code length}. * {@code end}, or {@code end} is greater than {@code length}.
*/ */
private static void checkBoundsBeginEnd(int begin, int end, int length) { static void checkBoundsBeginEnd(int begin, int end, int length) {
if (begin < 0 || begin > end || end > length) { if (begin < 0 || begin > end || end > length) {
throw new StringIndexOutOfBoundsException( throw new StringIndexOutOfBoundsException(
"begin " + begin + ", end " + end + ", length " + length); "begin " + begin + ", end " + end + ", length " + length);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -293,7 +293,7 @@ final class StringConcatHelper {
if (coder == String.LATIN1) { if (coder == String.LATIN1) {
return Integer.getChars(value, index, buf); return Integer.getChars(value, index, buf);
} else { } else {
return Integer.getCharsUTF16(value, index, buf); return StringUTF16.getChars(value, index, buf);
} }
} }
@ -311,7 +311,7 @@ final class StringConcatHelper {
if (coder == String.LATIN1) { if (coder == String.LATIN1) {
return Long.getChars(value, index, buf); return Long.getChars(value, index, buf);
} else { } else {
return Long.getCharsUTF16(value, index, buf); return StringUTF16.getChars(value, index, buf);
} }
} }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -36,7 +36,6 @@ import jdk.internal.HotSpotIntrinsicCandidate;
import static java.lang.String.LATIN1; import static java.lang.String.LATIN1;
import static java.lang.String.UTF16; import static java.lang.String.UTF16;
import static java.lang.String.checkOffset; import static java.lang.String.checkOffset;
import static java.lang.String.checkBoundsOffCount;
final class StringLatin1 { final class StringLatin1 {
@ -566,11 +565,7 @@ final class StringLatin1 {
// inflatedCopy byte[] -> byte[] // inflatedCopy byte[] -> byte[]
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
// We need a range check here because 'putChar' has no checks StringUTF16.inflate(src, srcOff, dst, dstOff, len);
checkBoundsOffCount(dstOff << 1, len << 1, dst.length);
for (int i = 0; i < len; i++) {
StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff);
}
} }
static class CharsSpliterator implements Spliterator.OfInt { static class CharsSpliterator implements Spliterator.OfInt {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -30,12 +30,11 @@ import java.util.Locale;
import java.util.Spliterator; import java.util.Spliterator;
import java.util.function.IntConsumer; import java.util.function.IntConsumer;
import jdk.internal.HotSpotIntrinsicCandidate; import jdk.internal.HotSpotIntrinsicCandidate;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.annotation.DontInline;
import static java.lang.String.UTF16; import static java.lang.String.UTF16;
import static java.lang.String.LATIN1; import static java.lang.String.LATIN1;
import static java.lang.String.checkIndex;
import static java.lang.String.checkOffset;
import static java.lang.String.checkBoundsOffCount;
final class StringUTF16 { final class StringUTF16 {
@ -51,33 +50,37 @@ final class StringUTF16 {
} }
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static void putChar(byte[] val, int index, int c) { // intrinsic performs no bounds checks
static void putChar(byte[] val, int index, int c) {
assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
index <<= 1; index <<= 1;
val[index++] = (byte)(c >> HI_BYTE_SHIFT); val[index++] = (byte)(c >> HI_BYTE_SHIFT);
val[index] = (byte)(c >> LO_BYTE_SHIFT); val[index] = (byte)(c >> LO_BYTE_SHIFT);
} }
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static char getChar(byte[] val, int index) { // intrinsic performs no bounds checks
static char getChar(byte[] val, int index) {
assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
index <<= 1; index <<= 1;
return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
((val[index] & 0xff) << LO_BYTE_SHIFT)); ((val[index] & 0xff) << LO_BYTE_SHIFT));
} }
public static char charAt(byte[] value, int index) {
if (index < 0 || index >= value.length >> 1) {
throw new StringIndexOutOfBoundsException(index);
}
return getChar(value, index);
}
public static int length(byte[] value) { public static int length(byte[] value) {
return value.length >> 1; return value.length >> 1;
} }
public static int codePointAt(byte[] value, int index, int end) { private static int codePointAt(byte[] value, int index, int end, boolean checked) {
assert index < end;
if (checked) {
checkIndex(index, value);
}
char c1 = getChar(value, index); char c1 = getChar(value, index);
if (Character.isHighSurrogate(c1) && ++index < end) { if (Character.isHighSurrogate(c1) && ++index < end) {
if (checked) {
checkIndex(index, value);
}
char c2 = getChar(value, index); char c2 = getChar(value, index);
if (Character.isLowSurrogate(c2)) { if (Character.isLowSurrogate(c2)) {
return Character.toCodePoint(c1, c2); return Character.toCodePoint(c1, c2);
@ -86,10 +89,22 @@ final class StringUTF16 {
return c1; return c1;
} }
public static int codePointBefore(byte[] value, int index) { public static int codePointAt(byte[] value, int index, int end) {
char c2 = getChar(value, --index); return codePointAt(value, index, end, false /* unchecked */);
}
private static int codePointBefore(byte[] value, int index, boolean checked) {
--index;
if (checked) {
checkIndex(index, value);
}
char c2 = getChar(value, index);
if (Character.isLowSurrogate(c2) && index > 0) { if (Character.isLowSurrogate(c2) && index > 0) {
char c1 = getChar(value, --index); --index;
if (checked) {
checkIndex(index, value);
}
char c1 = getChar(value, index);
if (Character.isHighSurrogate(c1)) { if (Character.isHighSurrogate(c1)) {
return Character.toCodePoint(c1, c2); return Character.toCodePoint(c1, c2);
} }
@ -97,11 +112,19 @@ final class StringUTF16 {
return c2; return c2;
} }
public static int codePointCount(byte[] value, int beginIndex, int endIndex) { public static int codePointBefore(byte[] value, int index) {
return codePointBefore(value, index, false /* unchecked */);
}
private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) {
assert beginIndex <= endIndex;
int count = endIndex - beginIndex; int count = endIndex - beginIndex;
for (int i = beginIndex; i < endIndex; ) { int i = beginIndex;
if (checked && i < endIndex) {
checkBoundsBeginEnd(i, endIndex, value);
}
for (; i < endIndex - 1; ) {
if (Character.isHighSurrogate(getChar(value, i++)) && if (Character.isHighSurrogate(getChar(value, i++)) &&
i < endIndex &&
Character.isLowSurrogate(getChar(value, i))) { Character.isLowSurrogate(getChar(value, i))) {
count--; count--;
i++; i++;
@ -110,6 +133,10 @@ final class StringUTF16 {
return count; return count;
} }
public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
return codePointCount(value, beginIndex, endIndex, false /* unchecked */);
}
public static char[] toChars(byte[] value) { public static char[] toChars(byte[] value) {
char[] dst = new char[value.length >> 1]; char[] dst = new char[value.length >> 1];
getChars(value, 0, dst.length, dst, 0); getChars(value, 0, dst.length, dst, 0);
@ -162,7 +189,7 @@ final class StringUTF16 {
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
// We need a range check here because 'getChar' has no checks // We need a range check here because 'getChar' has no checks
checkBoundsOffCount(srcOff << 1, len << 1, src.length); checkBoundsOffCount(srcOff, len, src);
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
char c = getChar(src, srcOff); char c = getChar(src, srcOff);
if (c > 0xFF) { if (c > 0xFF) {
@ -212,7 +239,7 @@ final class StringUTF16 {
public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
// We need a range check here because 'getChar' has no checks // We need a range check here because 'getChar' has no checks
if (srcBegin < srcEnd) { if (srcBegin < srcEnd) {
checkBoundsOffCount(srcBegin << 1, (srcEnd - srcBegin) << 1, value.length); checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
} }
for (int i = srcBegin; i < srcEnd; i++) { for (int i = srcBegin; i < srcEnd; i++) {
dst[dstBegin++] = getChar(value, i); dst[dstBegin++] = getChar(value, i);
@ -319,14 +346,25 @@ final class StringUTF16 {
if (str.length == 0) { if (str.length == 0) {
return 0; return 0;
} }
if (value.length == 0) { if (value.length < str.length) {
return -1; return -1;
} }
return indexOf(value, length(value), str, length(str), 0); return indexOfUnsafe(value, length(value), str, length(str), 0);
} }
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
checkBoundsBeginEnd(fromIndex, valueCount, value);
checkBoundsBeginEnd(0, strCount, str);
return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
}
private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
assert fromIndex >= 0;
assert strCount > 0;
assert strCount <= length(str);
assert valueCount >= strCount;
char first = getChar(str, 0); char first = getChar(str, 0);
int max = (valueCount - strCount); int max = (valueCount - strCount);
for (int i = fromIndex; i <= max; i++) { for (int i = fromIndex; i <= max; i++) {
@ -348,6 +386,7 @@ final class StringUTF16 {
return -1; return -1;
} }
/** /**
* Handles indexOf Latin1 substring in UTF16 string. * Handles indexOf Latin1 substring in UTF16 string.
*/ */
@ -356,14 +395,24 @@ final class StringUTF16 {
if (str.length == 0) { if (str.length == 0) {
return 0; return 0;
} }
if (value.length == 0) { if (length(value) < str.length) {
return -1; return -1;
} }
return indexOfLatin1(value, length(value), str, str.length, 0); return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
} }
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
checkBoundsBeginEnd(fromIndex, srcCount, src);
String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
}
public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
assert fromIndex >= 0;
assert tgtCount > 0;
assert tgtCount <= tgt.length;
assert srcCount >= tgtCount;
char first = (char)(tgt[0] & 0xff); char first = (char)(tgt[0] & 0xff);
int max = (srcCount - tgtCount); int max = (srcCount - tgtCount);
for (int i = fromIndex; i <= max; i++) { for (int i = fromIndex; i <= max; i++) {
@ -389,6 +438,11 @@ final class StringUTF16 {
@HotSpotIntrinsicCandidate @HotSpotIntrinsicCandidate
private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
checkBoundsBeginEnd(fromIndex, max, value);
return indexOfCharUnsafe(value, ch, fromIndex, max);
}
private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) {
for (int i = fromIndex; i < max; i++) { for (int i = fromIndex; i < max; i++) {
if (getChar(value, i) == ch) { if (getChar(value, i) == ch) {
return i; return i;
@ -404,6 +458,7 @@ final class StringUTF16 {
if (Character.isValidCodePoint(ch)) { if (Character.isValidCodePoint(ch)) {
final char hi = Character.highSurrogate(ch); final char hi = Character.highSurrogate(ch);
final char lo = Character.lowSurrogate(ch); final char lo = Character.lowSurrogate(ch);
checkBoundsBeginEnd(fromIndex, max, value);
for (int i = fromIndex; i < max - 1; i++) { for (int i = fromIndex; i < max - 1; i++) {
if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
return i; return i;
@ -413,13 +468,21 @@ final class StringUTF16 {
return -1; return -1;
} }
// srcCoder == UTF16 && tgtCoder == UTF16
public static int lastIndexOf(byte[] src, int srcCount, public static int lastIndexOf(byte[] src, int srcCount,
byte[] tgt, int tgtCount, int fromIndex) { byte[] tgt, int tgtCount, int fromIndex) {
assert fromIndex >= 0;
assert tgtCount > 0;
assert tgtCount <= length(tgt);
int min = tgtCount - 1; int min = tgtCount - 1;
int i = min + fromIndex; int i = min + fromIndex;
int strLastIndex = tgtCount - 1; int strLastIndex = tgtCount - 1;
checkIndex(strLastIndex, tgt);
char strLastChar = getChar(tgt, strLastIndex); char strLastChar = getChar(tgt, strLastIndex);
checkIndex(i, src);
startSearchForLastChar: startSearchForLastChar:
while (true) { while (true) {
while (i >= min && getChar(src, i) != strLastChar) { while (i >= min && getChar(src, i) != strLastChar) {
@ -509,6 +572,9 @@ final class StringUTF16 {
public static boolean regionMatchesCI(byte[] value, int toffset, public static boolean regionMatchesCI(byte[] value, int toffset,
byte[] other, int ooffset, int len) { byte[] other, int ooffset, int len) {
int last = toffset + len; int last = toffset + len;
assert toffset >= 0 && ooffset >= 0;
assert ooffset + len <= length(other);
assert last <= length(value);
while (toffset < last) { while (toffset < last) {
char c1 = getChar(value, toffset++); char c1 = getChar(value, toffset++);
char c2 = getChar(other, ooffset++); char c2 = getChar(other, ooffset++);
@ -599,6 +665,8 @@ final class StringUTF16 {
private static String toLowerCaseEx(String str, byte[] value, private static String toLowerCaseEx(String str, byte[] value,
byte[] result, int first, Locale locale, byte[] result, int first, Locale locale,
boolean localeDependent) { boolean localeDependent) {
assert(result.length == value.length);
assert(first >= 0);
int resultOffset = first; int resultOffset = first;
int length = value.length >> 1; int length = value.length >> 1;
int srcCount; int srcCount;
@ -633,6 +701,8 @@ final class StringUTF16 {
System.arraycopy(result, 0, result2, 0, resultOffset << 1); System.arraycopy(result, 0, result2, 0, resultOffset << 1);
result = result2; result = result2;
} }
assert resultOffset >= 0;
assert resultOffset + mapLen <= length(result);
for (int x = 0; x < mapLen; ++x) { for (int x = 0; x < mapLen; ++x) {
putChar(result, resultOffset++, lowerCharArray[x]); putChar(result, resultOffset++, lowerCharArray[x]);
} }
@ -697,6 +767,8 @@ final class StringUTF16 {
byte[] result, int first, byte[] result, int first,
Locale locale, boolean localeDependent) Locale locale, boolean localeDependent)
{ {
assert(result.length == value.length);
assert(first >= 0);
int resultOffset = first; int resultOffset = first;
int length = value.length >> 1; int length = value.length >> 1;
int srcCount; int srcCount;
@ -733,10 +805,12 @@ final class StringUTF16 {
byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
System.arraycopy(result, 0, result2, 0, resultOffset << 1); System.arraycopy(result, 0, result2, 0, resultOffset << 1);
result = result2; result = result2;
} }
for (int x = 0; x < mapLen; ++x) { assert resultOffset >= 0;
assert resultOffset + mapLen <= length(result);
for (int x = 0; x < mapLen; ++x) {
putChar(result, resultOffset++, upperCharArray[x]); putChar(result, resultOffset++, upperCharArray[x]);
} }
} }
} }
return newString(result, 0, resultOffset); return newString(result, 0, resultOffset);
@ -757,7 +831,7 @@ final class StringUTF16 {
null; null;
} }
public static void putChars(byte[] val, int index, char[] str, int off, int end) { private static void putChars(byte[] val, int index, char[] str, int off, int end) {
while (off < end) { while (off < end) {
putChar(val, index++, str[off++]); putChar(val, index++, str[off++]);
} }
@ -927,35 +1001,172 @@ final class StringUTF16 {
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
public static void putCharSB(byte[] val, int index, int c) { public static void putCharSB(byte[] val, int index, int c) {
checkIndex(index, val.length >> 1); checkIndex(index, val);
putChar(val, index, c); putChar(val, index, c);
} }
public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
checkOffset(index + end - off, val.length >> 1); checkBoundsBeginEnd(index, index + end - off, val);
putChars(val, index, ca, off, end); putChars(val, index, ca, off, end);
} }
public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
checkOffset(index + end - off, val.length >> 1); checkBoundsBeginEnd(index, index + end - off, val);
for (int i = off; i < end; i++) { for (int i = off; i < end; i++) {
putChar(val, index++, s.charAt(i)); putChar(val, index++, s.charAt(i));
} }
} }
public static int codePointAtSB(byte[] val, int index, int end) { public static int codePointAtSB(byte[] val, int index, int end) {
checkOffset(end, val.length >> 1); return codePointAt(val, index, end, true /* checked */);
return codePointAt(val, index, end);
} }
public static int codePointBeforeSB(byte[] val, int index) { public static int codePointBeforeSB(byte[] val, int index) {
checkOffset(index, val.length >> 1); return codePointBefore(val, index, true /* checked */);
return codePointBefore(val, index);
} }
public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
checkOffset(endIndex, val.length >> 1); return codePointCount(val, beginIndex, endIndex, true /* checked */);
return codePointCount(val, beginIndex, endIndex); }
/**
 * Range-checked front end for {@code getChars(int, int, byte[])}.
 *
 * <p>Validates {@code [begin, end)} against {@code value}, then lets the
 * unchecked converter write the digits ending just before {@code end}.
 *
 * @param i     value to convert
 * @param begin expected index of the first char written
 * @param end   index just after the least significant digit
 * @param value target buffer, UTF16-coded
 * @return index of the first char written (asserted to equal {@code begin})
 */
public static int getChars(int i, int begin, int end, byte[] value) {
    checkBoundsBeginEnd(begin, end, value);
    final int firstWritten = getChars(i, end, value);
    assert firstWritten == begin;
    return firstWritten;
}
/**
 * Range-checked front end for {@code getChars(long, int, byte[])}.
 *
 * <p>Validates {@code [begin, end)} against {@code value}, then lets the
 * unchecked converter write the digits ending just before {@code end}.
 *
 * @param l     value to convert
 * @param begin expected index of the first char written
 * @param end   index just after the least significant digit
 * @param value target buffer, UTF16-coded
 * @return index of the first char written (asserted to equal {@code begin})
 */
public static int getChars(long l, int begin, int end, byte[] value) {
    checkBoundsBeginEnd(begin, end, value);
    final int firstWritten = getChars(l, end, value);
    assert firstWritten == begin;
    return firstWritten;
}
/**
 * Compares the first {@code len} chars of a byte-per-char buffer with the
 * first {@code len} chars of a UTF16-coded buffer.
 *
 * <p>Only {@code v2} is range-checked explicitly; reads from {@code v1} are
 * ordinary array accesses.
 *
 * @param v1  buffer read one byte per char (each byte is zero-extended)
 * @param v2  UTF16-coded buffer
 * @param len number of chars to compare
 * @return {@code true} if all {@code len} chars are equal
 */
public static boolean contentEquals(byte[] v1, byte[] v2, int len) {
    checkBoundsOffCount(0, len, v2);
    int pos = 0;
    while (pos < len) {
        final char narrow = (char) (v1[pos] & 0xff);
        if (narrow != getChar(v2, pos)) {
            return false;
        }
        pos++;
    }
    return true;
}
/**
 * Compares the first {@code len} chars of a UTF16-coded buffer with the
 * first {@code len} chars of a {@link CharSequence}.
 *
 * <p>{@code len} is validated against {@code value} before the comparison;
 * accesses to {@code cs} rely on {@code cs.charAt} itself.
 *
 * @param value UTF16-coded buffer
 * @param cs    sequence to compare against
 * @param len   number of chars to compare
 * @return {@code true} if all {@code len} chars are equal
 */
public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
    checkOffset(len, value);
    int pos = 0;
    while (pos < len) {
        final char mine = getChar(value, pos);
        final char theirs = cs.charAt(pos);
        if (mine != theirs) {
            return false;
        }
        pos++;
    }
    return true;
}
/**
 * Stores four chars at consecutive char indices starting at {@code i},
 * after a single range check covering all four positions.
 *
 * @param value UTF16-coded target buffer
 * @param i     char index of the first store
 * @param c1    first char
 * @param c2    second char
 * @param c3    third char
 * @param c4    fourth char
 * @return {@code i + 4}, the index just past the last char written
 */
public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
    final int end = i + 4;
    checkBoundsBeginEnd(i, end, value);
    int cursor = i;
    putChar(value, cursor++, c1);
    putChar(value, cursor++, c2);
    putChar(value, cursor++, c3);
    putChar(value, cursor++, c4);
    assert cursor == end;
    return end;
}
/**
 * Stores five chars at consecutive char indices starting at {@code i},
 * after a single range check covering all five positions.
 *
 * @param value UTF16-coded target buffer
 * @param i     char index of the first store
 * @param c1    first char
 * @param c2    second char
 * @param c3    third char
 * @param c4    fourth char
 * @param c5    fifth char
 * @return {@code i + 5}, the index just past the last char written
 */
public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
    final int end = i + 5;
    checkBoundsBeginEnd(i, end, value);
    int cursor = i;
    putChar(value, cursor++, c1);
    putChar(value, cursor++, c2);
    putChar(value, cursor++, c3);
    putChar(value, cursor++, c4);
    putChar(value, cursor++, c5);
    assert cursor == end;
    return end;
}
/**
 * Range-checked read of a single char from a UTF16-coded buffer.
 *
 * @param value UTF16-coded buffer
 * @param index char index to read
 * @return the char obtained from {@code getChar(value, index)}
 */
public static char charAt(byte[] value, int index) {
    checkIndex(index, value);
    final char result = getChar(value, index);
    return result;
}
/**
 * Reverses the first {@code count} chars of {@code val} in place.
 *
 * <p>The chars are swapped pairwise from the middle outwards. If any
 * swapped char is a surrogate, a second pass
 * ({@code reverseAllValidSurrogatePairs}) swaps adjacent low/high
 * surrogates back into high/low order.
 *
 * @param val   UTF16-coded buffer to reverse
 * @param count number of chars to reverse; validated against {@code val}
 */
public static void reverse(byte[] val, int count) {
    checkOffset(count, val);
    final int last = count - 1;
    boolean sawSurrogate = false;
    int lo = (last - 1) >> 1;
    while (lo >= 0) {
        final int hi = last - lo;
        final char front = getChar(val, lo);
        final char back = getChar(val, hi);
        putChar(val, lo, back);
        putChar(val, hi, front);
        sawSurrogate |= Character.isSurrogate(front) || Character.isSurrogate(back);
        lo--;
    }
    if (sawSurrogate) {
        reverseAllValidSurrogatePairs(val, count);
    }
}
/**
 * Outlined helper method for reverse().
 *
 * <p>Scans the first {@code count} chars and, wherever a low surrogate is
 * immediately followed by a high surrogate, swaps the two so the pair reads
 * high-then-low. A swapped pair is skipped as a unit.
 */
private static void reverseAllValidSurrogatePairs(byte[] val, int count) {
    final int limit = count - 1;
    int pos = 0;
    while (pos < limit) {
        final char first = getChar(val, pos);
        if (Character.isLowSurrogate(first)) {
            final char second = getChar(val, pos + 1);
            if (Character.isHighSurrogate(second)) {
                putChar(val, pos, second);
                putChar(val, pos + 1, first);
                // Step over the second half of the pair just fixed.
                pos++;
            }
        }
        pos++;
    }
}
// inflatedCopy byte[] -> byte[]
/**
 * Widens {@code len} bytes from {@code src} into chars stored in the
 * UTF16-coded buffer {@code dst}; each source byte is zero-extended.
 *
 * @param src    source buffer, read one byte per char
 * @param srcOff index of the first byte to read from {@code src}
 * @param dst    UTF16-coded target buffer
 * @param dstOff char index of the first store into {@code dst}
 * @param len    number of chars to copy
 */
public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
    // 'putChar' performs no checks of its own, so the destination range
    // has to be validated before the copy loop.
    checkBoundsOffCount(dstOff, len, dst);
    for (int k = 0; k < len; k++) {
        putChar(dst, dstOff + k, src[srcOff + k] & 0xff);
    }
}
// srcCoder == UTF16 && tgtCoder == LATIN1
public static int lastIndexOfLatin1(byte[] src, int srcCount,
byte[] tgt, int tgtCount, int fromIndex) {
assert fromIndex >= 0;
assert tgtCount > 0;
assert tgtCount <= tgt.length;
int min = tgtCount - 1;
int i = min + fromIndex;
int strLastIndex = tgtCount - 1;
char strLastChar = (char)(tgt[strLastIndex] & 0xff);
checkIndex(i, src);
startSearchForLastChar:
while (true) {
while (i >= min && getChar(src, i) != strLastChar) {
i--;
}
if (i < min) {
return -1;
}
int j = i - 1;
int start = j - strLastIndex;
int k = strLastIndex - 1;
while (j > start) {
if (getChar(src, j--) != (tgt[k--] & 0xff)) {
i--;
continue startSearchForLastChar;
}
}
return start + 1;
}
} }
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
@ -975,4 +1186,123 @@ final class StringUTF16 {
} }
static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
// Used by trusted callers. Assumes all necessary bounds checks have
// been done by the caller.
/**
 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
 * UTF-16 coder.
 *
 * <p>Characters are written backwards: the least significant digit goes to
 * {@code index - 1} and each further character to the next lower index.
 * This method performs no range check of its own.
 *
 * @param i value to convert
 * @param index next index, after the least significant digit
 * @param buf target buffer, UTF16-coded.
 * @return index of the most significant digit or minus sign, if present
 */
static int getChars(int i, int index, byte[] buf) {
int q, r;
int charPos = index;
boolean negative = (i < 0);
// Work on the negated value: every non-negative int has a negative
// counterpart, whereas negating Integer.MIN_VALUE would overflow.
if (!negative) {
i = -i;
}
// Get 2 digits/iteration using ints
while (i <= -100) {
q = i / 100;
// i is negative and q is truncated towards zero, so r is in 0..99.
r = (q * 100) - i;
i = q;
putChar(buf, --charPos, Integer.DigitOnes[r]);
putChar(buf, --charPos, Integer.DigitTens[r]);
}
// We know there are at most two digits left at this point.
q = i / 10;
r = (q * 10) - i;
putChar(buf, --charPos, '0' + r);
// Whatever left is the remaining digit.
if (q < 0) {
putChar(buf, --charPos, '0' - q);
}
if (negative) {
putChar(buf, --charPos, '-');
}
return charPos;
}
/**
 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for
 * UTF-16 coder.
 *
 * <p>Characters are written backwards: the least significant digit goes to
 * {@code index - 1} and each further character to the next lower index.
 * This method performs no range check of its own.
 *
 * @param i value to convert
 * @param index next index, after the least significant digit
 * @param buf target buffer, UTF16-coded.
 * @return index of the most significant digit or minus sign, if present
 */
static int getChars(long i, int index, byte[] buf) {
long q;
int r;
int charPos = index;
boolean negative = (i < 0);
// Work on the negated value: every non-negative long has a negative
// counterpart, whereas negating Long.MIN_VALUE would overflow.
if (!negative) {
i = -i;
}
// Get 2 digits/iteration using longs until quotient fits into an int
while (i <= Integer.MIN_VALUE) {
q = i / 100;
// i is negative and q is truncated towards zero, so r is in 0..99.
r = (int)((q * 100) - i);
i = q;
putChar(buf, --charPos, Integer.DigitOnes[r]);
putChar(buf, --charPos, Integer.DigitTens[r]);
}
// Get 2 digits/iteration using ints
int q2;
// The loop above exits only once i > Integer.MIN_VALUE, and i is never
// positive, so this narrowing cast keeps the value.
int i2 = (int)i;
while (i2 <= -100) {
q2 = i2 / 100;
r = (q2 * 100) - i2;
i2 = q2;
putChar(buf, --charPos, Integer.DigitOnes[r]);
putChar(buf, --charPos, Integer.DigitTens[r]);
}
// We know there are at most two digits left at this point.
q2 = i2 / 10;
r = (q2 * 10) - i2;
putChar(buf, --charPos, '0' + r);
// Whatever left is the remaining digit.
if (q2 < 0) {
putChar(buf, --charPos, '0' - q2);
}
if (negative) {
putChar(buf, --charPos, '-');
}
return charPos;
}
// End of trusted methods.
/**
 * Validates a char index against a UTF16-coded buffer by delegating to
 * {@code String.checkIndex} with the buffer's {@code length(val)}.
 *
 * @param off char index to validate
 * @param val UTF16-coded buffer the index refers to
 */
public static void checkIndex(int off, byte[] val) {
    final int charCount = length(val);
    String.checkIndex(off, charCount);
}
/**
 * Validates a char offset against a UTF16-coded buffer by delegating to
 * {@code String.checkOffset} with the buffer's {@code length(val)}.
 *
 * @param off char offset to validate
 * @param val UTF16-coded buffer the offset refers to
 */
public static void checkOffset(int off, byte[] val) {
    final int charCount = length(val);
    String.checkOffset(off, charCount);
}
/**
 * Validates a {@code [begin, end)} char range against a UTF16-coded buffer
 * by delegating to {@code String.checkBoundsBeginEnd} with the buffer's
 * {@code length(val)}.
 *
 * @param begin start of the range
 * @param end   end of the range
 * @param val   UTF16-coded buffer the range refers to
 */
public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
    final int charCount = length(val);
    String.checkBoundsBeginEnd(begin, end, charCount);
}
/**
 * Validates an offset/count char range against a UTF16-coded buffer by
 * delegating to {@code String.checkBoundsOffCount} with the buffer's
 * {@code length(val)}.
 *
 * @param offset start of the range
 * @param count  number of chars in the range
 * @param val    UTF16-coded buffer the range refers to
 */
public static void checkBoundsOffCount(int offset, int count, byte[] val) {
    final int charCount = length(val);
    String.checkBoundsOffCount(offset, count, charCount);
}
} }