mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8200377: String::strip, String::stripLeading, String::stripTrailing
Reviewed-by: sundar, rriggs
This commit is contained in:
parent
ec2d9845e0
commit
7906014509
4 changed files with 300 additions and 1 deletions
|
@ -2602,7 +2602,7 @@ public final class String
|
||||||
* Returns a string whose value is this string, with all leading
|
* Returns a string whose value is this string, with all leading
|
||||||
* and trailing space removed, where space is defined
|
* and trailing space removed, where space is defined
|
||||||
* as any character whose codepoint is less than or equal to
|
* as any character whose codepoint is less than or equal to
|
||||||
* {@code '\u005Cu0020'} (the space character).
|
* {@code 'U+0020'} (the space character).
|
||||||
* <p>
|
* <p>
|
||||||
* If this {@code String} object represents an empty character
|
* If this {@code String} object represents an empty character
|
||||||
* sequence, or the first and last characters of character sequence
|
* sequence, or the first and last characters of character sequence
|
||||||
|
@ -2636,6 +2636,98 @@ public final class String
|
||||||
return ret == null ? this : ret;
|
return ret == null ? this : ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string whose value is this string, with all leading
|
||||||
|
* and trailing {@link Character#isWhitespace(int) white space}
|
||||||
|
* removed.
|
||||||
|
* <p>
|
||||||
|
* If this {@code String} object represents an empty string,
|
||||||
|
* or if all code points in this string are
|
||||||
|
* {@link Character#isWhitespace(int) white space}, then an empty string
|
||||||
|
* is returned.
|
||||||
|
* <p>
|
||||||
|
* Otherwise, returns a substring of this string beginning with the first
|
||||||
|
* code point that is not a {@link Character#isWhitespace(int) white space}
|
||||||
|
* up to and including the last code point that is not a
|
||||||
|
* {@link Character#isWhitespace(int) white space}.
|
||||||
|
* <p>
|
||||||
|
* This method may be used to strip
|
||||||
|
* {@link Character#isWhitespace(int) white space} from
|
||||||
|
* the beginning and end of a string.
|
||||||
|
*
|
||||||
|
* @return a string whose value is this string, with all leading
|
||||||
|
* and trailing white space removed
|
||||||
|
*
|
||||||
|
* @see Character#isWhitespace(int)
|
||||||
|
*
|
||||||
|
* @since 11
|
||||||
|
*/
|
||||||
|
public String strip() {
|
||||||
|
String ret = isLatin1() ? StringLatin1.strip(value)
|
||||||
|
: StringUTF16.strip(value);
|
||||||
|
return ret == null ? this : ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string whose value is this string, with all leading
|
||||||
|
* {@link Character#isWhitespace(int) white space} removed.
|
||||||
|
* <p>
|
||||||
|
* If this {@code String} object represents an empty string,
|
||||||
|
* or if all code points in this string are
|
||||||
|
* {@link Character#isWhitespace(int) white space}, then an empty string
|
||||||
|
* is returned.
|
||||||
|
* <p>
|
||||||
|
* Otherwise, returns a substring of this string beginning with the first
|
||||||
|
* code point that is not a {@link Character#isWhitespace(int) white space}
|
||||||
|
* up to to and including the last code point of this string.
|
||||||
|
* <p>
|
||||||
|
* This method may be used to trim
|
||||||
|
* {@link Character#isWhitespace(int) white space} from
|
||||||
|
* the beginning of a string.
|
||||||
|
*
|
||||||
|
* @return a string whose value is this string, with all leading white
|
||||||
|
* space removed
|
||||||
|
*
|
||||||
|
* @see Character#isWhitespace(int)
|
||||||
|
*
|
||||||
|
* @since 11
|
||||||
|
*/
|
||||||
|
public String stripLeading() {
|
||||||
|
String ret = isLatin1() ? StringLatin1.stripLeading(value)
|
||||||
|
: StringUTF16.stripLeading(value);
|
||||||
|
return ret == null ? this : ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string whose value is this string, with all trailing
|
||||||
|
* {@link Character#isWhitespace(int) white space} removed.
|
||||||
|
* <p>
|
||||||
|
* If this {@code String} object represents an empty string,
|
||||||
|
* or if all characters in this string are
|
||||||
|
* {@link Character#isWhitespace(int) white space}, then an empty string
|
||||||
|
* is returned.
|
||||||
|
* <p>
|
||||||
|
* Otherwise, returns a substring of this string beginning with the first
|
||||||
|
* code point of this string up to and including the last code point
|
||||||
|
* that is not a {@link Character#isWhitespace(int) white space}.
|
||||||
|
* <p>
|
||||||
|
* This method may be used to trim
|
||||||
|
* {@link Character#isWhitespace(int) white space} from
|
||||||
|
* the end of a string.
|
||||||
|
*
|
||||||
|
* @return a string whose value is this string, with all trailing white
|
||||||
|
* space removed
|
||||||
|
*
|
||||||
|
* @see Character#isWhitespace(int)
|
||||||
|
*
|
||||||
|
* @since 11
|
||||||
|
*/
|
||||||
|
public String stripTrailing() {
|
||||||
|
String ret = isLatin1() ? StringLatin1.stripTrailing(value)
|
||||||
|
: StringUTF16.stripTrailing(value);
|
||||||
|
return ret == null ? this : ret;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This object (which is already a string!) is itself returned.
|
* This object (which is already a string!) is itself returned.
|
||||||
*
|
*
|
||||||
|
|
|
@ -538,6 +538,57 @@ final class StringLatin1 {
|
||||||
newString(value, st, len - st) : null;
|
newString(value, st, len - st) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int indexOfNonWhitespace(byte[] value) {
|
||||||
|
int length = value.length;
|
||||||
|
int left = 0;
|
||||||
|
while (left < length) {
|
||||||
|
char ch = (char)(value[left] & 0xff);
|
||||||
|
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
left++;
|
||||||
|
}
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int lastIndexOfNonWhitespace(byte[] value) {
|
||||||
|
int length = value.length;
|
||||||
|
int right = length;
|
||||||
|
while (0 < right) {
|
||||||
|
char ch = (char)(value[right - 1] & 0xff);
|
||||||
|
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
right--;
|
||||||
|
}
|
||||||
|
return right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String strip(byte[] value) {
|
||||||
|
int left = indexOfNonWhitespace(value);
|
||||||
|
if (left == value.length) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
int right = lastIndexOfNonWhitespace(value);
|
||||||
|
return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String stripLeading(byte[] value) {
|
||||||
|
int left = indexOfNonWhitespace(value);
|
||||||
|
if (left == value.length) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return (left != 0) ? newString(value, left, value.length - left) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String stripTrailing(byte[] value) {
|
||||||
|
int right = lastIndexOfNonWhitespace(value);
|
||||||
|
if (right == 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return (right != value.length) ? newString(value, 0, right) : null;
|
||||||
|
}
|
||||||
|
|
||||||
public static void putChar(byte[] val, int index, int c) {
|
public static void putChar(byte[] val, int index, int c) {
|
||||||
//assert (canEncode(c));
|
//assert (canEncode(c));
|
||||||
val[index] = (byte)(c);
|
val[index] = (byte)(c);
|
||||||
|
|
|
@ -856,6 +856,61 @@ final class StringUTF16 {
|
||||||
null;
|
null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static int indexOfNonWhitespace(byte[] value) {
|
||||||
|
int length = value.length >> 1;
|
||||||
|
int left = 0;
|
||||||
|
while (left < length) {
|
||||||
|
int codepoint = codePointAt(value, left, length);
|
||||||
|
if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
left += Character.charCount(codepoint);
|
||||||
|
}
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int lastIndexOfNonWhitespace(byte[] value) {
|
||||||
|
int length = value.length >> 1;
|
||||||
|
int right = length;
|
||||||
|
while (0 < right) {
|
||||||
|
int codepoint = codePointBefore(value, right);
|
||||||
|
if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
right -= Character.charCount(codepoint);
|
||||||
|
}
|
||||||
|
return right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String strip(byte[] value) {
|
||||||
|
int length = value.length >> 1;
|
||||||
|
int left = indexOfNonWhitespace(value);
|
||||||
|
if (left == length) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
int right = lastIndexOfNonWhitespace(value);
|
||||||
|
return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String stripLeading(byte[] value) {
|
||||||
|
int length = value.length >> 1;
|
||||||
|
int left = indexOfNonWhitespace(value);
|
||||||
|
if (left == length) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return (left != 0) ? newString(value, left, length - left) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String stripTrailing(byte[] value) {
|
||||||
|
int length = value.length >> 1;
|
||||||
|
int right = lastIndexOfNonWhitespace(value);
|
||||||
|
if (right == 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return (right != length) ? newString(value, 0, right) : null;
|
||||||
|
}
|
||||||
|
|
||||||
private static void putChars(byte[] val, int index, char[] str, int off, int end) {
|
private static void putChars(byte[] val, int index, char[] str, int off, int end) {
|
||||||
while (off < end) {
|
while (off < end) {
|
||||||
putChar(val, index++, str[off++]);
|
putChar(val, index++, str[off++]);
|
||||||
|
|
101
test/jdk/java/lang/String/Strip.java
Normal file
101
test/jdk/java/lang/String/Strip.java
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @summary Basic strip, stripLeading, stripTrailing functionality
|
||||||
|
* @bug 8200377
|
||||||
|
* @run main/othervm Strip
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class Strip {
    public static void main(String... arg) {
        testStrip();
        testWhitespace();
    }

    /*
     * Test basic stripping routines
     */
    static void testStrip() {
        equal(" abc ".strip(), "abc");
        equal(" abc ".stripLeading(), "abc ");
        equal(" abc ".stripTrailing(), " abc");
        equal(" abc\u2022 ".strip(), "abc\u2022");
        equal(" abc\u2022 ".stripLeading(), "abc\u2022 ");
        equal(" abc\u2022 ".stripTrailing(), " abc\u2022");
        equal("".strip(), "");
        equal("".stripLeading(), "");
        equal("".stripTrailing(), "");
        // Backspace is not white space, so it must survive stripping.
        equal("\b".strip(), "\b");
        equal("\b".stripLeading(), "\b");
        equal("\b".stripTrailing(), "\b");
    }

    /*
     * Test full whitespace range
     */
    static void testWhitespace() {
        // Collect every char in [1, 0xFFFF) that Character.isWhitespace accepts.
        StringBuilder sb = new StringBuilder(64);
        IntStream.range(1, 0xFFFF).filter(c -> Character.isWhitespace(c))
                                  .forEach(c -> sb.append((char)c));
        String whiteSpace = sb.toString();

        String testString = whiteSpace + "abc" + whiteSpace;
        equal(testString.strip(), "abc");
        equal(testString.stripLeading(), "abc"  + whiteSpace);
        equal(testString.stripTrailing(), whiteSpace + "abc");
    }

    /*
     * Report difference in result. Prints both strings as lists of code
     * points to System.err, then throws a RuntimeException carrying the
     * message. Either string may be null.
     */
    static void report(String message, String inputTag, String input,
                       String outputTag, String output) {
        System.err.println(message);
        System.err.println();
        System.err.println(inputTag);
        System.err.println(codePointList(input));
        System.err.println();
        System.err.println(outputTag);
        System.err.println(codePointList(output));
        throw new RuntimeException(message);
    }

    /*
     * Render a string as its list of code points, or "null" for a null
     * string, so that a failing null comparison is reported instead of
     * raising a NullPointerException.
     */
    private static String codePointList(String s) {
        if (s == null) {
            return "null";
        }
        List<Integer> codePoints = s.codePoints()
                                    .boxed()
                                    .collect(Collectors.toList());
        return codePoints.toString();
    }

    /*
     * Raise an exception if the two inputs are not equivalent.
     * A null on either side counts as a failure.
     */
    static void equal(String input, String expected) {
        if (input == null || expected == null || !expected.equals(input)) {
            report("Failed equal", "Input:", input, "Expected:", expected);
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue