mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8302871: Speed up StringLatin1.regionMatchesCI
Reviewed-by: redestad, martin, alanb
This commit is contained in:
parent
b4ea80731c
commit
17e3769ed7
4 changed files with 161 additions and 18 deletions
|
@ -138,10 +138,11 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
if (ch < 'A') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
|
||||
if (l <= 'z' // In range a-z
|
||||
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||
return l;
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int lower = ch | 0x20;
|
||||
if (lower <= 'z' // In range a-z
|
||||
|| (lower >= 0xE0 && lower <= 0xFE && lower != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||
return lower;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
@ -150,10 +151,11 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
if (ch < 'a') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
|
||||
if (U <= 'Z' // In range A-Z
|
||||
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
|
||||
return U;
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int upper = ch & 0xDF;
|
||||
if (upper <= 'Z' // In range A-Z
|
||||
|| (upper >= 0xC0 && upper <= 0xDE && upper != 0xD7)) { // ..or Agrave-Thorn, not multiplication
|
||||
return upper;
|
||||
}
|
||||
|
||||
// Special-case for 'y with Diaeresis' which uppercases out of latin1
|
||||
|
@ -167,6 +169,27 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
return ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two latin1 code points, ignoring case considerations
|
||||
*
|
||||
* @param b1 byte representing a latin1 code point
|
||||
* @param b2 another byte representing a latin1 code point
|
||||
* @return true if the two bytes are considered equals ignoring case in latin1
|
||||
*/
|
||||
static boolean equalsIgnoreCase(byte b1, byte b2) {
|
||||
if (b1 == b2) {
|
||||
return true;
|
||||
}
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int upper = b1 & 0xDF;
|
||||
if (upper < 'A') {
|
||||
return false; // Low ASCII
|
||||
}
|
||||
return (upper <= 'Z' // In range A-Z
|
||||
|| (upper >= 0xC0 && upper <= 0XDE && upper != 0xD7)) // ..or A-grave-Thorn, not multiplication
|
||||
&& upper == (b2 & 0xDF); // b2 has same uppercase
|
||||
}
|
||||
|
||||
int toTitleCase(int ch) {
|
||||
return toUpperCase(ch);
|
||||
}
|
||||
|
|
|
@ -384,14 +384,9 @@ final class StringLatin1 {
|
|||
byte[] other, int ooffset, int len) {
|
||||
int last = toffset + len;
|
||||
while (toffset < last) {
|
||||
char c1 = (char)(value[toffset++] & 0xff);
|
||||
char c2 = (char)(other[ooffset++] & 0xff);
|
||||
if (c1 == c2) {
|
||||
continue;
|
||||
}
|
||||
int u1 = CharacterDataLatin1.instance.toUpperCase(c1);
|
||||
int u2 = CharacterDataLatin1.instance.toUpperCase(c2);
|
||||
if (u1 == u2) {
|
||||
byte b1 = value[toffset++];
|
||||
byte b2 = other[ooffset++];
|
||||
if (CharacterDataLatin1.equalsIgnoreCase(b1, b2)) {
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -25,10 +25,12 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertFalse;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8077559 8248655
|
||||
* @bug 8077559 8248655 8302871
|
||||
* @summary Tests Compact String. This one is for String.equalsIgnoreCase.
|
||||
* @run testng/othervm -XX:+CompactStrings EqualsIgnoreCase
|
||||
* @run testng/othervm -XX:-CompactStrings EqualsIgnoreCase
|
||||
|
@ -75,4 +77,31 @@ public class EqualsIgnoreCase extends CompactString {
|
|||
source));
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Exhaustively check that all 256x256 latin1 code point pairs are equalsIgnoreCased
|
||||
* in a manner consistent with Character.toLowerCase(Character.toUpperCase(c));
|
||||
*/
|
||||
@Test
|
||||
public void checkConsistencyWithCharacterUppercaseLowerCase() {
|
||||
for (char a = 0; a < 256; a++) {
|
||||
for (char b = 0; b < 256; b++) {
|
||||
|
||||
int caseFoldA = Character.toLowerCase(Character.toUpperCase(a));
|
||||
int caseFoldB = Character.toLowerCase(Character.toUpperCase(b));
|
||||
|
||||
String astr = Character.toString(a);
|
||||
String bstr = Character.toString(b);
|
||||
|
||||
// If characters fold to the same lowercase, their strings should equalsIgnoreCase:
|
||||
if (caseFoldA == caseFoldB) {
|
||||
assertTrue(astr.equalsIgnoreCase(bstr),
|
||||
"Expected %s to equalsIgnoreCase %s".formatted(astr, bstr));
|
||||
} else {
|
||||
assertFalse(astr.equalsIgnoreCase(bstr),
|
||||
"Expected %s to not equalsIgnoreCase %s".formatted(astr, bstr));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
96
test/micro/org/openjdk/bench/java/lang/RegionMatchesIC.java
Normal file
96
test/micro/org/openjdk/bench/java/lang/RegionMatchesIC.java
Normal file
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.lang;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/*
|
||||
* This benchmark naively explores String::regionMatches, ignoring case
|
||||
*/
|
||||
|
||||
public class RegionMatchesIC {
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 5, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
@Fork(value = 3)
|
||||
public static class Latin1 {
|
||||
|
||||
@Param({"1024"})
|
||||
public int size;
|
||||
|
||||
@Param({"ascii-match",
|
||||
"ascii-mismatch",
|
||||
"number-match",
|
||||
"number-mismatch",
|
||||
"lat1-match",
|
||||
"lat1-mismatch"})
|
||||
String codePoints;
|
||||
private String leftString;
|
||||
private String rightString;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
|
||||
switch (codePoints) {
|
||||
case "ascii-match" -> {
|
||||
leftString = "a".repeat(size);
|
||||
rightString = "A".repeat(size);
|
||||
}
|
||||
case "ascii-mismatch" -> {
|
||||
leftString = "a".repeat(size);
|
||||
rightString = "b".repeat(size);
|
||||
}
|
||||
case "number-match" -> {
|
||||
leftString = "7".repeat(size);
|
||||
rightString = "7".repeat(size);
|
||||
}
|
||||
case "number-mismatch" -> {
|
||||
leftString = "7".repeat(size);
|
||||
rightString = "9".repeat(size);
|
||||
}
|
||||
case "lat1-match" -> {
|
||||
leftString = "\u00e5".repeat(size);
|
||||
rightString = "\u00c5".repeat(size);
|
||||
}
|
||||
case "lat1-mismatch" -> {
|
||||
leftString = "\u00e5".repeat(size);
|
||||
rightString = "\u00c6".repeat(size);
|
||||
}
|
||||
default -> throw new IllegalArgumentException("Unsupported coding: " + codePoints);
|
||||
}
|
||||
// Make sure strings do not String.equals by adding a prefix
|
||||
leftString = "l" + leftString;
|
||||
rightString = "r" + rightString;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean regionMatchesIC() {
|
||||
return leftString.regionMatches(true, 1, rightString, 1, size);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue