diff --git a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template index 8a704635f14..f2d8fcf37f5 100644 --- a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template +++ b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template @@ -138,10 +138,11 @@ class CharacterDataLatin1 extends CharacterData { if (ch < 'A') { // Fast path for low code points return ch; } - int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book' - if (l <= 'z' // In range a-z - || (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division - return l; + // ASCII and Latin-1 were designed to optimize case-twiddling operations + int lower = ch | 0x20; + if (lower <= 'z' // In range a-z + || (lower >= 0xE0 && lower <= 0xFE && lower != 0xF7)) { // ..or agrave-thorn, excluding division + return lower; } return ch; } @@ -150,10 +151,11 @@ class CharacterDataLatin1 extends CharacterData { if (ch < 'a') { // Fast path for low code points return ch; } - int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book' - if (U <= 'Z' // In range A-Z - || (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication - return U; + // ASCII and Latin-1 were designed to optimize case-twiddling operations + int upper = ch & 0xDF; + if (upper <= 'Z' // In range A-Z + || (upper >= 0xC0 && upper <= 0xDE && upper != 0xD7)) { // ..or Agrave-Thorn, not multiplication + return upper; } // Special-case for 'y with Diaeresis' which uppercases out of latin1 @@ -167,6 +169,27 @@ class CharacterDataLatin1 extends CharacterData { return ch; } + /** + * Compares two latin1 code points, ignoring case considerations. + * + * @param b1 byte representing a latin1 code point + * @param b2 another byte representing a latin1 code point + * @return true if the two bytes are considered equal, ignoring case, in latin1 + */ + static boolean 
equalsIgnoreCase(byte b1, byte b2) { + if (b1 == b2) { + return true; + } + // ASCII and Latin-1 were designed to optimize case-twiddling operations + int upper = b1 & 0xDF; + if (upper < 'A') { + return false; // Low ASCII + } + return (upper <= 'Z' // In range A-Z + || (upper >= 0xC0 && upper <= 0XDE && upper != 0xD7)) // ..or A-grave-Thorn, not multiplication + && upper == (b2 & 0xDF); // b2 has same uppercase + } + int toTitleCase(int ch) { return toUpperCase(ch); } diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java index 7749597a24a..e96e660728b 100644 --- a/src/java.base/share/classes/java/lang/StringLatin1.java +++ b/src/java.base/share/classes/java/lang/StringLatin1.java @@ -384,14 +384,9 @@ final class StringLatin1 { byte[] other, int ooffset, int len) { int last = toffset + len; while (toffset < last) { - char c1 = (char)(value[toffset++] & 0xff); - char c2 = (char)(other[ooffset++] & 0xff); - if (c1 == c2) { - continue; - } - int u1 = CharacterDataLatin1.instance.toUpperCase(c1); - int u2 = CharacterDataLatin1.instance.toUpperCase(c2); - if (u1 == u2) { + byte b1 = value[toffset++]; + byte b2 = other[ooffset++]; + if (CharacterDataLatin1.equalsIgnoreCase(b1, b2)) { continue; } return false; diff --git a/test/jdk/java/lang/String/CompactString/EqualsIgnoreCase.java b/test/jdk/java/lang/String/CompactString/EqualsIgnoreCase.java index 5960740c6ec..78bececbf42 100644 --- a/test/jdk/java/lang/String/CompactString/EqualsIgnoreCase.java +++ b/test/jdk/java/lang/String/CompactString/EqualsIgnoreCase.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,10 +25,12 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; /* * @test - * @bug 8077559 8248655 + * @bug 8077559 8248655 8302871 * @summary Tests Compact String. This one is for String.equalsIgnoreCase. * @run testng/othervm -XX:+CompactStrings EqualsIgnoreCase * @run testng/othervm -XX:-CompactStrings EqualsIgnoreCase @@ -75,4 +77,31 @@ public class EqualsIgnoreCase extends CompactString { source)); }); } + + /** + * Exhaustively check that String.equalsIgnoreCase treats all 256x256 latin1 code point pairs + * in a manner consistent with Character.toLowerCase(Character.toUpperCase(c)). + */ + @Test + public void checkConsistencyWithCharacterUppercaseLowerCase() { + for (char a = 0; a < 256; a++) { + for (char b = 0; b < 256; b++) { + + int caseFoldA = Character.toLowerCase(Character.toUpperCase(a)); + int caseFoldB = Character.toLowerCase(Character.toUpperCase(b)); + + String astr = Character.toString(a); + String bstr = Character.toString(b); + + // If characters fold to the same lowercase, their strings should equalsIgnoreCase: + if (caseFoldA == caseFoldB) { + assertTrue(astr.equalsIgnoreCase(bstr), + "Expected %s to equalsIgnoreCase %s".formatted(astr, bstr)); + } else { + assertFalse(astr.equalsIgnoreCase(bstr), + "Expected %s to not equalsIgnoreCase %s".formatted(astr, bstr)); + } + } + } + } } diff --git a/test/micro/org/openjdk/bench/java/lang/RegionMatchesIC.java b/test/micro/org/openjdk/bench/java/lang/RegionMatchesIC.java new file mode 100644 index 00000000000..753e7dcd784 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/RegionMatchesIC.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.*; + +import java.util.concurrent.TimeUnit; + +/* + * This benchmark naively explores String::regionMatches, ignoring case + */ + +public class RegionMatchesIC { + + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @State(Scope.Benchmark) + @Warmup(iterations = 5, time = 1) + @Measurement(iterations = 5, time = 1) + @Fork(value = 3) + public static class Latin1 { + + @Param({"1024"}) + public int size; + + @Param({"ascii-match", + "ascii-mismatch", + "number-match", + "number-mismatch", + "lat1-match", + "lat1-mismatch"}) + String codePoints; + private String leftString; + private String rightString; + + @Setup + public void setup() { + + switch (codePoints) { + case "ascii-match" -> { + leftString = "a".repeat(size); + rightString = "A".repeat(size); + } + case "ascii-mismatch" -> { + leftString = "a".repeat(size); + rightString = "b".repeat(size); + } + case "number-match" -> { + leftString = "7".repeat(size); + rightString = "7".repeat(size); + } + case 
"number-mismatch" -> { + leftString = "7".repeat(size); + rightString = "9".repeat(size); + } + case "lat1-match" -> { + leftString = "\u00e5".repeat(size); + rightString = "\u00c5".repeat(size); + } + case "lat1-mismatch" -> { + leftString = "\u00e5".repeat(size); + rightString = "\u00c6".repeat(size); + } + default -> throw new IllegalArgumentException("Unsupported coding: " + codePoints); + } + // Make sure strings do not String.equals by adding a prefix + leftString = "l" + leftString; + rightString = "r" + rightString; + } + + @Benchmark + public boolean regionMatchesIC() { + return leftString.regionMatches(true, 1, rightString, 1, size); + } + } +}