8302871: Speed up StringLatin1.regionMatchesCI

Reviewed-by: redestad, martin, alanb
This commit is contained in:
Eirik Bjorsnos 2023-02-25 07:48:03 +00:00 committed by Alan Bateman
parent b4ea80731c
commit 17e3769ed7
4 changed files with 161 additions and 18 deletions

View file

@ -138,10 +138,11 @@ class CharacterDataLatin1 extends CharacterData {
if (ch < 'A') { // Fast path for low code points
return ch;
}
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
if (l <= 'z' // In range a-z
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
return l;
// ASCII and Latin-1 were designed to optimize case-twiddling operations
int lower = ch | 0x20;
if (lower <= 'z' // In range a-z
|| (lower >= 0xE0 && lower <= 0xFE && lower != 0xF7)) { // ..or agrave-thorn, excluding division
return lower;
}
return ch;
}
@ -150,10 +151,11 @@ class CharacterDataLatin1 extends CharacterData {
if (ch < 'a') { // Fast path for low code points
return ch;
}
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
if (U <= 'Z' // In range A-Z
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
return U;
// ASCII and Latin-1 were designed to optimize case-twiddling operations
int upper = ch & 0xDF;
if (upper <= 'Z' // In range A-Z
|| (upper >= 0xC0 && upper <= 0xDE && upper != 0xD7)) { // ..or Agrave-Thorn, not multiplication
return upper;
}
// Special-case for 'y with Diaeresis' which uppercases out of latin1
@ -167,6 +169,27 @@ class CharacterDataLatin1 extends CharacterData {
return ch;
}
/**
 * Tests whether two latin1 code points are equal when case is disregarded.
 *
 * @param b1 byte holding a latin1 code point
 * @param b2 byte holding the latin1 code point to compare against
 * @return true if the bytes are identical, or are the two cases of the
 *         same latin1 letter; false otherwise
 */
static boolean equalsIgnoreCase(byte b1, byte b2) {
    if (b1 == b2) {
        return true;
    }
    // Clearing bit 0x20 folds a-z onto A-Z and agrave-thorn onto Agrave-Thorn
    final int folded = b1 & 0xDF;
    if (folded != (b2 & 0xDF)) {
        return false; // Differ in more than the case bit
    }
    // The folded values agree, but that only counts when the value is a letter
    if (folded >= 'A' && folded <= 'Z') {
        return true; // In range A-Z
    }
    // Agrave-Thorn, excluding the multiplication sign (whose 0x20 twin is division)
    return folded >= 0xC0 && folded <= 0xDE && folded != 0xD7;
}
/**
 * Maps a code point to title case by delegating to {@code toUpperCase}.
 *
 * @param ch the code point to convert
 * @return whatever {@code toUpperCase} returns for {@code ch}
 */
int toTitleCase(int ch) {
    final int titleCased = toUpperCase(ch);
    return titleCased;
}