mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8302871: Speed up StringLatin1.regionMatchesCI
Reviewed-by: redestad, martin, alanb
This commit is contained in:
parent
b4ea80731c
commit
17e3769ed7
4 changed files with 161 additions and 18 deletions
|
@ -138,10 +138,11 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
if (ch < 'A') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
|
||||
if (l <= 'z' // In range a-z
|
||||
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||
return l;
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int lower = ch | 0x20;
|
||||
if (lower <= 'z' // In range a-z
|
||||
|| (lower >= 0xE0 && lower <= 0xFE && lower != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||
return lower;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
@ -150,10 +151,11 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
if (ch < 'a') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
|
||||
if (U <= 'Z' // In range A-Z
|
||||
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
|
||||
return U;
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int upper = ch & 0xDF;
|
||||
if (upper <= 'Z' // In range A-Z
|
||||
|| (upper >= 0xC0 && upper <= 0xDE && upper != 0xD7)) { // ..or Agrave-Thorn, not multiplication
|
||||
return upper;
|
||||
}
|
||||
|
||||
// Special-case for 'y with Diaeresis' which uppercases out of latin1
|
||||
|
@ -167,6 +169,27 @@ class CharacterDataLatin1 extends CharacterData {
|
|||
return ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two latin1 code points, ignoring case considerations.
|
||||
* Two bytes match when they are identical, or when both reduce to the same value in A-Z or 0xC0-0xDE (except 0xD7) once bit 0x20 is cleared.
|
||||
* @param b1 byte representing a latin1 code point
|
||||
* @param b2 another byte representing a latin1 code point
|
||||
* @return true if the two bytes are considered equal ignoring case in latin1
|
||||
*/
|
||||
static boolean equalsIgnoreCase(byte b1, byte b2) {
|
||||
if (b1 == b2) { // Identical bytes match without any case folding
|
||||
return true;
|
||||
}
|
||||
// ASCII and Latin-1 were designed to optimize case-twiddling operations
|
||||
int upper = b1 & 0xDF; // Clear bit 0x20; the mask also drops sign extension, so upper is in 0..0xDF
|
||||
if (upper < 'A') {
|
||||
return false; // Low ASCII: below 'A' after masking, and the bytes are already known to differ
|
||||
}
|
||||
return (upper <= 'Z' // In range A-Z
|
||||
|| (upper >= 0xC0 && upper <= 0XDE && upper != 0xD7)) // ..or A-grave-Thorn, not multiplication
|
||||
&& upper == (b2 & 0xDF); // b2 has same uppercase
|
||||
}
|
||||
|
||||
int toTitleCase(int ch) {
|
||||
return toUpperCase(ch); // Title-casing is delegated unchanged to toUpperCase
|
||||
}
|
||||
|
|
|
@ -384,14 +384,9 @@ final class StringLatin1 {
|
|||
byte[] other, int ooffset, int len) {
|
||||
int last = toffset + len;
|
||||
while (toffset < last) {
|
||||
char c1 = (char)(value[toffset++] & 0xff);
|
||||
char c2 = (char)(other[ooffset++] & 0xff);
|
||||
if (c1 == c2) {
|
||||
continue;
|
||||
}
|
||||
int u1 = CharacterDataLatin1.instance.toUpperCase(c1);
|
||||
int u2 = CharacterDataLatin1.instance.toUpperCase(c2);
|
||||
if (u1 == u2) {
|
||||
byte b1 = value[toffset++];
|
||||
byte b2 = other[ooffset++];
|
||||
if (CharacterDataLatin1.equalsIgnoreCase(b1, b2)) {
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue