mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
8241055: Regex Grapheme Matcher Performance Depends too much on Total Input Sequence Size
Reviewed-by: naoto
This commit is contained in:
parent
d0f5c5c6bb
commit
455eaca215
4 changed files with 116 additions and 83 deletions
|
@@ -4035,17 +4035,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
if (i < matcher.to) {
|
||||
int ch0 = Character.codePointAt(seq, i);
|
||||
int n = Character.charCount(ch0);
|
||||
int j = i + n;
|
||||
// Fast check if it's necessary to call Normalizer;
|
||||
// testing Grapheme.isBoundary is enough for this case
|
||||
while (j < matcher.to) {
|
||||
int ch1 = Character.codePointAt(seq, j);
|
||||
if (Grapheme.isBoundary(ch0, ch1))
|
||||
break;
|
||||
ch0 = ch1;
|
||||
j += Character.charCount(ch1);
|
||||
}
|
||||
if (i + n == j) { // single, assume nfc cp
|
||||
int j = Grapheme.nextBoundary(seq, i, matcher.to);
|
||||
if (i + n == j) { // single cp grapheme, assume nfc
|
||||
if (predicate.is(ch0))
|
||||
return next.match(matcher, j, seq);
|
||||
} else {
|
||||
|
@@ -4109,13 +4100,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
endIndex = matcher.getTextLength();
|
||||
}
|
||||
if (i == startIndex) {
|
||||
return next.match(matcher, i, seq);
|
||||
}
|
||||
if (i < endIndex) {
|
||||
if (Character.isSurrogatePair(seq.charAt(i-1), seq.charAt(i)) ||
|
||||
Grapheme.nextBoundary(seq,
|
||||
i - Character.charCount(Character.codePointBefore(seq, i)),
|
||||
i + Character.charCount(Character.codePointAt(seq, i))) > i) {
|
||||
// continue with return below
|
||||
} else if (i < endIndex) {
|
||||
if (Character.isSurrogatePair(seq.charAt(i - 1), seq.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
if (Grapheme.nextBoundary(seq, matcher.last, endIndex) > i) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue