8237599: Greedy matching against supplementary chars fails to respect the region

Reviewed-by: rriggs
This commit is contained in:
Ivan Gerasimov 2020-03-25 08:46:31 -07:00
parent c01e986cc9
commit d1b506597f
2 changed files with 56 additions and 5 deletions

View file

@ -4340,14 +4340,22 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
this.cmin = cmin;
}
boolean match(Matcher matcher, int i, CharSequence seq) {
int starti = i;
int n = 0;
int to = matcher.to;
// greedy, all the way down
while (i < to) {
int ch = Character.codePointAt(seq, i);
int len = Character.charCount(ch);
if (i + len > to) {
// the region cut off the high half of a surrogate pair
matcher.hitEnd = true;
ch = seq.charAt(i);
len = 1;
}
if (!predicate.is(ch))
break;
i += Character.charCount(ch);
break;
i += len;
n++;
}
if (i >= to) {
@ -4358,9 +4366,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return true;
if (n == cmin)
return false;
// backing off if match fails
// backing off if match fails
int ch = Character.codePointBefore(seq, i);
i -= Character.charCount(ch);
// check if the region cut off the low half of a surrogate pair
i = Math.max(starti, i - Character.charCount(ch));
n--;
}
return false;