mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8237599: Greedy matching against supplementary chars fails to respect the region
Reviewed-by: rriggs
This commit is contained in:
parent
c01e986cc9
commit
d1b506597f
2 changed files with 56 additions and 5 deletions
|
@ -4340,14 +4340,22 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
this.cmin = cmin;
|
this.cmin = cmin;
|
||||||
}
|
}
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
boolean match(Matcher matcher, int i, CharSequence seq) {
|
||||||
|
int starti = i;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int to = matcher.to;
|
int to = matcher.to;
|
||||||
// greedy, all the way down
|
// greedy, all the way down
|
||||||
while (i < to) {
|
while (i < to) {
|
||||||
int ch = Character.codePointAt(seq, i);
|
int ch = Character.codePointAt(seq, i);
|
||||||
|
int len = Character.charCount(ch);
|
||||||
|
if (i + len > to) {
|
||||||
|
// the region ends in the middle of a surrogate pair: only the high
// surrogate at i is in range, so match it as a single char
|
||||||
|
matcher.hitEnd = true;
|
||||||
|
ch = seq.charAt(i);
|
||||||
|
len = 1;
|
||||||
|
}
|
||||||
if (!predicate.is(ch))
|
if (!predicate.is(ch))
|
||||||
break;
|
break;
|
||||||
i += Character.charCount(ch);
|
i += len;
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
if (i >= to) {
|
if (i >= to) {
|
||||||
|
@ -4358,9 +4366,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
return true;
|
return true;
|
||||||
if (n == cmin)
|
if (n == cmin)
|
||||||
return false;
|
return false;
|
||||||
// backing off if match fails
|
// backing off if match fails
|
||||||
int ch = Character.codePointBefore(seq, i);
|
int ch = Character.codePointBefore(seq, i);
|
||||||
i -= Character.charCount(ch);
|
// never back off past starti: the pair found by codePointBefore may
// begin before starti, leaving only its low surrogate in range
|
||||||
|
i = Math.max(starti, i - Character.charCount(ch));
|
||||||
n--;
|
n--;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -36,7 +36,7 @@
|
||||||
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
||||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||||
* 8216332 8214245
|
* 8216332 8214245 8237599
|
||||||
*
|
*
|
||||||
* @library /test/lib
|
* @library /test/lib
|
||||||
* @library /lib/testlibrary/java/lang
|
* @library /lib/testlibrary/java/lang
|
||||||
|
@ -195,6 +195,7 @@ public class RegExTest {
|
||||||
surrogatePairWithCanonEq();
|
surrogatePairWithCanonEq();
|
||||||
lineBreakWithQuantifier();
|
lineBreakWithQuantifier();
|
||||||
caseInsensitivePMatch();
|
caseInsensitivePMatch();
|
||||||
|
surrogatePairOverlapRegion();
|
||||||
|
|
||||||
if (failure) {
|
if (failure) {
|
||||||
throw new
|
throw new
|
||||||
|
@ -5155,4 +5156,45 @@ public class RegExTest {
|
||||||
}
|
}
|
||||||
report("caseInsensitivePMatch");
|
report("caseInsensitivePMatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This test is for 8237599
|
||||||
|
private static void surrogatePairOverlapRegion() {
|
||||||
|
String input = "\ud801\udc37";
|
||||||
|
|
||||||
|
Pattern p = Pattern.compile(".+");
|
||||||
|
Matcher m = p.matcher(input);
|
||||||
|
m.region(0, 1);
|
||||||
|
|
||||||
|
boolean ok = m.find();
|
||||||
|
if (!ok || !m.group(0).equals(input.substring(0, 1)))
|
||||||
|
{
|
||||||
|
failCount++;
|
||||||
|
System.out.println("Input \"" + input + "\".substr(0, 1)" +
|
||||||
|
" expected to match pattern \"" + p + "\"");
|
||||||
|
if (ok) {
|
||||||
|
System.out.println("group(0): \"" + m.group(0) + "\"");
|
||||||
|
}
|
||||||
|
} else if (!m.hitEnd()) {
|
||||||
|
failCount++;
|
||||||
|
System.out.println("Expected m.hitEnd() == true");
|
||||||
|
}
|
||||||
|
|
||||||
|
p = Pattern.compile(".*(.)");
|
||||||
|
m = p.matcher(input);
|
||||||
|
m.region(1, 2);
|
||||||
|
|
||||||
|
ok = m.find();
|
||||||
|
if (!ok || !m.group(0).equals(input.substring(1, 2))
|
||||||
|
|| !m.group(1).equals(input.substring(1, 2)))
|
||||||
|
{
|
||||||
|
failCount++;
|
||||||
|
System.out.println("Input \"" + input + "\".substr(1, 2)" +
|
||||||
|
" expected to match pattern \"" + p + "\"");
|
||||||
|
if (ok) {
|
||||||
|
System.out.println("group(0): \"" + m.group(0) + "\"");
|
||||||
|
System.out.println("group(1): \"" + m.group(1) + "\"");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report("surrogatePairOverlapRegion");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue