mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
8235812: Unicode linebreak with quantifier does not match valid input
Reviewed-by: rriggs
This commit is contained in:
parent
b83285facc
commit
987be2bb6d
2 changed files with 148 additions and 12 deletions
|
@ -2064,7 +2064,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
Node prev = null;
|
Node prev = null;
|
||||||
Node firstTail = null;
|
Node firstTail = null;
|
||||||
Branch branch = null;
|
Branch branch = null;
|
||||||
Node branchConn = null;
|
BranchConn branchConn = null;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
Node node = sequence(end);
|
Node node = sequence(end);
|
||||||
|
@ -2212,7 +2212,24 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (node instanceof LineEnding) {
|
||||||
|
LineEnding le = (LineEnding)node;
|
||||||
|
node = closureOfLineEnding(le);
|
||||||
|
|
||||||
|
if (node != le) {
|
||||||
|
// LineEnding was replaced with an anonymous group
|
||||||
|
if (head == null)
|
||||||
|
head = node;
|
||||||
|
else
|
||||||
|
tail.next = node;
|
||||||
|
// Double return: Tail was returned in root
|
||||||
|
tail = root;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
node = closure(node);
|
node = closure(node);
|
||||||
|
}
|
||||||
|
|
||||||
/* save the top dot-greedy nodes (.*, .+) as well
|
/* save the top dot-greedy nodes (.*, .+) as well
|
||||||
if (node instanceof GreedyCharProperty &&
|
if (node instanceof GreedyCharProperty &&
|
||||||
((GreedyCharProperty)node).cp instanceof Dot) {
|
((GreedyCharProperty)node).cp instanceof Dot) {
|
||||||
|
@ -3079,18 +3096,31 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
if (saveTCNCount < topClosureNodes.size())
|
if (saveTCNCount < topClosureNodes.size())
|
||||||
topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
|
topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
|
||||||
|
|
||||||
|
return groupWithClosure(node, head, tail, capturingGroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transforms a Group with quantifiers into some special constructs
|
||||||
|
* (such as Branch or Loop/GroupCurly), if necessary.
|
||||||
|
*
|
||||||
|
* This method is applied either to actual groups or to the Unicode
|
||||||
|
* linebreak (aka \\R) represented as an anonymous group.
|
||||||
|
*/
|
||||||
|
private Node groupWithClosure(Node node, Node head, Node tail,
|
||||||
|
boolean capturingGroup)
|
||||||
|
{
|
||||||
if (node instanceof Ques) {
|
if (node instanceof Ques) {
|
||||||
Ques ques = (Ques) node;
|
Ques ques = (Ques) node;
|
||||||
if (ques.type == Qtype.POSSESSIVE) {
|
if (ques.type == Qtype.POSSESSIVE) {
|
||||||
root = node;
|
root = node;
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
tail.next = new BranchConn();
|
BranchConn branchConn = new BranchConn();
|
||||||
tail = tail.next;
|
tail = tail.next = branchConn;
|
||||||
if (ques.type == Qtype.GREEDY) {
|
if (ques.type == Qtype.GREEDY) {
|
||||||
head = new Branch(head, null, tail);
|
head = new Branch(head, null, branchConn);
|
||||||
} else { // Reluctant quantifier
|
} else { // Reluctant quantifier
|
||||||
head = new Branch(null, head, tail);
|
head = new Branch(null, head, branchConn);
|
||||||
}
|
}
|
||||||
root = tail;
|
root = tail;
|
||||||
return head;
|
return head;
|
||||||
|
@ -3267,6 +3297,31 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
return new Curly(prev, cmin, MAX_REPS, qtype);
|
return new Curly(prev, cmin, MAX_REPS, qtype);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processing repetition of a Unicode linebreak \\R.
|
||||||
|
*/
|
||||||
|
private Node closureOfLineEnding(LineEnding le) {
|
||||||
|
int ch = peek();
|
||||||
|
if (ch != '?' && ch != '*' && ch != '+' && ch != '{') {
|
||||||
|
return le;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the LineEnding with an anonymous group
|
||||||
|
// (?:\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029])
|
||||||
|
Node grHead = createGroup(true);
|
||||||
|
Node grTail = root;
|
||||||
|
BranchConn branchConn = new BranchConn();
|
||||||
|
branchConn.next = grTail;
|
||||||
|
Node slice = new Slice(new int[] {0x0D, 0x0A});
|
||||||
|
slice.next = branchConn;
|
||||||
|
Node chClass = newCharProperty(x -> x == 0x0A || x == 0x0B ||
|
||||||
|
x == 0x0C || x == 0x0D || x == 0x85 || x == 0x2028 ||
|
||||||
|
x == 0x2029);
|
||||||
|
chClass.next = branchConn;
|
||||||
|
grHead.next = new Branch(slice, chClass, branchConn);
|
||||||
|
return groupWithClosure(closure(grHead), grHead, grTail, false);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes repetition. If the next character peeked is a quantifier
|
* Processes repetition. If the next character peeked is a quantifier
|
||||||
* then new nodes must be appended to handle the repetition.
|
* then new nodes must be appended to handle the repetition.
|
||||||
|
@ -4723,8 +4778,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
static final class Branch extends Node {
|
static final class Branch extends Node {
|
||||||
Node[] atoms = new Node[2];
|
Node[] atoms = new Node[2];
|
||||||
int size = 2;
|
int size = 2;
|
||||||
Node conn;
|
BranchConn conn;
|
||||||
Branch(Node first, Node second, Node branchConn) {
|
Branch(Node first, Node second, BranchConn branchConn) {
|
||||||
conn = branchConn;
|
conn = branchConn;
|
||||||
atoms[0] = first;
|
atoms[0] = first;
|
||||||
atoms[1] = second;
|
atoms[1] = second;
|
||||||
|
@ -4732,9 +4787,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
|
|
||||||
void add(Node node) {
|
void add(Node node) {
|
||||||
if (size >= atoms.length) {
|
if (size >= atoms.length) {
|
||||||
Node[] tmp = new Node[atoms.length*2];
|
int len = ArraysSupport.newLength(size,
|
||||||
System.arraycopy(atoms, 0, tmp, 0, atoms.length);
|
1, /* minimum growth */
|
||||||
atoms = tmp;
|
size /* preferred growth */);
|
||||||
|
atoms = Arrays.copyOf(atoms, len);
|
||||||
}
|
}
|
||||||
atoms[size++] = node;
|
atoms[size++] = node;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
|
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
|
||||||
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
||||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034
|
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||||
*
|
*
|
||||||
* @library /test/lib
|
* @library /test/lib
|
||||||
* @library /lib/testlibrary/java/lang
|
* @library /lib/testlibrary/java/lang
|
||||||
|
@ -57,7 +57,9 @@ import java.nio.CharBuffer;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
@ -186,6 +188,7 @@ public class RegExTest {
|
||||||
invalidGroupName();
|
invalidGroupName();
|
||||||
illegalRepetitionRange();
|
illegalRepetitionRange();
|
||||||
surrogatePairWithCanonEq();
|
surrogatePairWithCanonEq();
|
||||||
|
lineBreakWithQuantifier();
|
||||||
|
|
||||||
if (failure) {
|
if (failure) {
|
||||||
throw new
|
throw new
|
||||||
|
@ -5000,4 +5003,81 @@ public class RegExTest {
|
||||||
}
|
}
|
||||||
report("surrogatePairWithCanonEq");
|
report("surrogatePairWithCanonEq");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This test is for 8235812
|
||||||
|
private static void lineBreakWithQuantifier() {
|
||||||
|
// key: pattern
|
||||||
|
// value: lengths of input that must match the pattern
|
||||||
|
Map<String, List<Integer>> cases = Map.ofEntries(
|
||||||
|
Map.entry("\\R?", List.of(0, 1)),
|
||||||
|
Map.entry("\\R*", List.of(0, 1, 2, 3)),
|
||||||
|
Map.entry("\\R+", List.of(1, 2, 3)),
|
||||||
|
Map.entry("\\R{0}", List.of(0)),
|
||||||
|
Map.entry("\\R{1}", List.of(1)),
|
||||||
|
Map.entry("\\R{2}", List.of(2)),
|
||||||
|
Map.entry("\\R{3}", List.of(3)),
|
||||||
|
Map.entry("\\R{0,}", List.of(0, 1, 2, 3)),
|
||||||
|
Map.entry("\\R{1,}", List.of(1, 2, 3)),
|
||||||
|
Map.entry("\\R{2,}", List.of(2, 3)),
|
||||||
|
Map.entry("\\R{3,}", List.of(3)),
|
||||||
|
Map.entry("\\R{0,0}", List.of(0)),
|
||||||
|
Map.entry("\\R{0,1}", List.of(0, 1)),
|
||||||
|
Map.entry("\\R{0,2}", List.of(0, 1, 2)),
|
||||||
|
Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)),
|
||||||
|
Map.entry("\\R{1,1}", List.of(1)),
|
||||||
|
Map.entry("\\R{1,2}", List.of(1, 2)),
|
||||||
|
Map.entry("\\R{1,3}", List.of(1, 2, 3)),
|
||||||
|
Map.entry("\\R{2,2}", List.of(2)),
|
||||||
|
Map.entry("\\R{2,3}", List.of(2, 3)),
|
||||||
|
Map.entry("\\R{3,3}", List.of(3)),
|
||||||
|
Map.entry("\\R", List.of(1)),
|
||||||
|
Map.entry("\\R\\R", List.of(2)),
|
||||||
|
Map.entry("\\R\\R\\R", List.of(3))
|
||||||
|
);
|
||||||
|
|
||||||
|
// key: length of input
|
||||||
|
// value: all possible inputs of given length
|
||||||
|
Map<Integer, List<String>> inputs = new HashMap<>();
|
||||||
|
String[] Rs = { "\r\n", "\r", "\n",
|
||||||
|
"\u000B", "\u000C", "\u0085", "\u2028", "\u2029" };
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int len = 0; len <= 3; ++len) {
|
||||||
|
int[] idx = new int[len + 1];
|
||||||
|
do {
|
||||||
|
sb.setLength(0);
|
||||||
|
for (int j = 0; j < len; ++j)
|
||||||
|
sb.append(Rs[idx[j]]);
|
||||||
|
inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString());
|
||||||
|
idx[0]++;
|
||||||
|
for (int j = 0; j < len; ++j) {
|
||||||
|
if (idx[j] < Rs.length)
|
||||||
|
break;
|
||||||
|
idx[j] = 0;
|
||||||
|
idx[j+1]++;
|
||||||
|
}
|
||||||
|
} while (idx[len] == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// exhaustive testing
|
||||||
|
for (String patStr : cases.keySet()) {
|
||||||
|
Pattern[] pats = patStr.endsWith("R")
|
||||||
|
? new Pattern[] { Pattern.compile(patStr) } // no quantifiers
|
||||||
|
: new Pattern[] { Pattern.compile(patStr), // greedy
|
||||||
|
Pattern.compile(patStr + "?") }; // reluctant
|
||||||
|
Matcher m = pats[0].matcher("");
|
||||||
|
for (Pattern p : pats) {
|
||||||
|
m.usePattern(p);
|
||||||
|
for (int len : cases.get(patStr)) {
|
||||||
|
for (String in : inputs.get(len)) {
|
||||||
|
if (!m.reset(in).matches()) {
|
||||||
|
failCount++;
|
||||||
|
System.err.println("Expected to match '" +
|
||||||
|
in + "' =~ /" + p + "/");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report("lineBreakWithQuantifier");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue