mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 23:04:50 +02:00
8235812: Unicode linebreak with quantifier does not match valid input
Reviewed-by: rriggs
This commit is contained in:
parent
b83285facc
commit
987be2bb6d
2 changed files with 148 additions and 12 deletions
|
@ -2064,7 +2064,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
Node prev = null;
|
||||
Node firstTail = null;
|
||||
Branch branch = null;
|
||||
Node branchConn = null;
|
||||
BranchConn branchConn = null;
|
||||
|
||||
for (;;) {
|
||||
Node node = sequence(end);
|
||||
|
@ -2212,7 +2212,24 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
break;
|
||||
}
|
||||
|
||||
node = closure(node);
|
||||
if (node instanceof LineEnding) {
|
||||
LineEnding le = (LineEnding)node;
|
||||
node = closureOfLineEnding(le);
|
||||
|
||||
if (node != le) {
|
||||
// LineEnding was replaced with an anonymous group
|
||||
if (head == null)
|
||||
head = node;
|
||||
else
|
||||
tail.next = node;
|
||||
// Double return: Tail was returned in root
|
||||
tail = root;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
node = closure(node);
|
||||
}
|
||||
|
||||
/* save the top dot-greedy nodes (.*, .+) as well
|
||||
if (node instanceof GreedyCharProperty &&
|
||||
((GreedyCharProperty)node).cp instanceof Dot) {
|
||||
|
@ -3079,18 +3096,31 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
if (saveTCNCount < topClosureNodes.size())
|
||||
topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
|
||||
|
||||
return groupWithClosure(node, head, tail, capturingGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms a Group with quantifiers into some special constructs
|
||||
* (such as Branch or Loop/GroupCurly), if necessary.
|
||||
*
|
||||
* This method is applied either to actual groups or to the Unicode
|
||||
* linebreak (aka \\R) represented as an anonymous group.
|
||||
*/
|
||||
private Node groupWithClosure(Node node, Node head, Node tail,
|
||||
boolean capturingGroup)
|
||||
{
|
||||
if (node instanceof Ques) {
|
||||
Ques ques = (Ques) node;
|
||||
if (ques.type == Qtype.POSSESSIVE) {
|
||||
root = node;
|
||||
return node;
|
||||
}
|
||||
tail.next = new BranchConn();
|
||||
tail = tail.next;
|
||||
BranchConn branchConn = new BranchConn();
|
||||
tail = tail.next = branchConn;
|
||||
if (ques.type == Qtype.GREEDY) {
|
||||
head = new Branch(head, null, tail);
|
||||
head = new Branch(head, null, branchConn);
|
||||
} else { // Reluctant quantifier
|
||||
head = new Branch(null, head, tail);
|
||||
head = new Branch(null, head, branchConn);
|
||||
}
|
||||
root = tail;
|
||||
return head;
|
||||
|
@ -3267,6 +3297,31 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
return new Curly(prev, cmin, MAX_REPS, qtype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processing repetition of a Unicode linebreak \\R.
|
||||
*/
|
||||
private Node closureOfLineEnding(LineEnding le) {
|
||||
int ch = peek();
|
||||
if (ch != '?' && ch != '*' && ch != '+' && ch != '{') {
|
||||
return le;
|
||||
}
|
||||
|
||||
// Replace the LineEnding with an anonymous group
|
||||
// (?:\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029])
|
||||
Node grHead = createGroup(true);
|
||||
Node grTail = root;
|
||||
BranchConn branchConn = new BranchConn();
|
||||
branchConn.next = grTail;
|
||||
Node slice = new Slice(new int[] {0x0D, 0x0A});
|
||||
slice.next = branchConn;
|
||||
Node chClass = newCharProperty(x -> x == 0x0A || x == 0x0B ||
|
||||
x == 0x0C || x == 0x0D || x == 0x85 || x == 0x2028 ||
|
||||
x == 0x2029);
|
||||
chClass.next = branchConn;
|
||||
grHead.next = new Branch(slice, chClass, branchConn);
|
||||
return groupWithClosure(closure(grHead), grHead, grTail, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes repetition. If the next character peeked is a quantifier
|
||||
* then new nodes must be appended to handle the repetition.
|
||||
|
@ -4723,8 +4778,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
static final class Branch extends Node {
|
||||
Node[] atoms = new Node[2];
|
||||
int size = 2;
|
||||
Node conn;
|
||||
Branch(Node first, Node second, Node branchConn) {
|
||||
BranchConn conn;
|
||||
Branch(Node first, Node second, BranchConn branchConn) {
|
||||
conn = branchConn;
|
||||
atoms[0] = first;
|
||||
atoms[1] = second;
|
||||
|
@ -4732,9 +4787,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||
|
||||
void add(Node node) {
|
||||
if (size >= atoms.length) {
|
||||
Node[] tmp = new Node[atoms.length*2];
|
||||
System.arraycopy(atoms, 0, tmp, 0, atoms.length);
|
||||
atoms = tmp;
|
||||
int len = ArraysSupport.newLength(size,
|
||||
1, /* minimum growth */
|
||||
size /* preferred growth */);
|
||||
atoms = Arrays.copyOf(atoms, len);
|
||||
}
|
||||
atoms[size++] = node;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue