Mirror of https://github.com/openjdk/jdk.git (synced 2025-08-28 15:24:43 +02:00)
8225198: Optimize regex tree for greedy quantifiers of type {N,}
Reviewed-by: redestad, bchristi
This commit is contained in:
parent 4098f0ecdd
commit 90e6c1cc36
2 changed files with 20 additions and 48 deletions
|
@ -3279,11 +3279,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
cmin = Math.addExact(Math.multiplyExact(cmin, 10),
|
cmin = Math.addExact(Math.multiplyExact(cmin, 10),
|
||||||
ch - '0');
|
ch - '0');
|
||||||
} while (ASCII.isDigit(ch = read()));
|
} while (ASCII.isDigit(ch = read()));
|
||||||
cmax = cmin;
|
|
||||||
if (ch == ',') {
|
if (ch == ',') {
|
||||||
ch = read();
|
ch = read();
|
||||||
cmax = MAX_REPS;
|
if (ch == '}') {
|
||||||
if (ch != '}') {
|
unread();
|
||||||
|
return curly(prev, cmin);
|
||||||
|
} else {
|
||||||
cmax = 0;
|
cmax = 0;
|
||||||
while (ASCII.isDigit(ch)) {
|
while (ASCII.isDigit(ch)) {
|
||||||
cmax = Math.addExact(Math.multiplyExact(cmax, 10),
|
cmax = Math.addExact(Math.multiplyExact(cmax, 10),
|
||||||
|
@ -3291,6 +3292,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
ch = read();
|
ch = read();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
cmax = cmin;
|
||||||
}
|
}
|
||||||
} catch (ArithmeticException ae) {
|
} catch (ArithmeticException ae) {
|
||||||
throw error("Illegal repetition range");
|
throw error("Illegal repetition range");
|
||||||
|
@ -3299,18 +3302,16 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
throw error("Unclosed counted closure");
|
throw error("Unclosed counted closure");
|
||||||
if (cmax < cmin)
|
if (cmax < cmin)
|
||||||
throw error("Illegal repetition range");
|
throw error("Illegal repetition range");
|
||||||
Curly curly;
|
|
||||||
ch = peek();
|
ch = peek();
|
||||||
if (ch == '?') {
|
if (ch == '?') {
|
||||||
next();
|
next();
|
||||||
curly = new Curly(prev, cmin, cmax, Qtype.LAZY);
|
return new Curly(prev, cmin, cmax, Qtype.LAZY);
|
||||||
} else if (ch == '+') {
|
} else if (ch == '+') {
|
||||||
next();
|
next();
|
||||||
curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
|
return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
|
||||||
} else {
|
} else {
|
||||||
curly = new Curly(prev, cmin, cmax, Qtype.GREEDY);
|
return new Curly(prev, cmin, cmax, Qtype.GREEDY);
|
||||||
}
|
}
|
||||||
return curly;
|
|
||||||
} else {
|
} else {
|
||||||
throw error("Illegal repetition");
|
throw error("Illegal repetition");
|
||||||
}
|
}
|
||||||
|
@ -4266,8 +4267,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles the greedy style repetition with the minimum either be
|
* Handles the greedy style repetition with the specified minimum
|
||||||
* 0 or 1 and the maximum be MAX_REPS, for * and + quantifier.
|
* and the maximum equal to MAX_REPS, for *, + and {N,} quantifiers.
|
||||||
*/
|
*/
|
||||||
static class CharPropertyGreedy extends Node {
|
static class CharPropertyGreedy extends Node {
|
||||||
final CharPredicate predicate;
|
final CharPredicate predicate;
|
||||||
|
@ -4277,7 +4278,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
this.predicate = cp.predicate;
|
this.predicate = cp.predicate;
|
||||||
this.cmin = cmin;
|
this.cmin = cmin;
|
||||||
}
|
}
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
boolean match(Matcher matcher, int i, CharSequence seq) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int to = matcher.to;
|
int to = matcher.to;
|
||||||
// greedy, all the way down
|
// greedy, all the way down
|
||||||
|
@ -4320,7 +4321,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
super(bcp, cmin);
|
super(bcp, cmin);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
boolean match(Matcher matcher, int i, CharSequence seq) {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int to = matcher.to;
|
int to = matcher.to;
|
||||||
while (i < to && predicate.is(seq.charAt(i))) {
|
while (i < to && predicate.is(seq.charAt(i))) {
|
||||||
|
@ -5157,41 +5158,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class Conditional extends Node {
|
|
||||||
Node cond, yes, not;
|
|
||||||
Conditional(Node cond, Node yes, Node not) {
|
|
||||||
this.cond = cond;
|
|
||||||
this.yes = yes;
|
|
||||||
this.not = not;
|
|
||||||
}
|
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
|
||||||
if (cond.match(matcher, i, seq)) {
|
|
||||||
return yes.match(matcher, i, seq);
|
|
||||||
} else {
|
|
||||||
return not.match(matcher, i, seq);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
boolean study(TreeInfo info) {
|
|
||||||
int minL = info.minLength;
|
|
||||||
int maxL = info.maxLength;
|
|
||||||
boolean maxV = info.maxValid;
|
|
||||||
info.reset();
|
|
||||||
yes.study(info);
|
|
||||||
|
|
||||||
int minL2 = info.minLength;
|
|
||||||
int maxL2 = info.maxLength;
|
|
||||||
boolean maxV2 = info.maxValid;
|
|
||||||
info.reset();
|
|
||||||
not.study(info);
|
|
||||||
|
|
||||||
info.minLength = minL + Math.min(minL2, info.minLength);
|
|
||||||
info.maxLength = maxL + Math.max(maxL2, info.maxLength);
|
|
||||||
info.maxValid = (maxV & maxV2 & info.maxValid);
|
|
||||||
info.deterministic = false;
|
|
||||||
return next.study(info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Zero width positive lookahead.
|
* Zero width positive lookahead.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -195,7 +195,13 @@ class PrintPattern {
|
||||||
pstr = gcp.predicate.toString();
|
pstr = gcp.predicate.toString();
|
||||||
else
|
else
|
||||||
pstr = "Single \"" + pstr + "\"";
|
pstr = "Single \"" + pstr + "\"";
|
||||||
str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+");
|
str = name + " " + pstr;
|
||||||
|
if (gcp.cmin == 0)
|
||||||
|
str += "*";
|
||||||
|
else if (gcp.cmin == 1)
|
||||||
|
str += "+";
|
||||||
|
else
|
||||||
|
str += "{" + gcp.cmin + ",}";
|
||||||
print(node, str, depth);
|
print(node, str, depth);
|
||||||
} else if (node instanceof Pattern.BackRef) {
|
} else if (node instanceof Pattern.BackRef) {
|
||||||
str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;
|
str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue