8225198: Optimize regex tree for greedy quantifiers of type {N,}

Reviewed-by: redestad, bchristi
This commit is contained in:
Ivan Gerasimov 2019-06-04 18:55:53 -07:00
parent 4098f0ecdd
commit 90e6c1cc36
2 changed files with 20 additions and 48 deletions

View file

@ -3279,11 +3279,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
cmin = Math.addExact(Math.multiplyExact(cmin, 10), cmin = Math.addExact(Math.multiplyExact(cmin, 10),
ch - '0'); ch - '0');
} while (ASCII.isDigit(ch = read())); } while (ASCII.isDigit(ch = read()));
cmax = cmin;
if (ch == ',') { if (ch == ',') {
ch = read(); ch = read();
cmax = MAX_REPS; if (ch == '}') {
if (ch != '}') { unread();
return curly(prev, cmin);
} else {
cmax = 0; cmax = 0;
while (ASCII.isDigit(ch)) { while (ASCII.isDigit(ch)) {
cmax = Math.addExact(Math.multiplyExact(cmax, 10), cmax = Math.addExact(Math.multiplyExact(cmax, 10),
@ -3291,6 +3292,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
ch = read(); ch = read();
} }
} }
} else {
cmax = cmin;
} }
} catch (ArithmeticException ae) { } catch (ArithmeticException ae) {
throw error("Illegal repetition range"); throw error("Illegal repetition range");
@ -3299,18 +3302,16 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
throw error("Unclosed counted closure"); throw error("Unclosed counted closure");
if (cmax < cmin) if (cmax < cmin)
throw error("Illegal repetition range"); throw error("Illegal repetition range");
Curly curly;
ch = peek(); ch = peek();
if (ch == '?') { if (ch == '?') {
next(); next();
curly = new Curly(prev, cmin, cmax, Qtype.LAZY); return new Curly(prev, cmin, cmax, Qtype.LAZY);
} else if (ch == '+') { } else if (ch == '+') {
next(); next();
curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE); return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
} else { } else {
curly = new Curly(prev, cmin, cmax, Qtype.GREEDY); return new Curly(prev, cmin, cmax, Qtype.GREEDY);
} }
return curly;
} else { } else {
throw error("Illegal repetition"); throw error("Illegal repetition");
} }
@ -4266,8 +4267,8 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
} }
/** /**
* Handles the greedy style repetition with the minimum either be * Handles the greedy style repetition with the specified minimum
* 0 or 1 and the maximum be MAX_REPS, for * and + quantifier. * and the maximum equal to MAX_REPS, for *, + and {N,} quantifiers.
*/ */
static class CharPropertyGreedy extends Node { static class CharPropertyGreedy extends Node {
final CharPredicate predicate; final CharPredicate predicate;
@ -4277,7 +4278,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
this.predicate = cp.predicate; this.predicate = cp.predicate;
this.cmin = cmin; this.cmin = cmin;
} }
boolean match(Matcher matcher, int i, CharSequence seq) { boolean match(Matcher matcher, int i, CharSequence seq) {
int n = 0; int n = 0;
int to = matcher.to; int to = matcher.to;
// greedy, all the way down // greedy, all the way down
@ -4320,7 +4321,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
super(bcp, cmin); super(bcp, cmin);
} }
boolean match(Matcher matcher, int i, CharSequence seq) { boolean match(Matcher matcher, int i, CharSequence seq) {
int n = 0; int n = 0;
int to = matcher.to; int to = matcher.to;
while (i < to && predicate.is(seq.charAt(i))) { while (i < to && predicate.is(seq.charAt(i))) {
@ -5157,41 +5158,6 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
} }
} }
/**
 * A node that chooses between two alternative sub-nodes depending on
 * whether a condition node matches at the current position.
 */
static final class Conditional extends Node {
    Node cond;
    Node yes;
    Node not;

    /**
     * Stores the three sub-nodes as given.
     *
     * @param cond node evaluated first to select a branch
     * @param yes  node tried when {@code cond} matches
     * @param not  node tried when {@code cond} does not match
     */
    Conditional(Node cond, Node yes, Node not) {
        this.cond = cond;
        this.yes = yes;
        this.not = not;
    }

    /**
     * Matches {@code cond} at index {@code i}, then delegates to
     * {@code yes} or {@code not} at that same index.
     *
     * @return the result of whichever branch was selected
     */
    boolean match(Matcher matcher, int i, CharSequence seq) {
        return cond.match(matcher, i, seq)
                ? yes.match(matcher, i, seq)
                : not.match(matcher, i, seq);
    }

    /**
     * Studies both branches separately and folds their length bounds
     * into {@code info}: the smaller of the two minimums and the larger
     * of the two maximums are added to the values {@code info} held on
     * entry. The {@code cond} node itself is not studied here.
     *
     * @return the result of studying the following node
     */
    boolean study(TreeInfo info) {
        // Snapshot what was accumulated before this node.
        final int baseMin = info.minLength;
        final int baseMax = info.maxLength;
        final boolean baseValid = info.maxValid;

        // Measure the "yes" branch from a clean slate.
        info.reset();
        yes.study(info);
        final int yesMin = info.minLength;
        final int yesMax = info.maxLength;
        final boolean yesValid = info.maxValid;

        // Measure the "not" branch from a clean slate.
        info.reset();
        not.study(info);
        final int notMin = info.minLength;
        final int notMax = info.maxLength;
        final boolean notValid = info.maxValid;

        // Either branch may be taken, so use the widest bounds of the two.
        info.minLength = baseMin + Math.min(yesMin, notMin);
        info.maxLength = baseMax + Math.max(yesMax, notMax);
        info.maxValid = baseValid & yesValid & notValid;
        info.deterministic = false;

        // NOTE(review): 'next' is presumably inherited from Node; confirm.
        return next.study(info);
    }
}
/** /**
* Zero width positive lookahead. * Zero width positive lookahead.
*/ */

View file

@ -195,7 +195,13 @@ class PrintPattern {
pstr = gcp.predicate.toString(); pstr = gcp.predicate.toString();
else else
pstr = "Single \"" + pstr + "\""; pstr = "Single \"" + pstr + "\"";
str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+"); str = name + " " + pstr;
if (gcp.cmin == 0)
str += "*";
else if (gcp.cmin == 1)
str += "+";
else
str += "{" + gcp.cmin + ",}";
print(node, str, depth); print(node, str, depth);
} else if (node instanceof Pattern.BackRef) { } else if (node instanceof Pattern.BackRef) {
str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2; str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;