8010821: [findbugs] Some classes in jdk.nashorn.internal.runtime.regexp expose mutable objects

Reviewed-by: attila, jlaskey, sundar
This commit is contained in:
Hannes Wallnöfer 2013-07-16 16:12:26 +02:00
parent e85e4fb319
commit 06394dedc7
18 changed files with 263 additions and 771 deletions

View file

@ -84,9 +84,7 @@ public class JoniRegExp extends RegExp {
this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT); this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead(); this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
} }
} catch (final PatternSyntaxException e2) { } catch (final PatternSyntaxException | JOniException e2) {
throwParserException("syntax", e2.getMessage());
} catch (JOniException e2) {
throwParserException("syntax", e2.getMessage()); throwParserException("syntax", e2.getMessage());
} }
} }

View file

@ -41,10 +41,12 @@ import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
final class Analyser extends Parser { final class Analyser extends Parser {
@ -53,8 +55,6 @@ final class Analyser extends Parser {
} }
protected final void compile() { protected final void compile() {
regex.state = RegexState.COMPILING;
if (Config.DEBUG) { if (Config.DEBUG) {
Config.log.println(new String(chars, getBegin(), getEnd())); Config.log.println(new String(chars, getBegin(), getEnd()));
} }
@ -115,8 +115,6 @@ final class Analyser extends Parser {
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
} // DEBUG_COMPILE } // DEBUG_COMPILE
regex.state = RegexState.NORMAL;
} }
private void swap(Node a, Node b) { private void swap(Node a, Node b) {
@ -187,14 +185,11 @@ final class Analyser extends Parser {
BackRefNode br = (BackRefNode)node; BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) break; if (br.isRecursion()) break;
if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF); if (br.backRef > env.numMem) {
min = getMinMatchLength(env.memNodes[br.back[0]]); throw new ValueException(ERR_INVALID_BACKREF);
for (int i=1; i<br.backNum; i++) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
if (min > tmin) min = tmin;
} }
min = getMinMatchLength(env.memNodes[br.backRef]);
break; break;
case NodeType.LIST: case NodeType.LIST:
@ -306,11 +301,11 @@ final class Analyser extends Parser {
break; break;
} }
for (int i=0; i<br.backNum; i++) { if (br.backRef > env.numMem) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); throw new ValueException(ERR_INVALID_BACKREF);
int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
if (max < tmax) max = tmax;
} }
int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
if (max < tmax) max = tmax;
break; break;
case NodeType.QTFR: case NodeType.QTFR:
@ -417,8 +412,6 @@ final class Analyser extends Parser {
break; break;
case NodeType.CTYPE: case NodeType.CTYPE:
len = 1;
case NodeType.CCLASS: case NodeType.CCLASS:
case NodeType.CANY: case NodeType.CANY:
len = 1; len = 1;
@ -712,13 +705,12 @@ final class Analyser extends Parser {
an.charLength = len; an.charLength = len;
break; break;
case GET_CHAR_LEN_VARLEN: case GET_CHAR_LEN_VARLEN:
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
break;
case GET_CHAR_LEN_TOP_ALT_VARLEN: case GET_CHAR_LEN_TOP_ALT_VARLEN:
if (syntax.differentLengthAltLookBehind()) { if (syntax.differentLengthAltLookBehind()) {
return divideLookBehindAlternatives(node); return divideLookBehindAlternatives(node);
} else { } else {
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
} }
} }
return node; return node;
@ -955,12 +947,12 @@ final class Analyser extends Parser {
case NodeType.BREF: case NodeType.BREF:
BackRefNode br = (BackRefNode)node; BackRefNode br = (BackRefNode)node;
for (int i=0; i<br.backNum; i++) { if (br.backRef > env.numMem) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); throw new ValueException(ERR_INVALID_BACKREF);
env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
} }
env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
break; break;
case NodeType.QTFR: case NodeType.QTFR:
@ -1064,14 +1056,18 @@ final class Analyser extends Parser {
break; break;
case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node); node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) continue restart; if (node.getType() != NodeType.ANCHOR) continue restart;
setupTree(((AnchorNode)node).target, state); setupTree(((AnchorNode)node).target, state);
break; break;
case AnchorType.LOOK_BEHIND_NOT: case AnchorType.LOOK_BEHIND_NOT:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node); node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) continue restart; if (node.getType() != NodeType.ANCHOR) continue restart;
setupTree(((AnchorNode)node).target, (state | IN_NOT)); setupTree(((AnchorNode)node).target, (state | IN_NOT));
@ -1218,15 +1214,9 @@ final class Analyser extends Parser {
Node[]nodes = oenv.scanEnv.memNodes; Node[]nodes = oenv.scanEnv.memNodes;
int min = getMinMatchLength(nodes[br.back[0]]); int min = getMinMatchLength(nodes[br.backRef]);
int max = getMaxMatchLength(nodes[br.back[0]]); int max = getMaxMatchLength(nodes[br.backRef]);
for (int i=1; i<br.backNum; i++) {
int tmin = getMinMatchLength(nodes[br.back[i]]);
int tmax = getMaxMatchLength(nodes[br.back[i]]);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
opt.length.set(min, max); opt.length.set(min, max);
break; break;
} }
@ -1314,7 +1304,7 @@ final class Analyser extends Parser {
} }
default: default:
newInternalException(ERR_PARSER_BUG); throw new InternalException(ERR_PARSER_BUG);
} // switch } // switch
} }

View file

@ -100,12 +100,7 @@ final class ArrayCompiler extends Compiler {
} }
private boolean isNeedStrLenOpExact(int op) { private boolean isNeedStrLenOpExact(int op) {
return op == OPCode.EXACTN || return op == OPCode.EXACTN || op == OPCode.EXACTN_IC;
op == OPCode.EXACTMB2N ||
op == OPCode.EXACTMB3N ||
op == OPCode.EXACTMBN ||
op == OPCode.EXACTN_IC ||
op == OPCode.EXACTN_IC_SB;
} }
private boolean opTemplated(int op) { private boolean opTemplated(int op) {
@ -172,7 +167,6 @@ final class ArrayCompiler extends Compiler {
if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH; if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
len += strLength; len += strLength;
} }
if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
return len; return len;
} }
@ -181,8 +175,6 @@ final class ArrayCompiler extends Compiler {
int op = selectStrOpcode(strLength, ignoreCase); int op = selectStrOpcode(strLength, ignoreCase);
addOpcode(op); addOpcode(op);
if (op == OPCode.EXACTMBN) addLength(1);
if (isNeedStrLenOpExact(op)) { if (isNeedStrLenOpExact(op)) {
addLength(strLength); addLength(strLength);
} }
@ -294,35 +286,22 @@ final class ArrayCompiler extends Compiler {
@Override @Override
protected void compileBackrefNode(BackRefNode node) { protected void compileBackrefNode(BackRefNode node) {
BackRefNode br = node; if (isIgnoreCase(regex.options)) {
// USE_BACKREF_AT_LEVEL addOpcode(OPCode.BACKREFN_IC);
if (br.backNum == 1) { addMemNum(node.backRef);
if (isIgnoreCase(regex.options)) {
addOpcode(OPCode.BACKREFN_IC);
addMemNum(br.back[0]);
} else {
switch (br.back[0]) {
case 1:
addOpcode(OPCode.BACKREF1);
break;
case 2:
addOpcode(OPCode.BACKREF2);
break;
default:
addOpcode(OPCode.BACKREFN);
addOpcode(br.back[0]);
break;
} // switch
}
} else { } else {
if (isIgnoreCase(regex.options)) { switch (node.backRef) {
addOpcode(OPCode.BACKREF_MULTI_IC); case 1:
} else { addOpcode(OPCode.BACKREF1);
addOpcode(OPCode.BACKREF_MULTI); break;
} case 2:
// !add_bacref_mems:! addOpcode(OPCode.BACKREF2);
addLength(br.backNum); break;
for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); default:
addOpcode(OPCode.BACKREFN);
addOpcode(node.backRef);
break;
} // switch
} }
} }
@ -791,13 +770,8 @@ final class ArrayCompiler extends Compiler {
case NodeType.BREF: case NodeType.BREF:
BackRefNode br = (BackRefNode)node; BackRefNode br = (BackRefNode)node;
// USE_BACKREF_AT_LEVEL len = ((!isIgnoreCase(regex.options) && br.backRef <= 2)
if (br.backNum == 1) { ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
} else {
len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
}
break; break;
case NodeType.QTFR: case NodeType.QTFR:
@ -873,15 +847,10 @@ final class ArrayCompiler extends Compiler {
switch(opcode) { switch(opcode) {
case OPCode.ANYCHAR_STAR: case OPCode.ANYCHAR_STAR:
case OPCode.ANYCHAR_STAR_SB:
case OPCode.ANYCHAR_ML_STAR: case OPCode.ANYCHAR_ML_STAR:
case OPCode.ANYCHAR_ML_STAR_SB:
case OPCode.ANYCHAR_STAR_PEEK_NEXT: case OPCode.ANYCHAR_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
case OPCode.STATE_CHECK_ANYCHAR_STAR: case OPCode.STATE_CHECK_ANYCHAR_STAR:
case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
case OPCode.STATE_CHECK_ANYCHAR_ML_STAR: case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
case OPCode.MEMORY_START_PUSH: case OPCode.MEMORY_START_PUSH:
case OPCode.MEMORY_END_PUSH: case OPCode.MEMORY_END_PUSH:

View file

@ -45,7 +45,7 @@ class ByteCodeMachine extends StackMachine {
private int sstart; private int sstart;
private int sbegin; private int sbegin;
private final int[]code; // byte code private final int[] code; // byte code
private int ip; // instruction pointer private int ip; // instruction pointer
ByteCodeMachine(Regex regex, char[] chars, int p, int end) { ByteCodeMachine(Regex regex, char[] chars, int p, int end) {

View file

@ -26,13 +26,11 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
class ByteCodePrinter { class ByteCodePrinter {
final int[]code; final int[] code;
final int codeLength; final int codeLength;
final char[][] templates; final char[][] templates;
Object[]operands; Object[] operands;
int operantCount;
WarnCallback warnings;
private final static String OpCodeNames[] = new String[] { private final static String OpCodeNames[] = new String[] {
"finish", /*OP_FINISH*/ "finish", /*OP_FINISH*/
@ -123,32 +121,6 @@ class ByteCodePrinter {
"state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
"set-option-push", /*OP_SET_OPTION_PUSH*/ "set-option-push", /*OP_SET_OPTION_PUSH*/
"set-option", /*OP_SET_OPTION*/ "set-option", /*OP_SET_OPTION*/
// single byte versions
"anychar-sb", /*OP_ANYCHAR*/
"anychar-ml-sb", /*OP_ANYCHAR_ML*/
"anychar*-sb", /*OP_ANYCHAR_STAR*/
"anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR*/
"anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
"anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
"state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR*/
"state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
"cclass-sb", /*OP_CCLASS*/
"cclass-not-sb", /*OP_CCLASS_NOT*/
"word-sb", /*OP_WORD*/
"not-word-sb", /*OP_NOT_WORD*/
"word-bound-sb", /*OP_WORD_BOUND*/
"not-word-bound-sb", /*OP_NOT_WORD_BOUND*/
"word-begin-sb", /*OP_WORD_BEGIN*/
"word-end-sb", /*OP_WORD_END*/
"look-behind-sb", /*OP_LOOK_BEHIND*/
"exact1-ic-sb", /*OP_EXACT1_IC*/
"exactn-ic-sb", /*OP_EXACTN_IC*/
}; };
private final static int OpCodeArgTypes[] = new int[] { private final static int OpCodeArgTypes[] = new int[] {
@ -240,41 +212,14 @@ class ByteCodePrinter {
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
Arguments.OPTION, /*OP_SET_OPTION_PUSH*/ Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
Arguments.OPTION, /*OP_SET_OPTION*/ Arguments.OPTION, /*OP_SET_OPTION*/
// single byte versions
Arguments.NON, /*OP_ANYCHAR*/
Arguments.NON, /*OP_ANYCHAR_ML*/
Arguments.NON, /*OP_ANYCHAR_STAR*/
Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
Arguments.SPECIAL, /*OP_CCLASS*/
Arguments.SPECIAL, /*OP_CCLASS_NOT*/
Arguments.NON, /*OP_WORD*/
Arguments.NON, /*OP_NOT_WORD*/
Arguments.NON, /*OP_WORD_BOUND*/
Arguments.NON, /*OP_NOT_WORD_BOUND*/
Arguments.NON, /*OP_WORD_BEGIN*/
Arguments.NON, /*OP_WORD_END*/
Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
Arguments.SPECIAL, /*OP_EXACT1_IC*/
Arguments.SPECIAL, /*OP_EXACTN_IC*/
}; };
public ByteCodePrinter(Regex regex) { public ByteCodePrinter(Regex regex) {
code = regex.code; code = regex.code;
codeLength = regex.codeLength; codeLength = regex.codeLength;
operands = regex.operands; operands = regex.operands;
operantCount = regex.operandLength;
templates = regex.templates; templates = regex.templates;
warnings = regex.warnings;
} }
public String byteCodeListToString() { public String byteCodeListToString() {
@ -283,24 +228,17 @@ class ByteCodePrinter {
private void pString(StringBuilder sb, int len, int s) { private void pString(StringBuilder sb, int len, int s) {
sb.append(":"); sb.append(":");
while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); sb.append(new String(code, s, len));
} }
private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) { private void pLenString(StringBuilder sb, int len, int s) {
sb.append(":T:"); sb.append(":").append(len).append(":");
while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]})); sb.append(new String(code, s, len));
} }
private void pLenString(StringBuilder sb, int len, int mbLen, int s) { private void pLenStringFromTemplate(StringBuilder sb, int len, char[] tm, int idx) {
int x = len * mbLen; sb.append(":T:").append(len).append(":");
sb.append(":" + len + ":"); sb.append(tm, idx, len);
while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
}
private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, char[] tm, int idx) {
int x = len * mbLen;
sb.append(":T:" + len + ":");
while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]}));
} }
public int compiledByteCodeToString(StringBuilder sb, int bp) { public int compiledByteCodeToString(StringBuilder sb, int bp) {
@ -309,7 +247,7 @@ class ByteCodePrinter {
CClassNode cc; CClassNode cc;
int tm, idx; int tm, idx;
sb.append("[" + OpCodeNames[code[bp]]); sb.append("[").append(OpCodeNames[code[bp]]);
int argType = OpCodeArgTypes[code[bp]]; int argType = OpCodeArgTypes[code[bp]];
int ip = bp; int ip = bp;
if (argType != Arguments.SPECIAL) { if (argType != Arguments.SPECIAL) {
@ -319,32 +257,32 @@ class ByteCodePrinter {
break; break;
case Arguments.RELADDR: case Arguments.RELADDR:
sb.append(":(" + code[bp] + ")"); sb.append(":(").append(code[bp]).append(")");
bp += OPSize.RELADDR; bp += OPSize.RELADDR;
break; break;
case Arguments.ABSADDR: case Arguments.ABSADDR:
sb.append(":(" + code[bp] + ")"); sb.append(":(").append(code[bp]).append(")");
bp += OPSize.ABSADDR; bp += OPSize.ABSADDR;
break; break;
case Arguments.LENGTH: case Arguments.LENGTH:
sb.append(":" + code[bp]); sb.append(":").append(code[bp]);
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
break; break;
case Arguments.MEMNUM: case Arguments.MEMNUM:
sb.append(":" + code[bp]); sb.append(":").append(code[bp]);
bp += OPSize.MEMNUM; bp += OPSize.MEMNUM;
break; break;
case Arguments.OPTION: case Arguments.OPTION:
sb.append(":" + code[bp]); sb.append(":").append(code[bp]);
bp += OPSize.OPTION; bp += OPSize.OPTION;
break; break;
case Arguments.STATE_CHECK: case Arguments.STATE_CHECK:
sb.append(":" + code[bp]); sb.append(":").append(code[bp]);
bp += OPSize.STATE_CHECK; bp += OPSize.STATE_CHECK;
break; break;
} }
@ -353,8 +291,6 @@ class ByteCodePrinter {
case OPCode.EXACT1: case OPCode.EXACT1:
case OPCode.ANYCHAR_STAR_PEEK_NEXT: case OPCode.ANYCHAR_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
pString(sb, 1, bp++); pString(sb, 1, bp++);
break; break;
@ -386,92 +322,19 @@ class ByteCodePrinter {
bp += OPSize.INDEX; bp += OPSize.INDEX;
idx = code[bp]; idx = code[bp];
bp += OPSize.INDEX; bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 1, templates[tm], idx); pLenStringFromTemplate(sb, len, templates[tm], idx);
} else { } else {
pLenString(sb, len, 1, bp); pLenString(sb, len, bp);
bp += len; bp += len;
} }
break; break;
case OPCode.EXACTMB2N1:
pString(sb, 2, bp);
bp += 2;
break;
case OPCode.EXACTMB2N2:
pString(sb, 4, bp);
bp += 4;
break;
case OPCode.EXACTMB2N3:
pString(sb, 6, bp);
bp += 6;
break;
case OPCode.EXACTMB2N:
len = code[bp];
bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 2, templates[tm], idx);
} else {
pLenString(sb, len, 2, bp);
bp += len * 2;
}
break;
case OPCode.EXACTMB3N:
len = code[bp];
bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 3, templates[tm], idx);
} else {
pLenString(sb, len, 3, bp);
bp += len * 3;
}
break;
case OPCode.EXACTMBN:
int mbLen = code[bp];
bp += OPSize.LENGTH;
len = code[bp];
bp += OPSize.LENGTH;
n = len * mbLen;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
sb.append(":T:" + mbLen + ":" + len + ":");
while (n-- > 0) sb.append(new String(new char[]{templates[tm][idx++]}));
} else {
sb.append(":" + mbLen + ":" + len + ":");
while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]}));
}
break;
case OPCode.EXACT1_IC: case OPCode.EXACT1_IC:
case OPCode.EXACT1_IC_SB:
final int MAX_CHAR_LENGTH = 6;
byte[]bytes = new byte[MAX_CHAR_LENGTH];
for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i];
pString(sb, 1, bp); pString(sb, 1, bp);
bp++; bp++;
break; break;
case OPCode.EXACTN_IC: case OPCode.EXACTN_IC:
case OPCode.EXACTN_IC_SB:
len = code[bp]; len = code[bp];
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) { if (Config.USE_STRING_TEMPLATES) {
@ -479,29 +342,27 @@ class ByteCodePrinter {
bp += OPSize.INDEX; bp += OPSize.INDEX;
idx = code[bp]; idx = code[bp];
bp += OPSize.INDEX; bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 1, templates[tm], idx); pLenStringFromTemplate(sb, len, templates[tm], idx);
} else { } else {
pLenString(sb, len, 1, bp); pLenString(sb, len, bp);
bp += len; bp += len;
} }
break; break;
case OPCode.CCLASS: case OPCode.CCLASS:
case OPCode.CCLASS_SB:
bs = new BitSet(); bs = new BitSet();
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
n = bs.numOn(); n = bs.numOn();
bp += BitSet.BITSET_SIZE; bp += BitSet.BITSET_SIZE;
sb.append(":" + n); sb.append(":").append(n);
break; break;
case OPCode.CCLASS_NOT: case OPCode.CCLASS_NOT:
case OPCode.CCLASS_NOT_SB:
bs = new BitSet(); bs = new BitSet();
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
n = bs.numOn(); n = bs.numOn();
bp += BitSet.BITSET_SIZE; bp += BitSet.BITSET_SIZE;
sb.append(":" + n); sb.append(":").append(n);
break; break;
case OPCode.CCLASS_MB: case OPCode.CCLASS_MB:
@ -511,7 +372,7 @@ class ByteCodePrinter {
cod = code[bp]; cod = code[bp];
//bp += OPSize.CODE_POINT; //bp += OPSize.CODE_POINT;
bp += len; bp += len;
sb.append(":" + cod + ":" + len); sb.append(":").append(cod).append(":").append(len);
break; break;
case OPCode.CCLASS_MIX: case OPCode.CCLASS_MIX:
@ -525,20 +386,20 @@ class ByteCodePrinter {
cod = code[bp]; cod = code[bp];
//bp += OPSize.CODE_POINT; //bp += OPSize.CODE_POINT;
bp += len; bp += len;
sb.append(":" + n + ":" + cod + ":" + len); sb.append(":").append(n).append(":").append(cod).append(":").append(len);
break; break;
case OPCode.CCLASS_NODE: case OPCode.CCLASS_NODE:
cc = (CClassNode)operands[code[bp]]; cc = (CClassNode)operands[code[bp]];
bp += OPSize.POINTER; bp += OPSize.POINTER;
n = cc.bs.numOn(); n = cc.bs.numOn();
sb.append(":" + cc + ":" + n); sb.append(":").append(cc).append(":").append(n);
break; break;
case OPCode.BACKREFN_IC: case OPCode.BACKREFN_IC:
mem = code[bp]; mem = code[bp];
bp += OPSize.MEMNUM; bp += OPSize.MEMNUM;
sb.append(":" + mem); sb.append(":").append(mem);
break; break;
case OPCode.BACKREF_MULTI_IC: case OPCode.BACKREF_MULTI_IC:
@ -557,10 +418,10 @@ class ByteCodePrinter {
case OPCode.BACKREF_WITH_LEVEL: { case OPCode.BACKREF_WITH_LEVEL: {
int option = code[bp]; int option = code[bp];
bp += OPSize.OPTION; bp += OPSize.OPTION;
sb.append(":" + option); sb.append(":").append(option);
int level = code[bp]; int level = code[bp];
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
sb.append(":" + level); sb.append(":").append(level);
sb.append(" "); sb.append(" ");
len = code[bp]; len = code[bp];
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
@ -579,23 +440,22 @@ class ByteCodePrinter {
bp += OPSize.MEMNUM; bp += OPSize.MEMNUM;
addr = code[bp]; addr = code[bp];
bp += OPSize.RELADDR; bp += OPSize.RELADDR;
sb.append(":" + mem + ":" + addr); sb.append(":").append(mem).append(":").append(addr);
break; break;
case OPCode.PUSH_OR_JUMP_EXACT1: case OPCode.PUSH_OR_JUMP_EXACT1:
case OPCode.PUSH_IF_PEEK_NEXT: case OPCode.PUSH_IF_PEEK_NEXT:
addr = code[bp]; addr = code[bp];
bp += OPSize.RELADDR; bp += OPSize.RELADDR;
sb.append(":(" + addr + ")"); sb.append(":(").append(addr).append(")");
pString(sb, 1, bp); pString(sb, 1, bp);
bp++; bp++;
break; break;
case OPCode.LOOK_BEHIND: case OPCode.LOOK_BEHIND:
case OPCode.LOOK_BEHIND_SB:
len = code[bp]; len = code[bp];
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
sb.append(":" + len); sb.append(":").append(len);
break; break;
case OPCode.PUSH_LOOK_BEHIND_NOT: case OPCode.PUSH_LOOK_BEHIND_NOT:
@ -603,7 +463,7 @@ class ByteCodePrinter {
bp += OPSize.RELADDR; bp += OPSize.RELADDR;
len = code[bp]; len = code[bp];
bp += OPSize.LENGTH; bp += OPSize.LENGTH;
sb.append(":" + len + ":(" + addr + ")"); sb.append(":").append(len).append(":(").append(addr).append(")");
break; break;
case OPCode.STATE_CHECK_PUSH: case OPCode.STATE_CHECK_PUSH:
@ -612,7 +472,7 @@ class ByteCodePrinter {
bp += OPSize.STATE_CHECK_NUM; bp += OPSize.STATE_CHECK_NUM;
addr = code[bp]; addr = code[bp];
bp += OPSize.RELADDR; bp += OPSize.RELADDR;
sb.append(":" + scn + ":(" + addr + ")"); sb.append(":").append(scn).append(":(").append(addr).append(")");
break; break;
default: default:
@ -623,14 +483,16 @@ class ByteCodePrinter {
sb.append("]"); sb.append("]");
// @opcode_address(opcode_size) // @opcode_address(opcode_size)
if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")"); if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
}
return bp; return bp;
} }
private String compiledByteCodeListToString() { private String compiledByteCodeListToString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("code length: " + codeLength + "\n"); sb.append("code length: ").append(codeLength).append("\n");
int ncode = 0; int ncode = 0;
int bp = 0; int bp = 0;

View file

@ -22,25 +22,34 @@ package jdk.nashorn.internal.runtime.regexp.joni;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class CodeRangeBuffer { public final class CodeRangeBuffer implements Cloneable {
private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff; private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
int[]p; int[] p;
int used; int used;
public CodeRangeBuffer(int[]ranges) {
p = ranges;
used = ranges[0] + 1;
}
public CodeRangeBuffer() { public CodeRangeBuffer() {
p = new int[INIT_MULTI_BYTE_RANGE_SIZE]; p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
writeCodePoint(0, 0); writeCodePoint(0, 0);
} }
public int[]getCodeRange() { // CodeRange.isInCodeRange
return p; public boolean isInCodeRange(int code) {
int low = 0;
int n = p[0];
int high = n;
while (low < high) {
int x = (low + high) >> 1;
if (code > p[(x << 1) + 2]) {
low = x + 1;
} else {
high = x;
}
}
return low < n && code >= p[(low << 1) + 1];
} }
private CodeRangeBuffer(CodeRangeBuffer orig) { private CodeRangeBuffer(CodeRangeBuffer orig) {
@ -52,12 +61,12 @@ public final class CodeRangeBuffer {
public String toString() { public String toString() {
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();
buf.append("CodeRange"); buf.append("CodeRange");
buf.append("\n used: " + used); buf.append("\n used: ").append(used);
buf.append("\n code point: " + p[0]); buf.append("\n code point: ").append(p[0]);
buf.append("\n ranges: "); buf.append("\n ranges: ");
for (int i=0; i<p[0]; i++) { for (int i=0; i<p[0]; i++) {
buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]"); buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]");
if (i > 0 && i % 6 == 0) buf.append("\n "); if (i > 0 && i % 6 == 0) buf.append("\n ");
} }
@ -108,6 +117,7 @@ public final class CodeRangeBuffer {
if (used < u) used = u; if (used < u) used = u;
} }
@Override
public CodeRangeBuffer clone() { public CodeRangeBuffer clone() {
return new CodeRangeBuffer(this); return new CodeRangeBuffer(this);
} }

View file

@ -132,23 +132,6 @@ public class EncodingHelper {
} }
} }
// CodeRange.isInCodeRange
/**
 * Tests whether a code point falls inside one of the ranges stored in a
 * flat code-range array.
 *
 * The array is read as: {@code p[0]} holds the number of ranges, followed by
 * one (lower, upper) pair per range at {@code p[2*i + 1]} and {@code p[2*i + 2]}.
 * Both bounds are treated as inclusive.
 *
 * NOTE(review): the binary search presumably relies on the ranges being sorted
 * in ascending order and non-overlapping -- confirm against the code that
 * builds the array.
 *
 * @param p    flat range array laid out as described above
 * @param code the code point to look up
 * @return {@code true} if {@code code} lies within one of the stored ranges
 */
public static boolean isInCodeRange(int[]p, int code) {
    int lo = 0;
    final int rangeCount = p[0];
    int hi = rangeCount;

    // Find the first range whose upper bound is not below the code point.
    while (lo < hi) {
        final int mid = (lo + hi) >> 1;
        final int upperBound = p[2 * mid + 2];
        if (upperBound < code) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }

    // Every range ends below the code point: no match.
    if (lo >= rangeCount) {
        return false;
    }
    // The candidate range matches only if it also starts at or before the code point.
    return p[2 * lo + 1] <= code;
}
public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) { public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
sbOut.value = 0x100; // use bitset for codes smaller than 256 sbOut.value = 0x100; // use bitset for codes smaller than 256
int[] range = null; int[] range = null;
@ -188,7 +171,7 @@ public class EncodingHelper {
} }
// CodeRange.isInCodeRange // CodeRange.isInCodeRange
public static boolean isInCodeRange(int[]p, int offset, int code) { public static boolean isInCodeRange(int[] p, int offset, int code) {
int low = 0; int low = 0;
int n = p[offset]; int n = p[offset];
int high = n ; int high = n ;

View file

@ -28,6 +28,8 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
class Lexer extends ScannerSupport { class Lexer extends ScannerSupport {
protected final ScanEnvironment env; protected final ScanEnvironment env;
@ -52,20 +54,24 @@ class Lexer extends ScannerSupport {
if (synAllow) { if (synAllow) {
return 1; /* "....{" : OK! */ return 1; /* "....{" : OK! */
} else { } else {
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
} }
} }
if (!synAllow) { if (!synAllow) {
c = peek(); c = peek();
if (c == ')' || c == '(' || c == '|') { if (c == ')' || c == '(' || c == '|') {
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
} }
} }
int low = scanUnsignedNumber(); int low = scanUnsignedNumber();
if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); if (low < 0) {
if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (low > Config.MAX_REPEAT_NUM) {
throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
boolean nonLow = false; boolean nonLow = false;
if (p == _p) { /* can't read low */ if (p == _p) { /* can't read low */
@ -85,8 +91,12 @@ class Lexer extends ScannerSupport {
if (c == ',') { if (c == ',') {
int prev = p; // ??? last int prev = p; // ??? last
up = scanUnsignedNumber(); up = scanUnsignedNumber();
if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); if (up < 0) {
if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (up > Config.MAX_REPEAT_NUM) {
throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (p == prev) { if (p == prev) {
if (nonLow) return invalidRangeQuantifier(synAllow); if (nonLow) return invalidRangeQuantifier(synAllow);
@ -110,7 +120,7 @@ class Lexer extends ScannerSupport {
if (c != '}') return invalidRangeQuantifier(synAllow); if (c != '}') return invalidRangeQuantifier(synAllow);
if (!isRepeatInfinite(up) && low > up) { if (!isRepeatInfinite(up) && low > up) {
newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
} }
token.type = TokenType.INTERVAL; token.type = TokenType.INTERVAL;
@ -125,24 +135,31 @@ class Lexer extends ScannerSupport {
restore(); restore();
return 1; return 1;
} else { } else {
newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN); throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
return 0; // not reached
} }
} }
/* \M-, \C-, \c, or \... */ /* \M-, \C-, \c, or \... */
private int fetchEscapedValue() { private int fetchEscapedValue() {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
fetch(); fetch();
switch(c) { switch(c) {
case 'M': case 'M':
if (syntax.op2EscCapitalMBarMeta()) { if (syntax.op2EscCapitalMBarMeta()) {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_META);
}
fetch(); fetch();
if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX); if (c != '-') {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META); throw new SyntaxException(ERR_META_CODE_SYNTAX);
}
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_META);
}
fetch(); fetch();
if (c == syntax.metaCharTable.esc) { if (c == syntax.metaCharTable.esc) {
c = fetchEscapedValue(); c = fetchEscapedValue();
@ -155,9 +172,13 @@ class Lexer extends ScannerSupport {
case 'C': case 'C':
if (syntax.op2EscCapitalCBarControl()) { if (syntax.op2EscCapitalCBarControl()) {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
}
fetch(); fetch();
if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX); if (c != '-') {
throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX);
}
fetchEscapedValueControl(); fetchEscapedValueControl();
} else { } else {
fetchEscapedValueBackSlash(); fetchEscapedValueBackSlash();
@ -182,7 +203,9 @@ class Lexer extends ScannerSupport {
} }
private void fetchEscapedValueControl() { private void fetchEscapedValueControl() {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
}
fetch(); fetch();
if (c == '?') { if (c == '?') {
c = 0177; c = 0177;
@ -205,115 +228,6 @@ class Lexer extends ScannerSupport {
} }
} }
// USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
/*
\k<name+n>, \k<name-n>
\k<num+n>, \k<num-n>
\k<-num+n>, \k<-num-n>
*/
// #else USE_NAMED_GROUP
// make it return nameEnd!
/**
 * Scans a purely numeric group reference (optionally prefixed by '-') that is
 * terminated by the end code derived from {@code startCode}, and returns the
 * signed group number.
 *
 * As a side effect the field {@code value} is reset to 0 on entry and, on
 * success, set to the position recorded just before the terminating code was
 * fetched (the end of the name).
 *
 * @param startCode opening delimiter; its matching end code is obtained from
 *                  {@code nameEndCodePoint(startCode)}
 * @param ref       not read anywhere in this method body
 * @return the scanned number multiplied by the sign (negative when the name
 *         started with '-')
 * @throws ValueException (raised through the {@code newValueException} helpers)
 *         when the name is empty, contains a non-digit, is not terminated by
 *         the end code, is too big, or scans as 0
 */
private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
// Start of the name; used only for building error messages.
int src = p;
value = 0;
// NOTE(review): isNum is assigned below but never read in this method.
int isNum = 0;
int sign = 1;
int endCode = nameEndCodePoint(startCode);
// Position from which the digits are re-scanned once the name is validated.
int pnumHead = p;
int nameEnd = stop;
// First validation problem found; reported only after the whole name was consumed.
String err = null;
if (!left()) {
newValueException(ERR_EMPTY_GROUP_NAME);
} else {
fetch();
if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
if (EncodingHelper.isDigit(c)) {
isNum = 1;
} else if (c == '-') {
isNum = 2;
sign = -1;
// NOTE(review): presumably fetch() has already advanced p past the '-',
// so the later number scan starts at the first digit -- confirm.
pnumHead = p;
} else {
err = ERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
// Consume the rest of the name up to the end code or a ')'.
while(left()) {
nameEnd = p;
fetch();
if (c == endCode || c == ')') break;
if (!EncodingHelper.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
}
// The loop may also end on ')' or on end of input; both count as an unterminated name.
if (err == null && c != endCode) {
err = ERR_INVALID_GROUP_NAME;
nameEnd = stop;
}
if (err == null) {
// NOTE(review): mark()/restore() presumably save and reinstate the scan
// position around the temporary rewind to pnumHead -- confirm.
mark();
p = pnumHead;
int backNum = scanUnsignedNumber();
restore();
// A negative scan result is reported as a too-big number; 0 is not a valid group.
if (backNum < 0) {
newValueException(ERR_TOO_BIG_NUMBER);
} else if (backNum == 0){
newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
}
backNum *= sign;
// Hand the end-of-name position back to the caller through the 'value' field.
value = nameEnd;
return backNum;
} else {
newValueException(err, src, nameEnd);
return 0; // not reached
}
}
/**
 * Reads a group name reference by delegating to
 * {@code fetchNameForNoNamedGroup} with the same arguments.
 *
 * @param startCode opening delimiter of the name
 * @param ref       forwarded unchanged to the delegate
 * @return whatever the delegate returns
 */
protected final int fetchName(int startCode, boolean ref) {
    final int groupNumber = fetchNameForNoNamedGroup(startCode, ref);
    return groupNumber;
}
/**
 * Looks ahead from the current position {@code p} up to {@code stop} for the
 * code sequence {@code s[0..n)} without moving the scanner itself (only a
 * local cursor is advanced).
 *
 * The code following the escape code ({@code syntax.metaCharTable.esc}) is
 * skipped. Meeting {@code bad} at an unescaped position that does not start
 * with {@code s[0]} ends the search with {@code false}.
 *
 * @param s   sequence to look for
 * @param n   number of leading entries of {@code s} that must match
 * @param bad code that aborts the search
 * @return {@code true} if the sequence is found before {@code bad} or the end
 */
private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
    final int limit = this.stop;
    boolean escaped = false;

    // Every branch of the scan consumes exactly one position.
    for (int pos = this.p; pos < limit; pos++) {
        if (escaped) {
            // The escaped code is skipped without being inspected.
            escaped = false;
            continue;
        }

        final int current = chars[pos];
        if (current == s[0]) {
            // Try to match the remaining entries of the sequence.
            int matched = 1;
            int look = pos + 1;
            while (matched < n && look < limit && chars[look] == s[matched]) {
                matched++;
                look++;
            }
            if (matched >= n) {
                return true;
            }
            continue;
        }

        if (current == bad) {
            return false;
        }
        if (current == syntax.metaCharTable.esc) {
            escaped = true;
        }
    }
    return false;
}
// The two codes ':' and ']' in that order.
// NOTE(review): presumably the terminator sequence handed to strExistCheckWithEsc;
// no use is visible in this part of the file -- confirm.
private static final int send[] = new int[]{':', ']'};
private void fetchTokenInCCFor_charType(boolean flag, int type) { private void fetchTokenInCCFor_charType(boolean flag, int type) {
token.type = TokenType.CHAR_TYPE; token.type = TokenType.CHAR_TYPE;
token.setPropCType(type); token.setPropCType(type);
@ -327,16 +241,19 @@ class Lexer extends ScannerSupport {
if (peekIs('{') && syntax.opEscXBraceHex8()) { if (peekIs('{') && syntax.opEscXBraceHex8()) {
inc(); inc();
int num = scanUnsignedHexadecimalNumber(8); int num = scanUnsignedHexadecimalNumber(8);
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
}
if (left()) { if (left()) {
int c2 = peek(); int c2 = peek();
if (EncodingHelper.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); if (EncodingHelper.isXDigit(c2)) {
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
}
} }
if (p > last + 1 && left() && peekIs('}')) { if (p > last + 1 && left() && peekIs('}')) {
inc(); inc();
token.type = TokenType.CODE_POINT; token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num); token.setCode(num);
} else { } else {
/* can't read nothing or invalid format */ /* can't read nothing or invalid format */
@ -344,12 +261,13 @@ class Lexer extends ScannerSupport {
} }
} else if (syntax.opEscXHex2()) { } else if (syntax.opEscXHex2()) {
int num = scanUnsignedHexadecimalNumber(2); int num = scanUnsignedHexadecimalNumber(2);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.RAW_BYTE; token.type = TokenType.RAW_BYTE;
token.base = 16;
token.setC(num); token.setC(num);
} }
} }
@ -360,12 +278,13 @@ class Lexer extends ScannerSupport {
if (syntax.op2EscUHex4()) { if (syntax.op2EscUHex4()) {
int num = scanUnsignedHexadecimalNumber(4); int num = scanUnsignedHexadecimalNumber(4);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.CODE_POINT; token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num); token.setCode(num);
} }
} }
@ -375,12 +294,13 @@ class Lexer extends ScannerSupport {
unfetch(); unfetch();
int last = p; int last = p;
int num = scanUnsignedOctalNumber(3); int num = scanUnsignedOctalNumber(3);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.RAW_BYTE; token.type = TokenType.RAW_BYTE;
token.base = 8;
token.setC(num); token.setC(num);
} }
} }
@ -400,7 +320,6 @@ class Lexer extends ScannerSupport {
fetch(); fetch();
token.type = TokenType.CHAR; token.type = TokenType.CHAR;
token.base = 0;
token.setC(c); token.setC(c);
token.escaped = false; token.escaped = false;
@ -410,7 +329,9 @@ class Lexer extends ScannerSupport {
token.type = TokenType.CC_RANGE; token.type = TokenType.CC_RANGE;
} else if (c == syntax.metaCharTable.esc) { } else if (c == syntax.metaCharTable.esc) {
if (!syntax.backSlashEscapeInCC()) return token.type; if (!syntax.backSlashEscapeInCC()) return token.type;
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
fetch(); fetch();
token.escaped = true; token.escaped = true;
token.setC(c); token.setC(c);
@ -508,9 +429,13 @@ class Lexer extends ScannerSupport {
if (peekIs('{') && syntax.opEscXBraceHex8()) { if (peekIs('{') && syntax.opEscXBraceHex8()) {
inc(); inc();
int num = scanUnsignedHexadecimalNumber(8); int num = scanUnsignedHexadecimalNumber(8);
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
}
if (left()) { if (left()) {
if (EncodingHelper.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); if (EncodingHelper.isXDigit(peek())) {
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
}
} }
if (p > last + 1 && left() && peekIs('}')) { if (p > last + 1 && left() && peekIs('}')) {
@ -523,12 +448,13 @@ class Lexer extends ScannerSupport {
} }
} else if (syntax.opEscXHex2()) { } else if (syntax.opEscXHex2()) {
int num = scanUnsignedHexadecimalNumber(2); int num = scanUnsignedHexadecimalNumber(2);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.RAW_BYTE; token.type = TokenType.RAW_BYTE;
token.base = 16;
token.setC(num); token.setC(num);
} }
} }
@ -539,12 +465,13 @@ class Lexer extends ScannerSupport {
if (syntax.op2EscUHex4()) { if (syntax.op2EscUHex4()) {
int num = scanUnsignedHexadecimalNumber(4); int num = scanUnsignedHexadecimalNumber(4);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.CODE_POINT; token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num); token.setCode(num);
} }
} }
@ -556,12 +483,12 @@ class Lexer extends ScannerSupport {
if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
} else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */ } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
if (syntax.strictCheckBackref()) { if (syntax.strictCheckBackref()) {
if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF); if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) {
throw new ValueException(ERR_INVALID_BACKREF);
}
} }
token.type = TokenType.BACKREF; token.type = TokenType.BACKREF;
token.setBackrefNum(1); token.setBackrefRef(num);
token.setBackrefRef1(num);
token.setBackrefByName(false);
return; return;
} }
@ -579,40 +506,19 @@ class Lexer extends ScannerSupport {
if (syntax.opEscOctal3()) { if (syntax.opEscOctal3()) {
int last = p; int last = p;
int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3); int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER); if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */ if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */ num = 0; /* but, it's not error */
} }
token.type = TokenType.RAW_BYTE; token.type = TokenType.RAW_BYTE;
token.base = 8;
token.setC(num); token.setC(num);
} else if (c != '0') { } else if (c != '0') {
inc(); inc();
} }
} }
/**
 * Tokenizes a subexpression call when the syntax enables it
 * ({@code syntax.op2EscGSubexpCall()}); otherwise does nothing.
 *
 * The next code must be {@code '<'} or {@code '\''}. If it is, the name is
 * read with {@code fetchName} and the token becomes a {@code CALL} carrying
 * the name start, the name end (taken from the {@code value} field) and the
 * group number. If it is anything else the code is pushed back and an
 * {@code INVALID_SUBEXP_CALL} warning is issued; the same warning is issued
 * when no input is left.
 */
private void fetchTokenFor_subexpCall() {
    if (!syntax.op2EscGSubexpCall()) {
        return;
    }
    if (!left()) {
        syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
        return;
    }

    fetch();
    final boolean opensName = (c == '<') || (c == '\'');
    if (!opensName) {
        unfetch();
        syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
        return;
    }

    final int nameStart = p;
    final int groupNumber = fetchName(c, true);
    // fetchName reports the end of the name through the 'value' field.
    final int nameStop = value;

    token.type = TokenType.CALL;
    token.setCallNameP(nameStart);
    token.setCallNameEnd(nameStop);
    token.setCallGNum(groupNumber);
}
private void fetchTokenFor_metaChars() { private void fetchTokenFor_metaChars() {
if (c == syntax.metaCharTable.anyChar) { if (c == syntax.metaCharTable.anyChar) {
token.type = TokenType.ANYCHAR; token.type = TokenType.ANYCHAR;
@ -638,13 +544,14 @@ class Lexer extends ScannerSupport {
} }
token.type = TokenType.STRING; token.type = TokenType.STRING;
token.base = 0;
token.backP = p; token.backP = p;
fetch(); fetch();
if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
token.backP = p; token.backP = p;
fetch(); fetch();
@ -800,7 +707,9 @@ class Lexer extends ScannerSupport {
if (peekIs('#')) { if (peekIs('#')) {
fetch(); fetch();
while (true) { while (true) {
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
fetch(); fetch();
if (c == syntax.metaCharTable.esc) { if (c == syntax.metaCharTable.esc) {
if (left()) fetch(); if (left()) fetch();

View file

@ -40,6 +40,9 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
class Parser extends Lexer { class Parser extends Lexer {
@ -94,7 +97,9 @@ class Parser extends Lexer {
} }
if (token.type == TokenType.CC_CLOSE) { if (token.type == TokenType.CC_CLOSE) {
if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS); if (!codeExistCheck(']', true)) {
throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
}
env.ccEscWarn("]"); env.ccEscWarn("]");
token.type = TokenType.CHAR; /* allow []...] */ token.type = TokenType.CHAR; /* allow []...] */
} }
@ -187,7 +192,7 @@ class Parser extends Lexer {
parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */ parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
break; break;
} }
newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
} }
break; break;
@ -216,10 +221,10 @@ class Parser extends Lexer {
break; break;
case EOT: case EOT:
newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
default: default:
newInternalException(ERR_PARSER_BUG); throw new InternalException(ERR_PARSER_BUG);
} // switch } // switch
if (!fetched) fetchTokenInCC(); if (!fetched) fetchTokenInCC();
@ -280,13 +285,17 @@ class Parser extends Lexer {
private Node parseEnclose(TokenType term) { private Node parseEnclose(TokenType term) {
Node node = null; Node node = null;
if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
}
int option = env.option; int option = env.option;
if (peekIs('?') && syntax.op2QMarkGroupEffect()) { if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
inc(); inc();
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
boolean listCapture = false; boolean listCapture = false;
@ -315,18 +324,20 @@ class Parser extends Lexer {
} else if (c == '!') { } else if (c == '!') {
node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT); node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
} else { } else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} }
break; break;
case '@': case '@':
if (syntax.op2AtMarkCaptureHistory()) { if (syntax.op2AtMarkCaptureHistory()) {
EncloseNode en = new EncloseNode(); // node_new_enclose_memory EncloseNode en = new EncloseNode(); // node_new_enclose_memory
int num = env.addMemEntry(); int num = env.addMemEntry();
if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
}
en.regNum = num; en.regNum = num;
node = en; node = en;
} else { } else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} }
break; break;
@ -355,7 +366,7 @@ class Parser extends Lexer {
if (syntax.op2OptionPerl()) { if (syntax.op2OptionPerl()) {
option = bsOnOff(option, Option.MULTILINE, neg); option = bsOnOff(option, Option.MULTILINE, neg);
} else { } else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} }
break; break;
case 'm': case 'm':
@ -364,7 +375,7 @@ class Parser extends Lexer {
} else if (syntax.op2OptionRuby()) { } else if (syntax.op2OptionRuby()) {
option = bsOnOff(option, Option.MULTILINE, neg); option = bsOnOff(option, Option.MULTILINE, neg);
} else { } else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} }
break; break;
// case 'p': #ifdef USE_POSIXLINE_OPTION // not defined // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
@ -372,7 +383,7 @@ class Parser extends Lexer {
// break; // break;
default: default:
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} // switch } // switch
if (c == ')') { if (c == ')') {
@ -392,12 +403,14 @@ class Parser extends Lexer {
returnCode = 0; returnCode = 0;
return node; return node;
} }
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP); if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
fetch(); fetch();
} // while } // while
default: default:
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} // switch } // switch
} else { } else {
@ -458,7 +471,9 @@ class Parser extends Lexer {
} }
break; break;
case SUBEXP_CLOSE: case SUBEXP_CLOSE:
if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); if (!syntax.allowUnmatchedCloseSubexp()) {
throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
}
if (token.escaped) { if (token.escaped) {
return parseExpTkRawByte(group); // goto tk_raw_byte return parseExpTkRawByte(group); // goto tk_raw_byte
} else { } else {
@ -499,7 +514,7 @@ class Parser extends Lexer {
break; break;
default: default:
newInternalException(ERR_PARSER_BUG); throw new InternalException(ERR_PARSER_BUG);
} // inner switch } // inner switch
break; break;
@ -529,13 +544,8 @@ class Parser extends Lexer {
break; break;
case BACKREF: case BACKREF:
int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()}; int backRef = token.getBackrefRef();
node = new BackRefNode(token.getBackrefNum(), node = new BackRefNode(backRef, env);
backRefs,
token.getBackrefByName(),
token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
token.getBackrefLevel(), // ...
env);
break; break;
case ANCHOR: case ANCHOR:
@ -546,7 +556,7 @@ class Parser extends Lexer {
case INTERVAL: case INTERVAL:
if (syntax.contextIndepRepeatOps()) { if (syntax.contextIndepRepeatOps()) {
if (syntax.contextInvalidRepeatOps()) { if (syntax.contextInvalidRepeatOps()) {
newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
} else { } else {
node = StringNode.EMPTY; // node_new_empty node = StringNode.EMPTY; // node_new_empty
} }
@ -556,7 +566,7 @@ class Parser extends Lexer {
break; break;
default: default:
newInternalException(ERR_PARSER_BUG); throw new InternalException(ERR_PARSER_BUG);
} //switch } //switch
//targetp = node; //targetp = node;
@ -599,7 +609,9 @@ class Parser extends Lexer {
private Node parseExpRepeat(Node target, boolean group) { private Node parseExpRepeat(Node target, boolean group) {
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); if (target.isInvalidQuantifier()) {
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
}
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
token.getRepeatUpper(), token.getRepeatUpper(),
@ -631,7 +643,9 @@ class Parser extends Lexer {
private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) { private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); if (target.car.isInvalidQuantifier()) {
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
}
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
token.getRepeatUpper(), token.getRepeatUpper(),
@ -709,9 +723,9 @@ class Parser extends Lexer {
private void parseSubExpError(TokenType term) { private void parseSubExpError(TokenType term) {
if (term == TokenType.SUBEXP_CLOSE) { if (term == TokenType.SUBEXP_CLOSE) {
newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
} else { } else {
newInternalException(ERR_PARSER_BUG); throw new InternalException(ERR_PARSER_BUG);
} }
} }

View file

@ -19,19 +19,9 @@
*/ */
package jdk.nashorn.internal.runtime.regexp.joni; package jdk.nashorn.internal.runtime.regexp.joni;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Iterator;
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class Regex implements RegexState { public final class Regex implements RegexState {
@ -39,31 +29,26 @@ public final class Regex implements RegexState {
int[] code; /* compiled pattern */ int[] code; /* compiled pattern */
int codeLength; int codeLength;
boolean stackNeeded; boolean stackNeeded;
Object[]operands; /* e.g. shared CClassNode */ Object[] operands; /* e.g. shared CClassNode */
int operandLength; int operandLength;
int state; /* normal, searching, compiling */ // remove
int numMem; /* used memory(...) num counted from 1 */ int numMem; /* used memory(...) num counted from 1 */
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
int numCall; /* number of subexp call */
int captureHistory; /* (?@...) flag (1-31) */ int captureHistory; /* (?@...) flag (1-31) */
int btMemStart; /* need backtrack flag */ int btMemStart; /* need backtrack flag */
int btMemEnd; /* need backtrack flag */ int btMemEnd; /* need backtrack flag */
int stackPopLevel; int stackPopLevel;
int[]repeatRangeLo; int[] repeatRangeLo;
int[]repeatRangeHi; int[] repeatRangeHi;
WarnCallback warnings; WarnCallback warnings;
MatcherFactory factory; MatcherFactory factory;
protected Analyser analyser; protected Analyser analyser;
int options; int options;
int userOptions;
Object userObject;
//final Syntax syntax;
final int caseFoldFlag; final int caseFoldFlag;
/* optimization info (string search, char-map and anchors) */ /* optimization info (string search, char-map and anchors) */
@ -247,46 +232,48 @@ public final class Regex implements RegexState {
} }
public String optimizeInfoToString() { public String optimizeInfoToString() {
String s = ""; StringBuilder s = new StringBuilder();
s += "optimize: " + searchAlgorithm.getName() + "\n"; s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
s += " anchor: " + OptAnchorInfo.anchorToString(anchor); s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
if ((anchor & AnchorType.END_BUF_MASK) != 0) { if ((anchor & AnchorType.END_BUF_MASK) != 0) {
s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax); s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
} }
s += "\n"; s.append("\n");
if (searchAlgorithm != SearchAlgorithm.NONE) { if (searchAlgorithm != SearchAlgorithm.NONE) {
s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n"; s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
} }
s += "dmin: " + dMin + " dmax: " + dMax + "\n"; s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
s += "threshold length: " + thresholdLength + "\n"; s.append("threshold length: ").append(thresholdLength).append("\n");
if (exact != null) { if (exact != null) {
s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n"; s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
} else if (searchAlgorithm == SearchAlgorithm.MAP) { } else if (searchAlgorithm == SearchAlgorithm.MAP) {
int n=0; int n=0;
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++; for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
s += "map: n = " + n + "\n"; s.append("map: n = ").append(n).append("\n");
if (n > 0) { if (n > 0) {
int c=0; int c=0;
s += "["; s.append("[");
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
if (map[i] != 0) { if (map[i] != 0) {
if (c > 0) s += ", "; if (c > 0) {
s.append(", ");
}
c++; c++;
// TODO if (enc.isPrint(i) // TODO if (enc.isPrint(i)
s += ((char)i); s.append((char)i);
} }
} }
s += "]\n"; s.append("]\n");
} }
} }
return s; return s.toString();
} }
public int getOptions() { public int getOptions() {

View file

@ -39,13 +39,10 @@ public final class ScanEnvironment {
final public Regex reg; final public Regex reg;
int numCall;
public int numMem; public int numMem;
public Node memNodes[]; public Node memNodes[];
int currMaxRegNum;
boolean hasRecursion;
public ScanEnvironment(Regex regex, Syntax syntax) { public ScanEnvironment(Regex regex, Syntax syntax) {
this.reg = regex; this.reg = regex;
@ -60,13 +57,8 @@ public final class ScanEnvironment {
btMemEnd = bsClear(); btMemEnd = bsClear();
backrefedMem = bsClear(); backrefedMem = bsClear();
numCall = 0;
numMem = 0; numMem = 0;
memNodes = null; memNodes = null;
currMaxRegNum = 0;
hasRecursion = false;
} }
public int addMemEntry() { public int addMemEntry() {
@ -117,11 +109,4 @@ public final class ScanEnvironment {
} }
} }
/**
 * Emits a "without escape" warning for {@code s} through
 * {@code reg.warnings}, but only when warnings are compiled in
 * ({@code Config.USE_WARN}) and the syntax asks for it
 * ({@code syntax.warnCCOpNotEscaped()}).
 *
 * @param s text quoted inside the warning message
 */
void closeBracketWithoutEscapeWarn(String s) {
    if (!Config.USE_WARN || !syntax.warnCCOpNotEscaped()) {
        return;
    }
    reg.warnings.warn("regular expression has '" + s + "' without escape");
}
} }

View file

@ -155,24 +155,4 @@ abstract class ScannerSupport extends IntHolder implements ErrorMessages {
return p < stop; return p < stop;
} }
/**
 * Always throws a {@code SyntaxException} carrying {@code message}; never
 * returns normally.
 *
 * @param message error text for the exception
 */
protected void newSyntaxException(String message) {
    final SyntaxException failure = new SyntaxException(message);
    throw failure;
}
/**
 * Always throws a {@code ValueException} carrying {@code message}; never
 * returns normally.
 *
 * @param message error text for the exception
 */
protected void newValueException(String message) {
    final ValueException failure = new ValueException(message);
    throw failure;
}
/**
 * Always throws a {@code ValueException} built from {@code message} and
 * {@code str}; never returns normally.
 *
 * @param message error text for the exception
 * @param str     additional text passed as the second constructor argument
 */
protected void newValueException(String message, String str) {
    final ValueException failure = new ValueException(message, str);
    throw failure;
}
/**
 * Always throws a {@code ValueException} built from {@code message} and the
 * slice of {@code chars} between {@code p} (inclusive) and {@code end}
 * (exclusive); never returns normally.
 *
 * @param message error text for the exception
 * @param p       start offset of the slice in {@code chars}
 * @param end     end offset (exclusive) of the slice in {@code chars}
 */
protected void newValueException(String message, int p, int end) {
    final int length = end - p;
    final String fragment = new String(chars, p, length);
    throw new ValueException(message, fragment);
}
/**
 * Always throws an {@code InternalException} carrying {@code message}; never
 * returns normally.
 *
 * @param message error text for the exception
 */
protected void newInternalException(String message) {
    final InternalException failure = new InternalException(message);
    throw failure;
}
} }

View file

@ -24,12 +24,10 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
final class Token { final class Token {
TokenType type; TokenType type;
boolean escaped; boolean escaped;
int base; /* is number: 8, 16 (used in [....]) */
int backP; int backP;
// union fields // union fields
private int INT1, INT2, INT3, INT4, INT5; private int INT1, INT2, INT3, INT4;
private int []INTA1;
// union accessors // union accessors
int getC() { int getC() {
@ -53,13 +51,6 @@ final class Token {
INT1 = anchor; INT1 = anchor;
} }
/** Returns the subtype, which lives in the shared union slot {@code INT1}. */
int getSubtype() {
    return this.INT1;
}

/**
 * Stores the subtype in the shared union slot {@code INT1}, overwriting
 * whatever other union member was kept there.
 */
void setSubtype(int subtype) {
    this.INT1 = subtype;
}
// repeat union member // repeat union member
int getRepeatLower() { int getRepeatLower() {
return INT1; return INT1;
@ -89,72 +80,13 @@ final class Token {
INT4 = possessive ? 1 : 0; INT4 = possessive ? 1 : 0;
} }
// backref union member int getBackrefRef() {
/** Returns the backreference count, kept in the shared union slot {@code INT1}. */
int getBackrefNum() {
    return this.INT1;
}

/** Stores the backreference count in the shared union slot {@code INT1}. */
void setBackrefNum(int num) {
    this.INT1 = num;
}
int getBackrefRef1() {
return INT2; return INT2;
} }
void setBackrefRef1(int ref1) { void setBackrefRef(int ref1) {
INT2 = ref1; INT2 = ref1;
} }
/** Returns the stored reference array itself (slot {@code INTA1}); no copy is made. */
int[]getBackrefRefs() {
    return this.INTA1;
}

/** Stores the given reference array as is in slot {@code INTA1}; no copy is made. */
void setBackrefRefs(int[]refs) {
    this.INTA1 = refs;
}

/** Returns {@code true} when slot {@code INT3} is non-zero. */
boolean getBackrefByName() {
    return this.INT3 != 0;
}

/** Encodes the flag into slot {@code INT3} as 1 (true) or 0 (false). */
void setBackrefByName(boolean byName) {
    if (byName) {
        this.INT3 = 1;
    } else {
        this.INT3 = 0;
    }
}

// USE_BACKREF_AT_LEVEL
/** Returns {@code true} when slot {@code INT4} is non-zero. */
boolean getBackrefExistLevel() {
    return this.INT4 != 0;
}

/** Encodes the flag into slot {@code INT4} as 1 (true) or 0 (false). */
void setBackrefExistLevel(boolean existLevel) {
    if (existLevel) {
        this.INT4 = 1;
    } else {
        this.INT4 = 0;
    }
}

/** Returns the backreference level, kept in slot {@code INT5}. */
int getBackrefLevel() {
    return this.INT5;
}

/** Stores the backreference level in slot {@code INT5}. */
void setBackrefLevel(int level) {
    this.INT5 = level;
}
// call union member
/** Returns the call name start position, kept in the shared union slot {@code INT1}. */
int getCallNameP() {
    return this.INT1;
}

/** Stores the call name start position in the shared union slot {@code INT1}. */
void setCallNameP(int nameP) {
    this.INT1 = nameP;
}

/** Returns the call name end position, kept in the shared union slot {@code INT2}. */
int getCallNameEnd() {
    return this.INT2;
}

/** Stores the call name end position in the shared union slot {@code INT2}. */
void setCallNameEnd(int nameEnd) {
    this.INT2 = nameEnd;
}

/** Returns the called group number, kept in the shared union slot {@code INT3}. */
int getCallGNum() {
    return this.INT3;
}

/** Stores the called group number in the shared union slot {@code INT3}. */
void setCallGNum(int gnum) {
    this.INT3 = gnum;
}
// prop union member // prop union member
int getPropCType() { int getPropCType() {
return INT1; return INT1;

View file

@ -20,41 +20,15 @@
package jdk.nashorn.internal.runtime.regexp.joni.ast; package jdk.nashorn.internal.runtime.regexp.joni.ast;
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment; import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class BackRefNode extends StateNode { public final class BackRefNode extends StateNode {
//private static int NODE_BACKREFS_SIZE = 6; public final int backRef;
//int state; public BackRefNode(int backRef, ScanEnvironment env) {
public int backNum; this.backRef = backRef;
public int back[];
public int nestLevel; if (backRef <= env.numMem && env.memNodes[backRef] == null) {
setRecursion(); /* /...(\1).../ */
public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
this.backNum = backNum;
if (byName) setNameRef();
for (int i=0; i<backNum; i++) {
if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
setRecursion(); /* /...(\1).../ */
break;
}
}
back = new int[backNum];
System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ???
}
// #ifdef USE_BACKREF_AT_LEVEL
public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
this(backNum, backRefs, byName, env);
if (existLevel) {
//state |= NST_NEST_LEVEL;
setNestLevel();
this.nestLevel = nestLevel;
} }
} }
@ -71,28 +45,7 @@ public final class BackRefNode extends StateNode {
@Override @Override
public String toString(int level) { public String toString(int level) {
StringBuilder value = new StringBuilder(super.toString(level)); StringBuilder value = new StringBuilder(super.toString(level));
value.append("\n backNum: " + backNum); value.append("\n back: ").append(backRef);
String backs = "";
for (int i=0; i<back.length; i++) backs += back[i] + ", ";
value.append("\n back: " + backs);
value.append("\n nextLevel: " + nestLevel);
return value.toString(); return value.toString();
} }
public void renumber(int[]map) {
if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
int oldNum = backNum;
int pos = 0;
for (int i=0; i<oldNum; i++) {
int n = map[back[i]];
if (n > 0) {
back[pos] = n;
pos++;
}
}
backNum = pos;
}
} }

View file

@ -77,39 +77,12 @@ public final class CClassNode extends Node {
// node_new_cclass // node_new_cclass
public CClassNode() {} public CClassNode() {}
public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
this(not, sbOut, ranges);
this.ctype = ctype;
}
public void clear() { public void clear() {
bs.clear(); bs.clear();
flags = 0; flags = 0;
mbuf = null; mbuf = null;
} }
// node_new_cclass_by_codepoint_range, only used by shared Char Classes
public CClassNode(boolean not, int sbOut, int[]ranges) {
if (not) setNot();
// bs.clear();
if (sbOut > 0 && ranges != null) {
int n = ranges[0];
for (int i=0; i<n; i++) {
int from = ranges[i * 2 + 1];
int to = ranges[i * 2 + 2];
for (int j=from; j<=to; j++) {
if (j >= sbOut) {
setupBuffer(ranges);
return;
}
bs.set(j);
}
}
}
setupBuffer(ranges);
}
@Override @Override
public int getType() { public int getType() {
return CCLASS; return CCLASS;
@ -156,13 +129,6 @@ public final class CClassNode extends Node {
return flags.toString(); return flags.toString();
} }
private void setupBuffer(int[]ranges) {
if (ranges != null) {
if (ranges[0] == 0) return;
mbuf = new CodeRangeBuffer(ranges);
}
}
public boolean isEmpty() { public boolean isEmpty() {
return mbuf == null && bs.isEmpty(); return mbuf == null && bs.isEmpty();
} }
@ -531,11 +497,7 @@ public final class CClassNode extends Node {
boolean found; boolean found;
if (code > 0xff) { if (code > 0xff) {
if (mbuf == null) { found = mbuf != null && mbuf.isInCodeRange(code);
found = false;
} else {
found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
}
} else { } else {
found = bs.at(code); found = bs.at(code);
} }

View file

@ -40,7 +40,6 @@ public abstract class StateNode extends Node implements NodeStatus {
if (isRecursion()) states.append("RECURSION "); if (isRecursion()) states.append("RECURSION ");
if (isCalled()) states.append("CALLED "); if (isCalled()) states.append("CALLED ");
if (isAddrFixed()) states.append("ADDR_FIXED "); if (isAddrFixed()) states.append("ADDR_FIXED ");
if (isNameRef()) states.append("NAME_REF ");
if (isInRepeat()) states.append("IN_REPEAT "); if (isInRepeat()) states.append("IN_REPEAT ");
if (isNestLevel()) states.append("NEST_LEVEL "); if (isNestLevel()) states.append("NEST_LEVEL ");
if (isByNumber()) states.append("BY_NUMBER "); if (isByNumber()) states.append("BY_NUMBER ");
@ -132,14 +131,6 @@ public abstract class StateNode extends Node implements NodeStatus {
state |= NST_ADDR_FIXED; state |= NST_ADDR_FIXED;
} }
public boolean isNameRef() {
return (state & NST_NAME_REF) != 0;
}
public void setNameRef() {
state |= NST_NAME_REF;
}
public boolean isInRepeat() { public boolean isInRepeat() {
return (state & NST_IN_REPEAT) != 0; return (state & NST_IN_REPEAT) != 0;
} }

View file

@ -96,7 +96,7 @@ public final class StringNode extends Node implements StringType {
if (chars[i] >= 0x20 && chars[i] < 0x7f) { if (chars[i] >= 0x20 && chars[i] < 0x7f) {
value.append(chars[i]); value.append(chars[i]);
} else { } else {
value.append(String.format("[0x%04x]", chars[i])); value.append(String.format("[0x%04x]", (int)chars[i]));
} }
} }
value.append("'"); value.append("'");
@ -122,10 +122,7 @@ public final class StringNode extends Node implements StringType {
} }
public boolean canBeSplit() { public boolean canBeSplit() {
if (end > p) { return end > p && 1 < (end - p);
return 1 < (end - p);
}
return false;
} }
public void set(char[] chars, int p, int end) { public void set(char[] chars, int p, int end) {

View file

@ -29,12 +29,6 @@ public interface OPCode {
final int EXACT4 = 5; /* single byte, N = 4 */ final int EXACT4 = 5; /* single byte, N = 4 */
final int EXACT5 = 6; /* single byte, N = 5 */ final int EXACT5 = 6; /* single byte, N = 5 */
final int EXACTN = 7; /* single byte */ final int EXACTN = 7; /* single byte */
final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
final int EXACTMB2N = 11; /* mb-length = 2 */
final int EXACTMB3N = 12; /* mb-length = 3 */
final int EXACTMBN = 13; /* other length */
final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */ final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
final int EXACTN_IC = 15; /* single byte, ignore case */ final int EXACTN_IC = 15; /* single byte, ignore case */
@ -125,28 +119,4 @@ public interface OPCode {
final int SET_OPTION_PUSH = 86; /* set option and push recover option */ final int SET_OPTION_PUSH = 86; /* set option and push recover option */
final int SET_OPTION = 87; /* set option */ final int SET_OPTION = 87; /* set option */
// single byte versions
final int ANYCHAR_SB = 88; /* "." */
final int ANYCHAR_ML_SB = 89; /* "." multi-line */
final int ANYCHAR_STAR_SB = 90; /* ".*" */
final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
final int CCLASS_SB = 96;
final int CCLASS_NOT_SB = 97;
final int WORD_SB = 98;
final int NOT_WORD_SB = 99;
final int WORD_BOUND_SB = 100;
final int NOT_WORD_BOUND_SB = 101;
final int WORD_BEGIN_SB = 102;
final int WORD_END_SB = 103;
final int LOOK_BEHIND_SB = 104;
final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
final int EXACTN_IC_SB = 106; /* single byte, ignore case */
} }