8010821: [findbugs] Some classes in jdk.nashorn.internal.runtime.regexp expose mutable objects

Reviewed-by: attila, jlaskey, sundar
This commit is contained in:
Hannes Wallnöfer 2013-07-16 16:12:26 +02:00
parent e85e4fb319
commit 06394dedc7
18 changed files with 263 additions and 771 deletions

View file

@ -84,9 +84,7 @@ public class JoniRegExp extends RegExp {
this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
}
} catch (final PatternSyntaxException e2) {
throwParserException("syntax", e2.getMessage());
} catch (JOniException e2) {
} catch (final PatternSyntaxException | JOniException e2) {
throwParserException("syntax", e2.getMessage());
}
}

View file

@ -41,10 +41,12 @@ import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
final class Analyser extends Parser {
@ -53,8 +55,6 @@ final class Analyser extends Parser {
}
protected final void compile() {
regex.state = RegexState.COMPILING;
if (Config.DEBUG) {
Config.log.println(new String(chars, getBegin(), getEnd()));
}
@ -115,8 +115,6 @@ final class Analyser extends Parser {
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
} // DEBUG_COMPILE
regex.state = RegexState.NORMAL;
}
private void swap(Node a, Node b) {
@ -187,14 +185,11 @@ final class Analyser extends Parser {
BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) break;
if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
min = getMinMatchLength(env.memNodes[br.back[0]]);
for (int i=1; i<br.backNum; i++) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
if (min > tmin) min = tmin;
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
min = getMinMatchLength(env.memNodes[br.backRef]);
break;
case NodeType.LIST:
@ -306,11 +301,11 @@ final class Analyser extends Parser {
break;
}
for (int i=0; i<br.backNum; i++) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
if (max < tmax) max = tmax;
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
if (max < tmax) max = tmax;
break;
case NodeType.QTFR:
@ -417,8 +412,6 @@ final class Analyser extends Parser {
break;
case NodeType.CTYPE:
len = 1;
case NodeType.CCLASS:
case NodeType.CANY:
len = 1;
@ -712,13 +705,12 @@ final class Analyser extends Parser {
an.charLength = len;
break;
case GET_CHAR_LEN_VARLEN:
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
break;
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
case GET_CHAR_LEN_TOP_ALT_VARLEN:
if (syntax.differentLengthAltLookBehind()) {
return divideLookBehindAlternatives(node);
} else {
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
}
return node;
@ -955,12 +947,12 @@ final class Analyser extends Parser {
case NodeType.BREF:
BackRefNode br = (BackRefNode)node;
for (int i=0; i<br.backNum; i++) {
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
break;
case NodeType.QTFR:
@ -1064,14 +1056,18 @@ final class Analyser extends Parser {
break;
case AnchorType.LOOK_BEHIND:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) continue restart;
setupTree(((AnchorNode)node).target, state);
break;
case AnchorType.LOOK_BEHIND_NOT:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) continue restart;
setupTree(((AnchorNode)node).target, (state | IN_NOT));
@ -1218,15 +1214,9 @@ final class Analyser extends Parser {
Node[]nodes = oenv.scanEnv.memNodes;
int min = getMinMatchLength(nodes[br.back[0]]);
int max = getMaxMatchLength(nodes[br.back[0]]);
int min = getMinMatchLength(nodes[br.backRef]);
int max = getMaxMatchLength(nodes[br.backRef]);
for (int i=1; i<br.backNum; i++) {
int tmin = getMinMatchLength(nodes[br.back[i]]);
int tmax = getMaxMatchLength(nodes[br.back[i]]);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
opt.length.set(min, max);
break;
}
@ -1314,7 +1304,7 @@ final class Analyser extends Parser {
}
default:
newInternalException(ERR_PARSER_BUG);
throw new InternalException(ERR_PARSER_BUG);
} // switch
}

View file

@ -100,12 +100,7 @@ final class ArrayCompiler extends Compiler {
}
private boolean isNeedStrLenOpExact(int op) {
return op == OPCode.EXACTN ||
op == OPCode.EXACTMB2N ||
op == OPCode.EXACTMB3N ||
op == OPCode.EXACTMBN ||
op == OPCode.EXACTN_IC ||
op == OPCode.EXACTN_IC_SB;
return op == OPCode.EXACTN || op == OPCode.EXACTN_IC;
}
private boolean opTemplated(int op) {
@ -172,7 +167,6 @@ final class ArrayCompiler extends Compiler {
if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
len += strLength;
}
if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
return len;
}
@ -181,8 +175,6 @@ final class ArrayCompiler extends Compiler {
int op = selectStrOpcode(strLength, ignoreCase);
addOpcode(op);
if (op == OPCode.EXACTMBN) addLength(1);
if (isNeedStrLenOpExact(op)) {
addLength(strLength);
}
@ -294,14 +286,11 @@ final class ArrayCompiler extends Compiler {
@Override
protected void compileBackrefNode(BackRefNode node) {
BackRefNode br = node;
// USE_BACKREF_AT_LEVEL
if (br.backNum == 1) {
if (isIgnoreCase(regex.options)) {
addOpcode(OPCode.BACKREFN_IC);
addMemNum(br.back[0]);
addMemNum(node.backRef);
} else {
switch (br.back[0]) {
switch (node.backRef) {
case 1:
addOpcode(OPCode.BACKREF1);
break;
@ -310,20 +299,10 @@ final class ArrayCompiler extends Compiler {
break;
default:
addOpcode(OPCode.BACKREFN);
addOpcode(br.back[0]);
addOpcode(node.backRef);
break;
} // switch
}
} else {
if (isIgnoreCase(regex.options)) {
addOpcode(OPCode.BACKREF_MULTI_IC);
} else {
addOpcode(OPCode.BACKREF_MULTI);
}
// !add_bacref_mems:!
addLength(br.backNum);
for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
}
}
private static final int REPEAT_RANGE_ALLOC = 8;
@ -791,13 +770,8 @@ final class ArrayCompiler extends Compiler {
case NodeType.BREF:
BackRefNode br = (BackRefNode)node;
// USE_BACKREF_AT_LEVEL
if (br.backNum == 1) {
len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
len = ((!isIgnoreCase(regex.options) && br.backRef <= 2)
? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
} else {
len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
}
break;
case NodeType.QTFR:
@ -873,15 +847,10 @@ final class ArrayCompiler extends Compiler {
switch(opcode) {
case OPCode.ANYCHAR_STAR:
case OPCode.ANYCHAR_STAR_SB:
case OPCode.ANYCHAR_ML_STAR:
case OPCode.ANYCHAR_ML_STAR_SB:
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
case OPCode.STATE_CHECK_ANYCHAR_STAR:
case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
case OPCode.MEMORY_START_PUSH:
case OPCode.MEMORY_END_PUSH:

View file

@ -31,8 +31,6 @@ class ByteCodePrinter {
final char[][] templates;
Object[] operands;
int operantCount;
WarnCallback warnings;
private final static String OpCodeNames[] = new String[] {
"finish", /*OP_FINISH*/
@ -123,32 +121,6 @@ class ByteCodePrinter {
"state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
"set-option-push", /*OP_SET_OPTION_PUSH*/
"set-option", /*OP_SET_OPTION*/
// single byte versions
"anychar-sb", /*OP_ANYCHAR*/
"anychar-ml-sb", /*OP_ANYCHAR_ML*/
"anychar*-sb", /*OP_ANYCHAR_STAR*/
"anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR*/
"anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
"anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
"state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR*/
"state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
"cclass-sb", /*OP_CCLASS*/
"cclass-not-sb", /*OP_CCLASS_NOT*/
"word-sb", /*OP_WORD*/
"not-word-sb", /*OP_NOT_WORD*/
"word-bound-sb", /*OP_WORD_BOUND*/
"not-word-bound-sb", /*OP_NOT_WORD_BOUND*/
"word-begin-sb", /*OP_WORD_BEGIN*/
"word-end-sb", /*OP_WORD_END*/
"look-behind-sb", /*OP_LOOK_BEHIND*/
"exact1-ic-sb", /*OP_EXACT1_IC*/
"exactn-ic-sb", /*OP_EXACTN_IC*/
};
private final static int OpCodeArgTypes[] = new int[] {
@ -240,41 +212,14 @@ class ByteCodePrinter {
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
Arguments.OPTION, /*OP_SET_OPTION*/
// single byte versions
Arguments.NON, /*OP_ANYCHAR*/
Arguments.NON, /*OP_ANYCHAR_ML*/
Arguments.NON, /*OP_ANYCHAR_STAR*/
Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
Arguments.SPECIAL, /*OP_CCLASS*/
Arguments.SPECIAL, /*OP_CCLASS_NOT*/
Arguments.NON, /*OP_WORD*/
Arguments.NON, /*OP_NOT_WORD*/
Arguments.NON, /*OP_WORD_BOUND*/
Arguments.NON, /*OP_NOT_WORD_BOUND*/
Arguments.NON, /*OP_WORD_BEGIN*/
Arguments.NON, /*OP_WORD_END*/
Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
Arguments.SPECIAL, /*OP_EXACT1_IC*/
Arguments.SPECIAL, /*OP_EXACTN_IC*/
};
public ByteCodePrinter(Regex regex) {
code = regex.code;
codeLength = regex.codeLength;
operands = regex.operands;
operantCount = regex.operandLength;
templates = regex.templates;
warnings = regex.warnings;
}
public String byteCodeListToString() {
@ -283,24 +228,17 @@ class ByteCodePrinter {
private void pString(StringBuilder sb, int len, int s) {
sb.append(":");
while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
sb.append(new String(code, s, len));
}
private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) {
sb.append(":T:");
while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]}));
private void pLenString(StringBuilder sb, int len, int s) {
sb.append(":").append(len).append(":");
sb.append(new String(code, s, len));
}
private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
int x = len * mbLen;
sb.append(":" + len + ":");
while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
}
private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, char[] tm, int idx) {
int x = len * mbLen;
sb.append(":T:" + len + ":");
while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]}));
private void pLenStringFromTemplate(StringBuilder sb, int len, char[] tm, int idx) {
sb.append(":T:").append(len).append(":");
sb.append(tm, idx, len);
}
public int compiledByteCodeToString(StringBuilder sb, int bp) {
@ -309,7 +247,7 @@ class ByteCodePrinter {
CClassNode cc;
int tm, idx;
sb.append("[" + OpCodeNames[code[bp]]);
sb.append("[").append(OpCodeNames[code[bp]]);
int argType = OpCodeArgTypes[code[bp]];
int ip = bp;
if (argType != Arguments.SPECIAL) {
@ -319,32 +257,32 @@ class ByteCodePrinter {
break;
case Arguments.RELADDR:
sb.append(":(" + code[bp] + ")");
sb.append(":(").append(code[bp]).append(")");
bp += OPSize.RELADDR;
break;
case Arguments.ABSADDR:
sb.append(":(" + code[bp] + ")");
sb.append(":(").append(code[bp]).append(")");
bp += OPSize.ABSADDR;
break;
case Arguments.LENGTH:
sb.append(":" + code[bp]);
sb.append(":").append(code[bp]);
bp += OPSize.LENGTH;
break;
case Arguments.MEMNUM:
sb.append(":" + code[bp]);
sb.append(":").append(code[bp]);
bp += OPSize.MEMNUM;
break;
case Arguments.OPTION:
sb.append(":" + code[bp]);
sb.append(":").append(code[bp]);
bp += OPSize.OPTION;
break;
case Arguments.STATE_CHECK:
sb.append(":" + code[bp]);
sb.append(":").append(code[bp]);
bp += OPSize.STATE_CHECK;
break;
}
@ -353,8 +291,6 @@ class ByteCodePrinter {
case OPCode.EXACT1:
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
pString(sb, 1, bp++);
break;
@ -386,92 +322,19 @@ class ByteCodePrinter {
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
pLenStringFromTemplate(sb, len, templates[tm], idx);
} else {
pLenString(sb, len, 1, bp);
pLenString(sb, len, bp);
bp += len;
}
break;
case OPCode.EXACTMB2N1:
pString(sb, 2, bp);
bp += 2;
break;
case OPCode.EXACTMB2N2:
pString(sb, 4, bp);
bp += 4;
break;
case OPCode.EXACTMB2N3:
pString(sb, 6, bp);
bp += 6;
break;
case OPCode.EXACTMB2N:
len = code[bp];
bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 2, templates[tm], idx);
} else {
pLenString(sb, len, 2, bp);
bp += len * 2;
}
break;
case OPCode.EXACTMB3N:
len = code[bp];
bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 3, templates[tm], idx);
} else {
pLenString(sb, len, 3, bp);
bp += len * 3;
}
break;
case OPCode.EXACTMBN:
int mbLen = code[bp];
bp += OPSize.LENGTH;
len = code[bp];
bp += OPSize.LENGTH;
n = len * mbLen;
if (Config.USE_STRING_TEMPLATES) {
tm = code[bp];
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
sb.append(":T:" + mbLen + ":" + len + ":");
while (n-- > 0) sb.append(new String(new char[]{templates[tm][idx++]}));
} else {
sb.append(":" + mbLen + ":" + len + ":");
while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]}));
}
break;
case OPCode.EXACT1_IC:
case OPCode.EXACT1_IC_SB:
final int MAX_CHAR_LENGTH = 6;
byte[]bytes = new byte[MAX_CHAR_LENGTH];
for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i];
pString(sb, 1, bp);
bp++;
break;
case OPCode.EXACTN_IC:
case OPCode.EXACTN_IC_SB:
len = code[bp];
bp += OPSize.LENGTH;
if (Config.USE_STRING_TEMPLATES) {
@ -479,29 +342,27 @@ class ByteCodePrinter {
bp += OPSize.INDEX;
idx = code[bp];
bp += OPSize.INDEX;
pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
pLenStringFromTemplate(sb, len, templates[tm], idx);
} else {
pLenString(sb, len, 1, bp);
pLenString(sb, len, bp);
bp += len;
}
break;
case OPCode.CCLASS:
case OPCode.CCLASS_SB:
bs = new BitSet();
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
n = bs.numOn();
bp += BitSet.BITSET_SIZE;
sb.append(":" + n);
sb.append(":").append(n);
break;
case OPCode.CCLASS_NOT:
case OPCode.CCLASS_NOT_SB:
bs = new BitSet();
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
n = bs.numOn();
bp += BitSet.BITSET_SIZE;
sb.append(":" + n);
sb.append(":").append(n);
break;
case OPCode.CCLASS_MB:
@ -511,7 +372,7 @@ class ByteCodePrinter {
cod = code[bp];
//bp += OPSize.CODE_POINT;
bp += len;
sb.append(":" + cod + ":" + len);
sb.append(":").append(cod).append(":").append(len);
break;
case OPCode.CCLASS_MIX:
@ -525,20 +386,20 @@ class ByteCodePrinter {
cod = code[bp];
//bp += OPSize.CODE_POINT;
bp += len;
sb.append(":" + n + ":" + cod + ":" + len);
sb.append(":").append(n).append(":").append(cod).append(":").append(len);
break;
case OPCode.CCLASS_NODE:
cc = (CClassNode)operands[code[bp]];
bp += OPSize.POINTER;
n = cc.bs.numOn();
sb.append(":" + cc + ":" + n);
sb.append(":").append(cc).append(":").append(n);
break;
case OPCode.BACKREFN_IC:
mem = code[bp];
bp += OPSize.MEMNUM;
sb.append(":" + mem);
sb.append(":").append(mem);
break;
case OPCode.BACKREF_MULTI_IC:
@ -557,10 +418,10 @@ class ByteCodePrinter {
case OPCode.BACKREF_WITH_LEVEL: {
int option = code[bp];
bp += OPSize.OPTION;
sb.append(":" + option);
sb.append(":").append(option);
int level = code[bp];
bp += OPSize.LENGTH;
sb.append(":" + level);
sb.append(":").append(level);
sb.append(" ");
len = code[bp];
bp += OPSize.LENGTH;
@ -579,23 +440,22 @@ class ByteCodePrinter {
bp += OPSize.MEMNUM;
addr = code[bp];
bp += OPSize.RELADDR;
sb.append(":" + mem + ":" + addr);
sb.append(":").append(mem).append(":").append(addr);
break;
case OPCode.PUSH_OR_JUMP_EXACT1:
case OPCode.PUSH_IF_PEEK_NEXT:
addr = code[bp];
bp += OPSize.RELADDR;
sb.append(":(" + addr + ")");
sb.append(":(").append(addr).append(")");
pString(sb, 1, bp);
bp++;
break;
case OPCode.LOOK_BEHIND:
case OPCode.LOOK_BEHIND_SB:
len = code[bp];
bp += OPSize.LENGTH;
sb.append(":" + len);
sb.append(":").append(len);
break;
case OPCode.PUSH_LOOK_BEHIND_NOT:
@ -603,7 +463,7 @@ class ByteCodePrinter {
bp += OPSize.RELADDR;
len = code[bp];
bp += OPSize.LENGTH;
sb.append(":" + len + ":(" + addr + ")");
sb.append(":").append(len).append(":(").append(addr).append(")");
break;
case OPCode.STATE_CHECK_PUSH:
@ -612,7 +472,7 @@ class ByteCodePrinter {
bp += OPSize.STATE_CHECK_NUM;
addr = code[bp];
bp += OPSize.RELADDR;
sb.append(":" + scn + ":(" + addr + ")");
sb.append(":").append(scn).append(":(").append(addr).append(")");
break;
default:
@ -623,14 +483,16 @@ class ByteCodePrinter {
sb.append("]");
// @opcode_address(opcode_size)
if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")");
if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
}
return bp;
}
private String compiledByteCodeListToString() {
StringBuilder sb = new StringBuilder();
sb.append("code length: " + codeLength + "\n");
sb.append("code length: ").append(codeLength).append("\n");
int ncode = 0;
int bp = 0;

View file

@ -22,25 +22,34 @@ package jdk.nashorn.internal.runtime.regexp.joni;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class CodeRangeBuffer {
public final class CodeRangeBuffer implements Cloneable {
private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
int[] p;
int used;
/**
 * Creates a buffer backed directly by the given array.
 *
 * The array is stored by reference, not copied, so later modifications to
 * {@code ranges} by the caller are visible through this buffer and vice versa.
 *
 * @param ranges backing array; element 0 is read to initialise {@code used}
 */
public CodeRangeBuffer(int[]ranges) {
p = ranges;
// NOTE(review): toString() in this class reads two ints per range after p[0],
// which would make the occupied length p[0] * 2 + 1 rather than p[0] + 1 - confirm.
used = ranges[0] + 1;
}
/**
 * Creates an empty buffer with an initial capacity of
 * {@code INIT_MULTI_BYTE_RANGE_SIZE} ints.
 */
public CodeRangeBuffer() {
p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
// Write 0 into slot 0 (presumably the range count - confirm against writeCodePoint).
writeCodePoint(0, 0);
}
public int[]getCodeRange() {
return p;
// CodeRange.isInCodeRange
/**
 * Tests whether {@code code} lies inside one of the ranges stored in this buffer.
 *
 * The buffer is read as {@code p[0]} = number of ranges, followed by
 * (lower, upper) pairs at {@code p[2*i + 1]} and {@code p[2*i + 2]}.
 * The binary search assumes the ranges are sorted in ascending order
 * (TODO confirm against the code that fills the buffer).
 *
 * @param code the code point to look up
 * @return {@code true} if some range contains {@code code} (bounds inclusive)
 */
public boolean isInCodeRange(int code) {
    final int rangeCount = p[0];
    int first = 0;
    int limit = rangeCount;

    // Narrow down to the first range whose upper bound is not below code.
    while (first < limit) {
        final int middle = (first + limit) >> 1;
        final int upperBound = p[(middle << 1) + 2];
        if (code <= upperBound) {
            limit = middle;
        } else {
            first = middle + 1;
        }
    }

    if (first >= rangeCount) {
        // code is above every upper bound
        return false;
    }
    final int lowerBound = p[(first << 1) + 1];
    return lowerBound <= code;
}
private CodeRangeBuffer(CodeRangeBuffer orig) {
@ -52,12 +61,12 @@ public final class CodeRangeBuffer {
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("CodeRange");
buf.append("\n used: " + used);
buf.append("\n code point: " + p[0]);
buf.append("\n used: ").append(used);
buf.append("\n code point: ").append(p[0]);
buf.append("\n ranges: ");
for (int i=0; i<p[0]; i++) {
buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]");
buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]");
if (i > 0 && i % 6 == 0) buf.append("\n ");
}
@ -108,6 +117,7 @@ public final class CodeRangeBuffer {
if (used < u) used = u;
}
/**
 * Returns a copy of this buffer produced by the private copy constructor
 * (this method does not delegate to {@code super.clone()}).
 */
@Override
public CodeRangeBuffer clone() {
return new CodeRangeBuffer(this);
}

View file

@ -132,23 +132,6 @@ public class EncodingHelper {
}
}
// CodeRange.isInCodeRange
/**
 * Tests whether {@code code} lies inside one of the ranges encoded in {@code p}.
 *
 * {@code p[0]} holds the number of ranges; range {@code i} occupies
 * {@code p[2*i + 1]} (lower bound) and {@code p[2*i + 2]} (upper bound),
 * both inclusive. The binary search requires the ranges to be in ascending order.
 *
 * @param p    encoded range table
 * @param code the code point to look up
 * @return {@code true} if {@code code} is covered by one of the ranges
 */
public static boolean isInCodeRange(int[]p, int code) {
    final int count = p[0];
    int lo = 0;
    int hi = count;

    // Find the first range whose upper bound is >= code.
    while (lo < hi) {
        final int mid = (lo + hi) >> 1;
        if (p[(mid << 1) + 2] < code) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }

    // Covered only if such a range exists and its lower bound is <= code.
    return lo < count && p[(lo << 1) + 1] <= code;
}
public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
sbOut.value = 0x100; // use bitset for codes smaller than 256
int[] range = null;

View file

@ -28,6 +28,8 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
class Lexer extends ScannerSupport {
protected final ScanEnvironment env;
@ -52,20 +54,24 @@ class Lexer extends ScannerSupport {
if (synAllow) {
return 1; /* "....{" : OK! */
} else {
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
}
}
if (!synAllow) {
c = peek();
if (c == ')' || c == '(' || c == '|') {
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
}
}
int low = scanUnsignedNumber();
if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
if (low < 0) {
throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (low > Config.MAX_REPEAT_NUM) {
throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
boolean nonLow = false;
if (p == _p) { /* can't read low */
@ -85,8 +91,12 @@ class Lexer extends ScannerSupport {
if (c == ',') {
int prev = p; // ??? last
up = scanUnsignedNumber();
if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
if (up < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (up > Config.MAX_REPEAT_NUM) {
throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
}
if (p == prev) {
if (nonLow) return invalidRangeQuantifier(synAllow);
@ -110,7 +120,7 @@ class Lexer extends ScannerSupport {
if (c != '}') return invalidRangeQuantifier(synAllow);
if (!isRepeatInfinite(up) && low > up) {
newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
}
token.type = TokenType.INTERVAL;
@ -125,24 +135,31 @@ class Lexer extends ScannerSupport {
restore();
return 1;
} else {
newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
return 0; // not reached
throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
}
}
/* \M-, \C-, \c, or \... */
private int fetchEscapedValue() {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
fetch();
switch(c) {
case 'M':
if (syntax.op2EscCapitalMBarMeta()) {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_META);
}
fetch();
if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX);
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
if (c != '-') {
throw new SyntaxException(ERR_META_CODE_SYNTAX);
}
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_META);
}
fetch();
if (c == syntax.metaCharTable.esc) {
c = fetchEscapedValue();
@ -155,9 +172,13 @@ class Lexer extends ScannerSupport {
case 'C':
if (syntax.op2EscCapitalCBarControl()) {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
}
fetch();
if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX);
if (c != '-') {
throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX);
}
fetchEscapedValueControl();
} else {
fetchEscapedValueBackSlash();
@ -182,7 +203,9 @@ class Lexer extends ScannerSupport {
}
private void fetchEscapedValueControl() {
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
}
fetch();
if (c == '?') {
c = 0177;
@ -205,115 +228,6 @@ class Lexer extends ScannerSupport {
}
}
// USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
/*
\k<name+n>, \k<name-n>
\k<num+n>, \k<num-n>
\k<-num+n>, \k<-num-n>
*/
// #else USE_NAMED_GROUP
// make it return nameEnd!
/**
 * Scans a numeric group reference (optionally prefixed with '-') that is
 * terminated by the end delimiter matching {@code startCode}.
 *
 * On success the position recorded just before the terminator was fetched is
 * stored in the {@code value} field and the signed group number is returned.
 * Any non-digit character, a missing terminator, an empty name, or a number of
 * zero is reported through {@code newValueException} (presumably throws -
 * the trailing "not reached" return relies on that).
 *
 * @param startCode the opening delimiter; its counterpart comes from {@code nameEndCodePoint}
 * @param ref       not used by this method
 * @return the parsed group number, negated if the name started with '-'
 */
private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
// Start of the name, used for error reporting.
int src = p;
value = 0;
// NOTE(review): isNum is assigned below but never read in this method.
int isNum = 0;
int sign = 1;
int endCode = nameEndCodePoint(startCode);
// Position from which the digits are re-scanned once the name is validated.
int pnumHead = p;
int nameEnd = stop;
String err = null;
if (!left()) {
newValueException(ERR_EMPTY_GROUP_NAME);
} else {
fetch();
if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
if (EncodingHelper.isDigit(c)) {
isNum = 1;
} else if (c == '-') {
isNum = 2;
sign = -1;
// Skip the sign: digits start at the position after the '-'.
pnumHead = p;
} else {
err = ERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
// Consume the rest of the name; stop at the terminator or at ')'.
while(left()) {
nameEnd = p;
fetch();
if (c == endCode || c == ')') break;
if (!EncodingHelper.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
}
// The loop ended without seeing the terminator.
if (err == null && c != endCode) {
err = ERR_INVALID_GROUP_NAME;
nameEnd = stop;
}
if (err == null) {
// Temporarily rewind to the digits, parse them, then return to the
// current position (mark/restore presumably save and reset p - confirm).
mark();
p = pnumHead;
int backNum = scanUnsignedNumber();
restore();
if (backNum < 0) {
newValueException(ERR_TOO_BIG_NUMBER);
} else if (backNum == 0){
newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
}
backNum *= sign;
// Hand the end-of-name position back to the caller through the value field.
value = nameEnd;
return backNum;
} else {
newValueException(err, src, nameEnd);
return 0; // not reached
}
}
/**
 * Fetches a group name/number; simply delegates to
 * {@code fetchNameForNoNamedGroup} with the same arguments.
 */
protected final int fetchName(int startCode, boolean ref) {
return fetchNameForNoNamedGroup(startCode, ref);
}
/**
 * Looks ahead from the current position (without moving it) for the first
 * {@code n} elements of {@code s}, skipping the character after each escape.
 *
 * @param s   the code sequence to look for
 * @param n   number of elements of {@code s} to match
 * @param bad a code that aborts the search when met outside an escape
 * @return {@code true} if the sequence is found before {@code bad} and before
 *         the end of the pattern, {@code false} otherwise
 */
private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
// Work on local copies so the scanner position itself is left untouched.
int p = this.p;
int to = this.stop;
boolean inEsc = false;
int i=0;
while(p < to) {
if (inEsc) {
// The previous character was an escape: skip this one unexamined.
inEsc = false;
p ++;
} else {
int x = chars[p];
int q = p + 1;
if (x == s[0]) {
// First element matched; try to match the remaining n - 1 elements.
for (i=1; i<n && q < to; i++) {
x = chars[q];
if (x != s[i]) break;
q++;
}
if (i >= n) return true;
// Partial match only: resume one character further on.
p++;
} else {
x = chars[p];
if (x == bad) return false;
else if (x == syntax.metaCharTable.esc) inEsc = true;
p = q;
}
}
}
return false;
}
private static final int send[] = new int[]{':', ']'};
private void fetchTokenInCCFor_charType(boolean flag, int type) {
token.type = TokenType.CHAR_TYPE;
token.setPropCType(type);
@ -327,16 +241,19 @@ class Lexer extends ScannerSupport {
if (peekIs('{') && syntax.opEscXBraceHex8()) {
inc();
int num = scanUnsignedHexadecimalNumber(8);
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
}
if (left()) {
int c2 = peek();
if (EncodingHelper.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
if (EncodingHelper.isXDigit(c2)) {
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
}
}
if (p > last + 1 && left() && peekIs('}')) {
inc();
token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num);
} else {
/* can't read nothing or invalid format */
@ -344,12 +261,13 @@ class Lexer extends ScannerSupport {
}
} else if (syntax.opEscXHex2()) {
int num = scanUnsignedHexadecimalNumber(2);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.RAW_BYTE;
token.base = 16;
token.setC(num);
}
}
@ -360,12 +278,13 @@ class Lexer extends ScannerSupport {
if (syntax.op2EscUHex4()) {
int num = scanUnsignedHexadecimalNumber(4);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num);
}
}
@ -375,12 +294,13 @@ class Lexer extends ScannerSupport {
unfetch();
int last = p;
int num = scanUnsignedOctalNumber(3);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.RAW_BYTE;
token.base = 8;
token.setC(num);
}
}
@ -400,7 +320,6 @@ class Lexer extends ScannerSupport {
fetch();
token.type = TokenType.CHAR;
token.base = 0;
token.setC(c);
token.escaped = false;
@ -410,7 +329,9 @@ class Lexer extends ScannerSupport {
token.type = TokenType.CC_RANGE;
} else if (c == syntax.metaCharTable.esc) {
if (!syntax.backSlashEscapeInCC()) return token.type;
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
fetch();
token.escaped = true;
token.setC(c);
@ -508,9 +429,13 @@ class Lexer extends ScannerSupport {
if (peekIs('{') && syntax.opEscXBraceHex8()) {
inc();
int num = scanUnsignedHexadecimalNumber(8);
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
}
if (left()) {
if (EncodingHelper.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
if (EncodingHelper.isXDigit(peek())) {
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
}
}
if (p > last + 1 && left() && peekIs('}')) {
@ -523,12 +448,13 @@ class Lexer extends ScannerSupport {
}
} else if (syntax.opEscXHex2()) {
int num = scanUnsignedHexadecimalNumber(2);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.RAW_BYTE;
token.base = 16;
token.setC(num);
}
}
@ -539,12 +465,13 @@ class Lexer extends ScannerSupport {
if (syntax.op2EscUHex4()) {
int num = scanUnsignedHexadecimalNumber(4);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.CODE_POINT;
token.base = 16;
token.setCode(num);
}
}
@ -556,12 +483,12 @@ class Lexer extends ScannerSupport {
if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
} else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
if (syntax.strictCheckBackref()) {
if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) {
throw new ValueException(ERR_INVALID_BACKREF);
}
}
token.type = TokenType.BACKREF;
token.setBackrefNum(1);
token.setBackrefRef1(num);
token.setBackrefByName(false);
token.setBackrefRef(num);
return;
}
@ -579,40 +506,19 @@ class Lexer extends ScannerSupport {
if (syntax.opEscOctal3()) {
int last = p;
int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
if (num < 0) {
throw new ValueException(ERR_TOO_BIG_NUMBER);
}
if (p == last) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
token.type = TokenType.RAW_BYTE;
token.base = 8;
token.setC(num);
} else if (c != '0') {
inc();
}
}
/**
 * Tries to read a subexpression-call token (type CALL) at the current position.
 * Does nothing unless the active syntax reports op2EscGSubexpCall().
 * When the expected name delimiter is missing, or the input is exhausted,
 * only a syntax warning is issued and the token is left untouched.
 */
private void fetchTokenFor_subexpCall() {
if (syntax.op2EscGSubexpCall()) {
if (left()) {
fetch();
// the group name must be opened by '<' or a single quote
if (c == '<' || c == '\'') {
int last = p; // remembered before fetchName runs; stored below as the name start
int gNum = fetchName(c, true);
int nameEnd = value; // presumably fetchName leaves the name's end offset in `value` -- TODO confirm
token.type = TokenType.CALL;
token.setCallNameP(last);
token.setCallNameEnd(nameEnd);
token.setCallGNum(gNum);
} else {
// not a name delimiter: push the character back and only warn
unfetch();
syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
}
} else {
// nothing left to read after the escape
syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
}
}
}
private void fetchTokenFor_metaChars() {
if (c == syntax.metaCharTable.anyChar) {
token.type = TokenType.ANYCHAR;
@ -638,13 +544,14 @@ class Lexer extends ScannerSupport {
}
token.type = TokenType.STRING;
token.base = 0;
token.backP = p;
fetch();
if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
}
token.backP = p;
fetch();
@ -800,7 +707,9 @@ class Lexer extends ScannerSupport {
if (peekIs('#')) {
fetch();
while (true) {
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
fetch();
if (c == syntax.metaCharTable.esc) {
if (left()) fetch();

View file

@ -40,6 +40,9 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
class Parser extends Lexer {
@ -94,7 +97,9 @@ class Parser extends Lexer {
}
if (token.type == TokenType.CC_CLOSE) {
if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
if (!codeExistCheck(']', true)) {
throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
}
env.ccEscWarn("]");
token.type = TokenType.CHAR; /* allow []...] */
}
@ -187,7 +192,7 @@ class Parser extends Lexer {
parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
break;
}
newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
}
break;
@ -216,10 +221,10 @@ class Parser extends Lexer {
break;
case EOT:
newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
default:
newInternalException(ERR_PARSER_BUG);
throw new InternalException(ERR_PARSER_BUG);
} // switch
if (!fetched) fetchTokenInCC();
@ -280,13 +285,17 @@ class Parser extends Lexer {
private Node parseEnclose(TokenType term) {
Node node = null;
if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
}
int option = env.option;
if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
inc();
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
boolean listCapture = false;
@ -315,18 +324,20 @@ class Parser extends Lexer {
} else if (c == '!') {
node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
} else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
}
break;
case '@':
if (syntax.op2AtMarkCaptureHistory()) {
EncloseNode en = new EncloseNode(); // node_new_enclose_memory
int num = env.addMemEntry();
if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
}
en.regNum = num;
node = en;
} else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
}
break;
@ -355,7 +366,7 @@ class Parser extends Lexer {
if (syntax.op2OptionPerl()) {
option = bsOnOff(option, Option.MULTILINE, neg);
} else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
}
break;
case 'm':
@ -364,7 +375,7 @@ class Parser extends Lexer {
} else if (syntax.op2OptionRuby()) {
option = bsOnOff(option, Option.MULTILINE, neg);
} else {
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
}
break;
// case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
@ -372,7 +383,7 @@ class Parser extends Lexer {
// break;
default:
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} // switch
if (c == ')') {
@ -392,12 +403,14 @@ class Parser extends Lexer {
returnCode = 0;
return node;
}
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
if (!left()) {
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
}
fetch();
} // while
default:
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
} // switch
} else {
@ -458,7 +471,9 @@ class Parser extends Lexer {
}
break;
case SUBEXP_CLOSE:
if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
if (!syntax.allowUnmatchedCloseSubexp()) {
throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
}
if (token.escaped) {
return parseExpTkRawByte(group); // goto tk_raw_byte
} else {
@ -499,7 +514,7 @@ class Parser extends Lexer {
break;
default:
newInternalException(ERR_PARSER_BUG);
throw new InternalException(ERR_PARSER_BUG);
} // inner switch
break;
@ -529,13 +544,8 @@ class Parser extends Lexer {
break;
case BACKREF:
int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
node = new BackRefNode(token.getBackrefNum(),
backRefs,
token.getBackrefByName(),
token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
token.getBackrefLevel(), // ...
env);
int backRef = token.getBackrefRef();
node = new BackRefNode(backRef, env);
break;
case ANCHOR:
@ -546,7 +556,7 @@ class Parser extends Lexer {
case INTERVAL:
if (syntax.contextIndepRepeatOps()) {
if (syntax.contextInvalidRepeatOps()) {
newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
} else {
node = StringNode.EMPTY; // node_new_empty
}
@ -556,7 +566,7 @@ class Parser extends Lexer {
break;
default:
newInternalException(ERR_PARSER_BUG);
throw new InternalException(ERR_PARSER_BUG);
} //switch
//targetp = node;
@ -599,7 +609,9 @@ class Parser extends Lexer {
private Node parseExpRepeat(Node target, boolean group) {
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
if (target.isInvalidQuantifier()) {
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
}
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
token.getRepeatUpper(),
@ -631,7 +643,9 @@ class Parser extends Lexer {
private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
if (target.car.isInvalidQuantifier()) {
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
}
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
token.getRepeatUpper(),
@ -709,9 +723,9 @@ class Parser extends Lexer {
private void parseSubExpError(TokenType term) {
if (term == TokenType.SUBEXP_CLOSE) {
newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
} else {
newInternalException(ERR_PARSER_BUG);
throw new InternalException(ERR_PARSER_BUG);
}
}

View file

@ -19,19 +19,9 @@
*/
package jdk.nashorn.internal.runtime.regexp.joni;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Iterator;
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class Regex implements RegexState {
@ -42,11 +32,9 @@ public final class Regex implements RegexState {
Object[] operands; /* e.g. shared CClassNode */
int operandLength;
int state; /* normal, searching, compiling */ // remove
int numMem; /* used memory(...) num counted from 1 */
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
int numCall; /* number of subexp call */
int captureHistory; /* (?@...) flag (1-31) */
int btMemStart; /* need backtrack flag */
int btMemEnd; /* need backtrack flag */
@ -61,9 +49,6 @@ public final class Regex implements RegexState {
protected Analyser analyser;
int options;
int userOptions;
Object userObject;
//final Syntax syntax;
final int caseFoldFlag;
/* optimization info (string search, char-map and anchors) */
@ -247,46 +232,48 @@ public final class Regex implements RegexState {
}
public String optimizeInfoToString() {
String s = "";
s += "optimize: " + searchAlgorithm.getName() + "\n";
s += " anchor: " + OptAnchorInfo.anchorToString(anchor);
StringBuilder s = new StringBuilder();
s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
if ((anchor & AnchorType.END_BUF_MASK) != 0) {
s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax);
s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
}
s += "\n";
s.append("\n");
if (searchAlgorithm != SearchAlgorithm.NONE) {
s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
}
s += "dmin: " + dMin + " dmax: " + dMax + "\n";
s += "threshold length: " + thresholdLength + "\n";
s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
s.append("threshold length: ").append(thresholdLength).append("\n");
if (exact != null) {
s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
} else if (searchAlgorithm == SearchAlgorithm.MAP) {
int n=0;
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
s += "map: n = " + n + "\n";
s.append("map: n = ").append(n).append("\n");
if (n > 0) {
int c=0;
s += "[";
s.append("[");
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
if (map[i] != 0) {
if (c > 0) s += ", ";
if (c > 0) {
s.append(", ");
}
c++;
// TODO if (enc.isPrint(i)
s += ((char)i);
s.append((char)i);
}
}
s += "]\n";
s.append("]\n");
}
}
return s;
return s.toString();
}
public int getOptions() {

View file

@ -39,13 +39,10 @@ public final class ScanEnvironment {
final public Regex reg;
int numCall;
public int numMem;
public Node memNodes[];
int currMaxRegNum;
boolean hasRecursion;
public ScanEnvironment(Regex regex, Syntax syntax) {
this.reg = regex;
@ -60,13 +57,8 @@ public final class ScanEnvironment {
btMemEnd = bsClear();
backrefedMem = bsClear();
numCall = 0;
numMem = 0;
memNodes = null;
currMaxRegNum = 0;
hasRecursion = false;
}
public int addMemEntry() {
@ -117,11 +109,4 @@ public final class ScanEnvironment {
}
}
/**
 * Reports through reg.warnings that {@code s} occurs in the pattern without an escape.
 * The warning is emitted only when Config.USE_WARN is set and the syntax
 * reports warnCCOpNotEscaped().
 *
 * @param s the text to quote inside the warning message
 */
void closeBracketWithoutEscapeWarn(String s) {
if (Config.USE_WARN) {
if (syntax.warnCCOpNotEscaped()) {
reg.warnings.warn("regular expression has '" + s + "' without escape");
}
}
}
}

View file

@ -155,24 +155,4 @@ abstract class ScannerSupport extends IntHolder implements ErrorMessages {
return p < stop;
}
/**
 * Always throws a SyntaxException carrying {@code message}; never returns normally.
 * Because the declared return type is void, the compiler cannot tell at a call
 * site that control flow ends here.
 */
protected void newSyntaxException(String message) {
throw new SyntaxException(message);
}
/**
 * Always throws a ValueException carrying {@code message}; never returns normally.
 */
protected void newValueException(String message) {
throw new ValueException(message);
}
/**
 * Always throws a ValueException built from {@code message} and {@code str};
 * never returns normally.
 */
protected void newValueException(String message, String str) {
throw new ValueException(message, str);
}
/**
 * Always throws a ValueException built from {@code message} and the slice of
 * {@code chars} starting at {@code p} with length {@code end - p};
 * never returns normally.
 */
protected void newValueException(String message, int p, int end) {
throw new ValueException(message, new String(chars, p, end - p));
}
/**
 * Always throws an InternalException carrying {@code message}; never returns normally.
 */
protected void newInternalException(String message) {
throw new InternalException(message);
}
}

View file

@ -24,12 +24,10 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
final class Token {
TokenType type;
boolean escaped;
int base; /* is number: 8, 16 (used in [....]) */
int backP;
// union fields
private int INT1, INT2, INT3, INT4, INT5;
private int []INTA1;
private int INT1, INT2, INT3, INT4;
// union accessors
int getC() {
@ -53,13 +51,6 @@ final class Token {
INT1 = anchor;
}
// Reads the subtype from union slot INT1 (the same slot other token kinds use).
int getSubtype() {
return INT1;
}
// Stores the subtype in union slot INT1, overwriting whatever another token kind kept there.
void setSubtype(int subtype) {
INT1 = subtype;
}
// repeat union member
int getRepeatLower() {
return INT1;
@ -89,72 +80,13 @@ final class Token {
INT4 = possessive ? 1 : 0;
}
// backref union member
// Reads the backref count from union slot INT1.
int getBackrefNum() {
return INT1;
}
// Stores the backref count in union slot INT1.
void setBackrefNum(int num) {
INT1 = num;
}
int getBackrefRef1() {
int getBackrefRef() {
return INT2;
}
void setBackrefRef1(int ref1) {
void setBackrefRef(int ref1) {
INT2 = ref1;
}
// Returns the stored INTA1 array itself (no copy), so callers share it with this token.
int[]getBackrefRefs() {
return INTA1;
}
// Keeps a reference to the caller's array in INTA1 (no copy is made).
void setBackrefRefs(int[]refs) {
INTA1 = refs;
}
// The by-name flag lives in union slot INT3; any non-zero value reads as true.
boolean getBackrefByName() {
return INT3 != 0;
}
// Encodes the by-name flag into union slot INT3 as 1 (true) or 0 (false).
void setBackrefByName(boolean byName) {
INT3 = byName ? 1 : 0;
}
// USE_BACKREF_AT_LEVEL
// The exist-level flag lives in union slot INT4; any non-zero value reads as true.
boolean getBackrefExistLevel() {
return INT4 != 0;
}
// Encodes the exist-level flag into union slot INT4 as 1 (true) or 0 (false).
void setBackrefExistLevel(boolean existLevel) {
INT4 = existLevel ? 1 : 0;
}
// Reads the backref level from union slot INT5.
int getBackrefLevel() {
return INT5;
}
// Stores the backref level in union slot INT5.
void setBackrefLevel(int level) {
INT5 = level;
}
// call union member
// Reads the call-name start position from union slot INT1.
int getCallNameP() {
return INT1;
}
// Stores the call-name start position in union slot INT1.
void setCallNameP(int nameP) {
INT1 = nameP;
}
// Reads the call-name end position from union slot INT2.
int getCallNameEnd() {
return INT2;
}
// Stores the call-name end position in union slot INT2.
void setCallNameEnd(int nameEnd) {
INT2 = nameEnd;
}
// Reads the call group number from union slot INT3.
int getCallGNum() {
return INT3;
}
// Stores the call group number in union slot INT3.
void setCallGNum(int gnum) {
INT3 = gnum;
}
// prop union member
int getPropCType() {
return INT1;

View file

@ -20,41 +20,15 @@
package jdk.nashorn.internal.runtime.regexp.joni.ast;
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
public final class BackRefNode extends StateNode {
//private static int NODE_BACKREFS_SIZE = 6;
public final int backRef;
//int state;
public int backNum;
public int back[];
public BackRefNode(int backRef, ScanEnvironment env) {
this.backRef = backRef;
public int nestLevel;
public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
this.backNum = backNum;
if (byName) setNameRef();
for (int i=0; i<backNum; i++) {
if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
if (backRef <= env.numMem && env.memNodes[backRef] == null) {
setRecursion(); /* /...(\1).../ */
break;
}
}
back = new int[backNum];
System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ???
}
// #ifdef USE_BACKREF_AT_LEVEL
/**
 * Builds a back-reference node via the four-argument constructor and, when
 * {@code existLevel} is true, additionally marks it with setNestLevel() and
 * records {@code nestLevel}. When {@code existLevel} is false the nestLevel
 * argument is ignored and the field keeps its default value.
 */
public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
this(backNum, backRefs, byName, env);
if (existLevel) {
//state |= NST_NEST_LEVEL;
setNestLevel();
this.nestLevel = nestLevel;
}
}
@ -71,28 +45,7 @@ public final class BackRefNode extends StateNode {
@Override
public String toString(int level) {
StringBuilder value = new StringBuilder(super.toString(level));
value.append("\n backNum: " + backNum);
String backs = "";
for (int i=0; i<back.length; i++) backs += back[i] + ", ";
value.append("\n back: " + backs);
value.append("\n nextLevel: " + nestLevel);
value.append("\n back: ").append(backRef);
return value.toString();
}
/**
 * Rewrites the first backNum entries of {@code back} through {@code map},
 * compacting the array in place and updating backNum.
 * An entry is kept only when its mapped value is greater than zero; other
 * entries are dropped. Slots at or beyond the new backNum keep stale values.
 *
 * @param map lookup table indexed by the current group number
 * @throws ValueException if this node is not flagged as a name reference
 */
public void renumber(int[]map) {
if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
int oldNum = backNum;
int pos = 0; // write cursor; never runs ahead of the read index i
for (int i=0; i<oldNum; i++) {
int n = map[back[i]];
if (n > 0) {
back[pos] = n;
pos++;
}
}
backNum = pos;
}
}

View file

@ -77,39 +77,12 @@ public final class CClassNode extends Node {
// node_new_cclass
public CClassNode() {}
/**
 * Delegates to the (not, sbOut, ranges) constructor and then records {@code ctype}.
 */
public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
this(not, sbOut, ranges);
this.ctype = ctype;
}
/**
 * Resets this node: clears the bit set, zeroes the flags and drops the code range buffer.
 */
public void clear() {
bs.clear();
flags = 0;
mbuf = null;
}
// node_new_cclass_by_codepoint_range, only used by shared Char Classes
/**
 * Builds a character class from a packed range table.
 * As read here, ranges[0] holds the number of (from, to) pairs and the pairs
 * follow at indices 1.. (from at 2*i+1, to at 2*i+2).
 * Code points below {@code sbOut} are set in the bit set. On the first code
 * point at or above {@code sbOut}, the whole table is passed to setupBuffer
 * and the constructor returns.
 *
 * @param not    when true the class is marked negated via setNot()
 * @param sbOut  exclusive upper bound for code points stored in the bit set;
 *               a value of zero or less skips the bit-set pass entirely
 * @param ranges packed range table, may be null
 */
public CClassNode(boolean not, int sbOut, int[]ranges) {
if (not) setNot();
// bs.clear();
if (sbOut > 0 && ranges != null) {
int n = ranges[0];
for (int i=0; i<n; i++) {
int from = ranges[i * 2 + 1];
int to = ranges[i * 2 + 2];
for (int j=from; j<=to; j++) {
if (j >= sbOut) {
// reached a code point outside the bit-set range: stop filling bits
setupBuffer(ranges);
return;
}
bs.set(j);
}
}
}
// reached when the bit-set pass was skipped or every code point was below sbOut
setupBuffer(ranges);
}
@Override
public int getType() {
return CCLASS;
@ -156,13 +129,6 @@ public final class CClassNode extends Node {
return flags.toString();
}
/**
 * Wraps {@code ranges} in a new CodeRangeBuffer and stores it in mbuf.
 * Leaves mbuf untouched when {@code ranges} is null or its first element
 * (the pair count, as used by the calling constructor) is zero.
 */
private void setupBuffer(int[]ranges) {
if (ranges != null) {
if (ranges[0] == 0) return;
mbuf = new CodeRangeBuffer(ranges);
}
}
public boolean isEmpty() {
return mbuf == null && bs.isEmpty();
}
@ -531,11 +497,7 @@ public final class CClassNode extends Node {
boolean found;
if (code > 0xff) {
if (mbuf == null) {
found = false;
} else {
found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
}
found = mbuf != null && mbuf.isInCodeRange(code);
} else {
found = bs.at(code);
}

View file

@ -40,7 +40,6 @@ public abstract class StateNode extends Node implements NodeStatus {
if (isRecursion()) states.append("RECURSION ");
if (isCalled()) states.append("CALLED ");
if (isAddrFixed()) states.append("ADDR_FIXED ");
if (isNameRef()) states.append("NAME_REF ");
if (isInRepeat()) states.append("IN_REPEAT ");
if (isNestLevel()) states.append("NEST_LEVEL ");
if (isByNumber()) states.append("BY_NUMBER ");
@ -132,14 +131,6 @@ public abstract class StateNode extends Node implements NodeStatus {
state |= NST_ADDR_FIXED;
}
// True when the NST_NAME_REF bit is set in this node's state word.
public boolean isNameRef() {
return (state & NST_NAME_REF) != 0;
}
// Sets the NST_NAME_REF bit in this node's state word; other bits are preserved.
public void setNameRef() {
state |= NST_NAME_REF;
}
public boolean isInRepeat() {
return (state & NST_IN_REPEAT) != 0;
}

View file

@ -96,7 +96,7 @@ public final class StringNode extends Node implements StringType {
if (chars[i] >= 0x20 && chars[i] < 0x7f) {
value.append(chars[i]);
} else {
value.append(String.format("[0x%04x]", chars[i]));
value.append(String.format("[0x%04x]", (int)chars[i]));
}
}
value.append("'");
@ -122,10 +122,7 @@ public final class StringNode extends Node implements StringType {
}
public boolean canBeSplit() {
if (end > p) {
return 1 < (end - p);
}
return false;
return end > p && 1 < (end - p);
}
public void set(char[] chars, int p, int end) {

View file

@ -29,12 +29,6 @@ public interface OPCode {
final int EXACT4 = 5; /* single byte, N = 4 */
final int EXACT5 = 6; /* single byte, N = 5 */
final int EXACTN = 7; /* single byte */
final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
final int EXACTMB2N = 11; /* mb-length = 2 */
final int EXACTMB3N = 12; /* mb-length = 3 */
final int EXACTMBN = 13; /* other length */
final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
final int EXACTN_IC = 15; /* single byte, ignore case */
@ -125,28 +119,4 @@ public interface OPCode {
final int SET_OPTION_PUSH = 86; /* set option and push recover option */
final int SET_OPTION = 87; /* set option */
// single byte versions
final int ANYCHAR_SB = 88; /* "." */
final int ANYCHAR_ML_SB = 89; /* "." multi-line */
final int ANYCHAR_STAR_SB = 90; /* ".*" */
final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
final int CCLASS_SB = 96;
final int CCLASS_NOT_SB = 97;
final int WORD_SB = 98;
final int NOT_WORD_SB = 99;
final int WORD_BOUND_SB = 100;
final int NOT_WORD_BOUND_SB = 101;
final int WORD_BEGIN_SB = 102;
final int WORD_END_SB = 103;
final int LOOK_BEHIND_SB = 104;
final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
final int EXACTN_IC_SB = 106; /* single byte, ignore case */
}