mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-24 04:54:40 +02:00
8010821: [findbugs] Some classes in jdk.nashorn.internal.runtime.regexp expose mutable objects
Reviewed-by: attila, jlaskey, sundar
This commit is contained in:
parent
e85e4fb319
commit
06394dedc7
18 changed files with 263 additions and 771 deletions
|
@ -84,9 +84,7 @@ public class JoniRegExp extends RegExp {
|
||||||
this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
|
this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
|
||||||
this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
|
this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
|
||||||
}
|
}
|
||||||
} catch (final PatternSyntaxException e2) {
|
} catch (final PatternSyntaxException | JOniException e2) {
|
||||||
throwParserException("syntax", e2.getMessage());
|
|
||||||
} catch (JOniException e2) {
|
|
||||||
throwParserException("syntax", e2.getMessage());
|
throwParserException("syntax", e2.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,10 +41,12 @@ import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
|
import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
||||||
|
|
||||||
final class Analyser extends Parser {
|
final class Analyser extends Parser {
|
||||||
|
|
||||||
|
@ -53,8 +55,6 @@ final class Analyser extends Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final void compile() {
|
protected final void compile() {
|
||||||
regex.state = RegexState.COMPILING;
|
|
||||||
|
|
||||||
if (Config.DEBUG) {
|
if (Config.DEBUG) {
|
||||||
Config.log.println(new String(chars, getBegin(), getEnd()));
|
Config.log.println(new String(chars, getBegin(), getEnd()));
|
||||||
}
|
}
|
||||||
|
@ -115,8 +115,6 @@ final class Analyser extends Parser {
|
||||||
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
|
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
|
||||||
|
|
||||||
} // DEBUG_COMPILE
|
} // DEBUG_COMPILE
|
||||||
|
|
||||||
regex.state = RegexState.NORMAL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void swap(Node a, Node b) {
|
private void swap(Node a, Node b) {
|
||||||
|
@ -187,14 +185,11 @@ final class Analyser extends Parser {
|
||||||
BackRefNode br = (BackRefNode)node;
|
BackRefNode br = (BackRefNode)node;
|
||||||
if (br.isRecursion()) break;
|
if (br.isRecursion()) break;
|
||||||
|
|
||||||
if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
|
if (br.backRef > env.numMem) {
|
||||||
min = getMinMatchLength(env.memNodes[br.back[0]]);
|
throw new ValueException(ERR_INVALID_BACKREF);
|
||||||
|
|
||||||
for (int i=1; i<br.backNum; i++) {
|
|
||||||
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
|
|
||||||
int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
|
|
||||||
if (min > tmin) min = tmin;
|
|
||||||
}
|
}
|
||||||
|
min = getMinMatchLength(env.memNodes[br.backRef]);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NodeType.LIST:
|
case NodeType.LIST:
|
||||||
|
@ -306,11 +301,11 @@ final class Analyser extends Parser {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i=0; i<br.backNum; i++) {
|
if (br.backRef > env.numMem) {
|
||||||
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
|
throw new ValueException(ERR_INVALID_BACKREF);
|
||||||
int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
|
|
||||||
if (max < tmax) max = tmax;
|
|
||||||
}
|
}
|
||||||
|
int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
|
||||||
|
if (max < tmax) max = tmax;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NodeType.QTFR:
|
case NodeType.QTFR:
|
||||||
|
@ -417,8 +412,6 @@ final class Analyser extends Parser {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NodeType.CTYPE:
|
case NodeType.CTYPE:
|
||||||
len = 1;
|
|
||||||
|
|
||||||
case NodeType.CCLASS:
|
case NodeType.CCLASS:
|
||||||
case NodeType.CANY:
|
case NodeType.CANY:
|
||||||
len = 1;
|
len = 1;
|
||||||
|
@ -712,13 +705,12 @@ final class Analyser extends Parser {
|
||||||
an.charLength = len;
|
an.charLength = len;
|
||||||
break;
|
break;
|
||||||
case GET_CHAR_LEN_VARLEN:
|
case GET_CHAR_LEN_VARLEN:
|
||||||
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
||||||
break;
|
|
||||||
case GET_CHAR_LEN_TOP_ALT_VARLEN:
|
case GET_CHAR_LEN_TOP_ALT_VARLEN:
|
||||||
if (syntax.differentLengthAltLookBehind()) {
|
if (syntax.differentLengthAltLookBehind()) {
|
||||||
return divideLookBehindAlternatives(node);
|
return divideLookBehindAlternatives(node);
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
|
@ -955,12 +947,12 @@ final class Analyser extends Parser {
|
||||||
|
|
||||||
case NodeType.BREF:
|
case NodeType.BREF:
|
||||||
BackRefNode br = (BackRefNode)node;
|
BackRefNode br = (BackRefNode)node;
|
||||||
for (int i=0; i<br.backNum; i++) {
|
if (br.backRef > env.numMem) {
|
||||||
if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
|
throw new ValueException(ERR_INVALID_BACKREF);
|
||||||
env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
|
|
||||||
env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
|
|
||||||
((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
|
|
||||||
}
|
}
|
||||||
|
env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
|
||||||
|
env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
|
||||||
|
((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NodeType.QTFR:
|
case NodeType.QTFR:
|
||||||
|
@ -1064,14 +1056,18 @@ final class Analyser extends Parser {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AnchorType.LOOK_BEHIND:
|
case AnchorType.LOOK_BEHIND:
|
||||||
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
|
||||||
|
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
||||||
|
}
|
||||||
node = setupLookBehind(node);
|
node = setupLookBehind(node);
|
||||||
if (node.getType() != NodeType.ANCHOR) continue restart;
|
if (node.getType() != NodeType.ANCHOR) continue restart;
|
||||||
setupTree(((AnchorNode)node).target, state);
|
setupTree(((AnchorNode)node).target, state);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AnchorType.LOOK_BEHIND_NOT:
|
case AnchorType.LOOK_BEHIND_NOT:
|
||||||
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
|
||||||
|
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
|
||||||
|
}
|
||||||
node = setupLookBehind(node);
|
node = setupLookBehind(node);
|
||||||
if (node.getType() != NodeType.ANCHOR) continue restart;
|
if (node.getType() != NodeType.ANCHOR) continue restart;
|
||||||
setupTree(((AnchorNode)node).target, (state | IN_NOT));
|
setupTree(((AnchorNode)node).target, (state | IN_NOT));
|
||||||
|
@ -1218,15 +1214,9 @@ final class Analyser extends Parser {
|
||||||
|
|
||||||
Node[]nodes = oenv.scanEnv.memNodes;
|
Node[]nodes = oenv.scanEnv.memNodes;
|
||||||
|
|
||||||
int min = getMinMatchLength(nodes[br.back[0]]);
|
int min = getMinMatchLength(nodes[br.backRef]);
|
||||||
int max = getMaxMatchLength(nodes[br.back[0]]);
|
int max = getMaxMatchLength(nodes[br.backRef]);
|
||||||
|
|
||||||
for (int i=1; i<br.backNum; i++) {
|
|
||||||
int tmin = getMinMatchLength(nodes[br.back[i]]);
|
|
||||||
int tmax = getMaxMatchLength(nodes[br.back[i]]);
|
|
||||||
if (min > tmin) min = tmin;
|
|
||||||
if (max < tmax) max = tmax;
|
|
||||||
}
|
|
||||||
opt.length.set(min, max);
|
opt.length.set(min, max);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1314,7 +1304,7 @@ final class Analyser extends Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newInternalException(ERR_PARSER_BUG);
|
throw new InternalException(ERR_PARSER_BUG);
|
||||||
} // switch
|
} // switch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -100,12 +100,7 @@ final class ArrayCompiler extends Compiler {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isNeedStrLenOpExact(int op) {
|
private boolean isNeedStrLenOpExact(int op) {
|
||||||
return op == OPCode.EXACTN ||
|
return op == OPCode.EXACTN || op == OPCode.EXACTN_IC;
|
||||||
op == OPCode.EXACTMB2N ||
|
|
||||||
op == OPCode.EXACTMB3N ||
|
|
||||||
op == OPCode.EXACTMBN ||
|
|
||||||
op == OPCode.EXACTN_IC ||
|
|
||||||
op == OPCode.EXACTN_IC_SB;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean opTemplated(int op) {
|
private boolean opTemplated(int op) {
|
||||||
|
@ -172,7 +167,6 @@ final class ArrayCompiler extends Compiler {
|
||||||
if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
|
if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
|
||||||
len += strLength;
|
len += strLength;
|
||||||
}
|
}
|
||||||
if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
|
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,8 +175,6 @@ final class ArrayCompiler extends Compiler {
|
||||||
int op = selectStrOpcode(strLength, ignoreCase);
|
int op = selectStrOpcode(strLength, ignoreCase);
|
||||||
addOpcode(op);
|
addOpcode(op);
|
||||||
|
|
||||||
if (op == OPCode.EXACTMBN) addLength(1);
|
|
||||||
|
|
||||||
if (isNeedStrLenOpExact(op)) {
|
if (isNeedStrLenOpExact(op)) {
|
||||||
addLength(strLength);
|
addLength(strLength);
|
||||||
}
|
}
|
||||||
|
@ -294,14 +286,11 @@ final class ArrayCompiler extends Compiler {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void compileBackrefNode(BackRefNode node) {
|
protected void compileBackrefNode(BackRefNode node) {
|
||||||
BackRefNode br = node;
|
|
||||||
// USE_BACKREF_AT_LEVEL
|
|
||||||
if (br.backNum == 1) {
|
|
||||||
if (isIgnoreCase(regex.options)) {
|
if (isIgnoreCase(regex.options)) {
|
||||||
addOpcode(OPCode.BACKREFN_IC);
|
addOpcode(OPCode.BACKREFN_IC);
|
||||||
addMemNum(br.back[0]);
|
addMemNum(node.backRef);
|
||||||
} else {
|
} else {
|
||||||
switch (br.back[0]) {
|
switch (node.backRef) {
|
||||||
case 1:
|
case 1:
|
||||||
addOpcode(OPCode.BACKREF1);
|
addOpcode(OPCode.BACKREF1);
|
||||||
break;
|
break;
|
||||||
|
@ -310,20 +299,10 @@ final class ArrayCompiler extends Compiler {
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
addOpcode(OPCode.BACKREFN);
|
addOpcode(OPCode.BACKREFN);
|
||||||
addOpcode(br.back[0]);
|
addOpcode(node.backRef);
|
||||||
break;
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if (isIgnoreCase(regex.options)) {
|
|
||||||
addOpcode(OPCode.BACKREF_MULTI_IC);
|
|
||||||
} else {
|
|
||||||
addOpcode(OPCode.BACKREF_MULTI);
|
|
||||||
}
|
|
||||||
// !add_bacref_mems:!
|
|
||||||
addLength(br.backNum);
|
|
||||||
for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final int REPEAT_RANGE_ALLOC = 8;
|
private static final int REPEAT_RANGE_ALLOC = 8;
|
||||||
|
@ -791,13 +770,8 @@ final class ArrayCompiler extends Compiler {
|
||||||
case NodeType.BREF:
|
case NodeType.BREF:
|
||||||
BackRefNode br = (BackRefNode)node;
|
BackRefNode br = (BackRefNode)node;
|
||||||
|
|
||||||
// USE_BACKREF_AT_LEVEL
|
len = ((!isIgnoreCase(regex.options) && br.backRef <= 2)
|
||||||
if (br.backNum == 1) {
|
|
||||||
len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
|
|
||||||
? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
|
? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM));
|
||||||
} else {
|
|
||||||
len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NodeType.QTFR:
|
case NodeType.QTFR:
|
||||||
|
@ -873,15 +847,10 @@ final class ArrayCompiler extends Compiler {
|
||||||
|
|
||||||
switch(opcode) {
|
switch(opcode) {
|
||||||
case OPCode.ANYCHAR_STAR:
|
case OPCode.ANYCHAR_STAR:
|
||||||
case OPCode.ANYCHAR_STAR_SB:
|
|
||||||
case OPCode.ANYCHAR_ML_STAR:
|
case OPCode.ANYCHAR_ML_STAR:
|
||||||
case OPCode.ANYCHAR_ML_STAR_SB:
|
|
||||||
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
|
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
|
||||||
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
|
|
||||||
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
|
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
|
||||||
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
|
|
||||||
case OPCode.STATE_CHECK_ANYCHAR_STAR:
|
case OPCode.STATE_CHECK_ANYCHAR_STAR:
|
||||||
case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
|
|
||||||
case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
|
case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
|
||||||
case OPCode.MEMORY_START_PUSH:
|
case OPCode.MEMORY_START_PUSH:
|
||||||
case OPCode.MEMORY_END_PUSH:
|
case OPCode.MEMORY_END_PUSH:
|
||||||
|
|
|
@ -45,7 +45,7 @@ class ByteCodeMachine extends StackMachine {
|
||||||
private int sstart;
|
private int sstart;
|
||||||
private int sbegin;
|
private int sbegin;
|
||||||
|
|
||||||
private final int[]code; // byte code
|
private final int[] code; // byte code
|
||||||
private int ip; // instruction pointer
|
private int ip; // instruction pointer
|
||||||
|
|
||||||
ByteCodeMachine(Regex regex, char[] chars, int p, int end) {
|
ByteCodeMachine(Regex regex, char[] chars, int p, int end) {
|
||||||
|
|
|
@ -26,13 +26,11 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
|
||||||
|
|
||||||
class ByteCodePrinter {
|
class ByteCodePrinter {
|
||||||
final int[]code;
|
final int[] code;
|
||||||
final int codeLength;
|
final int codeLength;
|
||||||
final char[][] templates;
|
final char[][] templates;
|
||||||
|
|
||||||
Object[]operands;
|
Object[] operands;
|
||||||
int operantCount;
|
|
||||||
WarnCallback warnings;
|
|
||||||
|
|
||||||
private final static String OpCodeNames[] = new String[] {
|
private final static String OpCodeNames[] = new String[] {
|
||||||
"finish", /*OP_FINISH*/
|
"finish", /*OP_FINISH*/
|
||||||
|
@ -123,32 +121,6 @@ class ByteCodePrinter {
|
||||||
"state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
"state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
||||||
"set-option-push", /*OP_SET_OPTION_PUSH*/
|
"set-option-push", /*OP_SET_OPTION_PUSH*/
|
||||||
"set-option", /*OP_SET_OPTION*/
|
"set-option", /*OP_SET_OPTION*/
|
||||||
|
|
||||||
// single byte versions
|
|
||||||
"anychar-sb", /*OP_ANYCHAR*/
|
|
||||||
"anychar-ml-sb", /*OP_ANYCHAR_ML*/
|
|
||||||
"anychar*-sb", /*OP_ANYCHAR_STAR*/
|
|
||||||
"anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR*/
|
|
||||||
"anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
|
|
||||||
"anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
|
|
||||||
"state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR*/
|
|
||||||
"state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
|
||||||
|
|
||||||
"cclass-sb", /*OP_CCLASS*/
|
|
||||||
"cclass-not-sb", /*OP_CCLASS_NOT*/
|
|
||||||
|
|
||||||
"word-sb", /*OP_WORD*/
|
|
||||||
"not-word-sb", /*OP_NOT_WORD*/
|
|
||||||
"word-bound-sb", /*OP_WORD_BOUND*/
|
|
||||||
"not-word-bound-sb", /*OP_NOT_WORD_BOUND*/
|
|
||||||
"word-begin-sb", /*OP_WORD_BEGIN*/
|
|
||||||
"word-end-sb", /*OP_WORD_END*/
|
|
||||||
|
|
||||||
"look-behind-sb", /*OP_LOOK_BEHIND*/
|
|
||||||
|
|
||||||
"exact1-ic-sb", /*OP_EXACT1_IC*/
|
|
||||||
"exactn-ic-sb", /*OP_EXACTN_IC*/
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
private final static int OpCodeArgTypes[] = new int[] {
|
private final static int OpCodeArgTypes[] = new int[] {
|
||||||
|
@ -240,41 +212,14 @@ class ByteCodePrinter {
|
||||||
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
||||||
Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
|
Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
|
||||||
Arguments.OPTION, /*OP_SET_OPTION*/
|
Arguments.OPTION, /*OP_SET_OPTION*/
|
||||||
|
|
||||||
// single byte versions
|
|
||||||
Arguments.NON, /*OP_ANYCHAR*/
|
|
||||||
Arguments.NON, /*OP_ANYCHAR_ML*/
|
|
||||||
Arguments.NON, /*OP_ANYCHAR_STAR*/
|
|
||||||
Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
|
|
||||||
Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
|
|
||||||
Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
|
|
||||||
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
|
|
||||||
Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
|
|
||||||
|
|
||||||
Arguments.SPECIAL, /*OP_CCLASS*/
|
|
||||||
Arguments.SPECIAL, /*OP_CCLASS_NOT*/
|
|
||||||
|
|
||||||
Arguments.NON, /*OP_WORD*/
|
|
||||||
Arguments.NON, /*OP_NOT_WORD*/
|
|
||||||
Arguments.NON, /*OP_WORD_BOUND*/
|
|
||||||
Arguments.NON, /*OP_NOT_WORD_BOUND*/
|
|
||||||
Arguments.NON, /*OP_WORD_BEGIN*/
|
|
||||||
Arguments.NON, /*OP_WORD_END*/
|
|
||||||
|
|
||||||
Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
|
|
||||||
|
|
||||||
Arguments.SPECIAL, /*OP_EXACT1_IC*/
|
|
||||||
Arguments.SPECIAL, /*OP_EXACTN_IC*/
|
|
||||||
};
|
};
|
||||||
|
|
||||||
public ByteCodePrinter(Regex regex) {
|
public ByteCodePrinter(Regex regex) {
|
||||||
code = regex.code;
|
code = regex.code;
|
||||||
codeLength = regex.codeLength;
|
codeLength = regex.codeLength;
|
||||||
operands = regex.operands;
|
operands = regex.operands;
|
||||||
operantCount = regex.operandLength;
|
|
||||||
|
|
||||||
templates = regex.templates;
|
templates = regex.templates;
|
||||||
warnings = regex.warnings;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String byteCodeListToString() {
|
public String byteCodeListToString() {
|
||||||
|
@ -283,24 +228,17 @@ class ByteCodePrinter {
|
||||||
|
|
||||||
private void pString(StringBuilder sb, int len, int s) {
|
private void pString(StringBuilder sb, int len, int s) {
|
||||||
sb.append(":");
|
sb.append(":");
|
||||||
while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
|
sb.append(new String(code, s, len));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) {
|
private void pLenString(StringBuilder sb, int len, int s) {
|
||||||
sb.append(":T:");
|
sb.append(":").append(len).append(":");
|
||||||
while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]}));
|
sb.append(new String(code, s, len));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
|
private void pLenStringFromTemplate(StringBuilder sb, int len, char[] tm, int idx) {
|
||||||
int x = len * mbLen;
|
sb.append(":T:").append(len).append(":");
|
||||||
sb.append(":" + len + ":");
|
sb.append(tm, idx, len);
|
||||||
while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, char[] tm, int idx) {
|
|
||||||
int x = len * mbLen;
|
|
||||||
sb.append(":T:" + len + ":");
|
|
||||||
while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int compiledByteCodeToString(StringBuilder sb, int bp) {
|
public int compiledByteCodeToString(StringBuilder sb, int bp) {
|
||||||
|
@ -309,7 +247,7 @@ class ByteCodePrinter {
|
||||||
CClassNode cc;
|
CClassNode cc;
|
||||||
int tm, idx;
|
int tm, idx;
|
||||||
|
|
||||||
sb.append("[" + OpCodeNames[code[bp]]);
|
sb.append("[").append(OpCodeNames[code[bp]]);
|
||||||
int argType = OpCodeArgTypes[code[bp]];
|
int argType = OpCodeArgTypes[code[bp]];
|
||||||
int ip = bp;
|
int ip = bp;
|
||||||
if (argType != Arguments.SPECIAL) {
|
if (argType != Arguments.SPECIAL) {
|
||||||
|
@ -319,32 +257,32 @@ class ByteCodePrinter {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.RELADDR:
|
case Arguments.RELADDR:
|
||||||
sb.append(":(" + code[bp] + ")");
|
sb.append(":(").append(code[bp]).append(")");
|
||||||
bp += OPSize.RELADDR;
|
bp += OPSize.RELADDR;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.ABSADDR:
|
case Arguments.ABSADDR:
|
||||||
sb.append(":(" + code[bp] + ")");
|
sb.append(":(").append(code[bp]).append(")");
|
||||||
bp += OPSize.ABSADDR;
|
bp += OPSize.ABSADDR;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.LENGTH:
|
case Arguments.LENGTH:
|
||||||
sb.append(":" + code[bp]);
|
sb.append(":").append(code[bp]);
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.MEMNUM:
|
case Arguments.MEMNUM:
|
||||||
sb.append(":" + code[bp]);
|
sb.append(":").append(code[bp]);
|
||||||
bp += OPSize.MEMNUM;
|
bp += OPSize.MEMNUM;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.OPTION:
|
case Arguments.OPTION:
|
||||||
sb.append(":" + code[bp]);
|
sb.append(":").append(code[bp]);
|
||||||
bp += OPSize.OPTION;
|
bp += OPSize.OPTION;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Arguments.STATE_CHECK:
|
case Arguments.STATE_CHECK:
|
||||||
sb.append(":" + code[bp]);
|
sb.append(":").append(code[bp]);
|
||||||
bp += OPSize.STATE_CHECK;
|
bp += OPSize.STATE_CHECK;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -353,8 +291,6 @@ class ByteCodePrinter {
|
||||||
case OPCode.EXACT1:
|
case OPCode.EXACT1:
|
||||||
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
|
case OPCode.ANYCHAR_STAR_PEEK_NEXT:
|
||||||
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
|
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
|
||||||
case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
|
|
||||||
case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
|
|
||||||
pString(sb, 1, bp++);
|
pString(sb, 1, bp++);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -386,92 +322,19 @@ class ByteCodePrinter {
|
||||||
bp += OPSize.INDEX;
|
bp += OPSize.INDEX;
|
||||||
idx = code[bp];
|
idx = code[bp];
|
||||||
bp += OPSize.INDEX;
|
bp += OPSize.INDEX;
|
||||||
pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
|
pLenStringFromTemplate(sb, len, templates[tm], idx);
|
||||||
} else {
|
} else {
|
||||||
pLenString(sb, len, 1, bp);
|
pLenString(sb, len, bp);
|
||||||
bp += len;
|
bp += len;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.EXACTMB2N1:
|
|
||||||
pString(sb, 2, bp);
|
|
||||||
bp += 2;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACTMB2N2:
|
|
||||||
pString(sb, 4, bp);
|
|
||||||
bp += 4;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACTMB2N3:
|
|
||||||
pString(sb, 6, bp);
|
|
||||||
bp += 6;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACTMB2N:
|
|
||||||
len = code[bp];
|
|
||||||
bp += OPSize.LENGTH;
|
|
||||||
if (Config.USE_STRING_TEMPLATES) {
|
|
||||||
tm = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
idx = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
pLenStringFromTemplate(sb, len, 2, templates[tm], idx);
|
|
||||||
} else {
|
|
||||||
pLenString(sb, len, 2, bp);
|
|
||||||
bp += len * 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACTMB3N:
|
|
||||||
len = code[bp];
|
|
||||||
bp += OPSize.LENGTH;
|
|
||||||
if (Config.USE_STRING_TEMPLATES) {
|
|
||||||
tm = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
idx = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
pLenStringFromTemplate(sb, len, 3, templates[tm], idx);
|
|
||||||
} else {
|
|
||||||
pLenString(sb, len, 3, bp);
|
|
||||||
bp += len * 3;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACTMBN:
|
|
||||||
int mbLen = code[bp];
|
|
||||||
bp += OPSize.LENGTH;
|
|
||||||
len = code[bp];
|
|
||||||
bp += OPSize.LENGTH;
|
|
||||||
n = len * mbLen;
|
|
||||||
|
|
||||||
if (Config.USE_STRING_TEMPLATES) {
|
|
||||||
tm = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
idx = code[bp];
|
|
||||||
bp += OPSize.INDEX;
|
|
||||||
sb.append(":T:" + mbLen + ":" + len + ":");
|
|
||||||
|
|
||||||
while (n-- > 0) sb.append(new String(new char[]{templates[tm][idx++]}));
|
|
||||||
} else {
|
|
||||||
sb.append(":" + mbLen + ":" + len + ":");
|
|
||||||
|
|
||||||
while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]}));
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPCode.EXACT1_IC:
|
case OPCode.EXACT1_IC:
|
||||||
case OPCode.EXACT1_IC_SB:
|
|
||||||
final int MAX_CHAR_LENGTH = 6;
|
|
||||||
byte[]bytes = new byte[MAX_CHAR_LENGTH];
|
|
||||||
for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i];
|
|
||||||
pString(sb, 1, bp);
|
pString(sb, 1, bp);
|
||||||
bp++;
|
bp++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.EXACTN_IC:
|
case OPCode.EXACTN_IC:
|
||||||
case OPCode.EXACTN_IC_SB:
|
|
||||||
len = code[bp];
|
len = code[bp];
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
if (Config.USE_STRING_TEMPLATES) {
|
if (Config.USE_STRING_TEMPLATES) {
|
||||||
|
@ -479,29 +342,27 @@ class ByteCodePrinter {
|
||||||
bp += OPSize.INDEX;
|
bp += OPSize.INDEX;
|
||||||
idx = code[bp];
|
idx = code[bp];
|
||||||
bp += OPSize.INDEX;
|
bp += OPSize.INDEX;
|
||||||
pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
|
pLenStringFromTemplate(sb, len, templates[tm], idx);
|
||||||
} else {
|
} else {
|
||||||
pLenString(sb, len, 1, bp);
|
pLenString(sb, len, bp);
|
||||||
bp += len;
|
bp += len;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.CCLASS:
|
case OPCode.CCLASS:
|
||||||
case OPCode.CCLASS_SB:
|
|
||||||
bs = new BitSet();
|
bs = new BitSet();
|
||||||
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
|
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
|
||||||
n = bs.numOn();
|
n = bs.numOn();
|
||||||
bp += BitSet.BITSET_SIZE;
|
bp += BitSet.BITSET_SIZE;
|
||||||
sb.append(":" + n);
|
sb.append(":").append(n);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.CCLASS_NOT:
|
case OPCode.CCLASS_NOT:
|
||||||
case OPCode.CCLASS_NOT_SB:
|
|
||||||
bs = new BitSet();
|
bs = new BitSet();
|
||||||
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
|
System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
|
||||||
n = bs.numOn();
|
n = bs.numOn();
|
||||||
bp += BitSet.BITSET_SIZE;
|
bp += BitSet.BITSET_SIZE;
|
||||||
sb.append(":" + n);
|
sb.append(":").append(n);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.CCLASS_MB:
|
case OPCode.CCLASS_MB:
|
||||||
|
@ -511,7 +372,7 @@ class ByteCodePrinter {
|
||||||
cod = code[bp];
|
cod = code[bp];
|
||||||
//bp += OPSize.CODE_POINT;
|
//bp += OPSize.CODE_POINT;
|
||||||
bp += len;
|
bp += len;
|
||||||
sb.append(":" + cod + ":" + len);
|
sb.append(":").append(cod).append(":").append(len);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.CCLASS_MIX:
|
case OPCode.CCLASS_MIX:
|
||||||
|
@ -525,20 +386,20 @@ class ByteCodePrinter {
|
||||||
cod = code[bp];
|
cod = code[bp];
|
||||||
//bp += OPSize.CODE_POINT;
|
//bp += OPSize.CODE_POINT;
|
||||||
bp += len;
|
bp += len;
|
||||||
sb.append(":" + n + ":" + cod + ":" + len);
|
sb.append(":").append(n).append(":").append(cod).append(":").append(len);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.CCLASS_NODE:
|
case OPCode.CCLASS_NODE:
|
||||||
cc = (CClassNode)operands[code[bp]];
|
cc = (CClassNode)operands[code[bp]];
|
||||||
bp += OPSize.POINTER;
|
bp += OPSize.POINTER;
|
||||||
n = cc.bs.numOn();
|
n = cc.bs.numOn();
|
||||||
sb.append(":" + cc + ":" + n);
|
sb.append(":").append(cc).append(":").append(n);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.BACKREFN_IC:
|
case OPCode.BACKREFN_IC:
|
||||||
mem = code[bp];
|
mem = code[bp];
|
||||||
bp += OPSize.MEMNUM;
|
bp += OPSize.MEMNUM;
|
||||||
sb.append(":" + mem);
|
sb.append(":").append(mem);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.BACKREF_MULTI_IC:
|
case OPCode.BACKREF_MULTI_IC:
|
||||||
|
@ -557,10 +418,10 @@ class ByteCodePrinter {
|
||||||
case OPCode.BACKREF_WITH_LEVEL: {
|
case OPCode.BACKREF_WITH_LEVEL: {
|
||||||
int option = code[bp];
|
int option = code[bp];
|
||||||
bp += OPSize.OPTION;
|
bp += OPSize.OPTION;
|
||||||
sb.append(":" + option);
|
sb.append(":").append(option);
|
||||||
int level = code[bp];
|
int level = code[bp];
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
sb.append(":" + level);
|
sb.append(":").append(level);
|
||||||
sb.append(" ");
|
sb.append(" ");
|
||||||
len = code[bp];
|
len = code[bp];
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
|
@ -579,23 +440,22 @@ class ByteCodePrinter {
|
||||||
bp += OPSize.MEMNUM;
|
bp += OPSize.MEMNUM;
|
||||||
addr = code[bp];
|
addr = code[bp];
|
||||||
bp += OPSize.RELADDR;
|
bp += OPSize.RELADDR;
|
||||||
sb.append(":" + mem + ":" + addr);
|
sb.append(":").append(mem).append(":").append(addr);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.PUSH_OR_JUMP_EXACT1:
|
case OPCode.PUSH_OR_JUMP_EXACT1:
|
||||||
case OPCode.PUSH_IF_PEEK_NEXT:
|
case OPCode.PUSH_IF_PEEK_NEXT:
|
||||||
addr = code[bp];
|
addr = code[bp];
|
||||||
bp += OPSize.RELADDR;
|
bp += OPSize.RELADDR;
|
||||||
sb.append(":(" + addr + ")");
|
sb.append(":(").append(addr).append(")");
|
||||||
pString(sb, 1, bp);
|
pString(sb, 1, bp);
|
||||||
bp++;
|
bp++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.LOOK_BEHIND:
|
case OPCode.LOOK_BEHIND:
|
||||||
case OPCode.LOOK_BEHIND_SB:
|
|
||||||
len = code[bp];
|
len = code[bp];
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
sb.append(":" + len);
|
sb.append(":").append(len);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.PUSH_LOOK_BEHIND_NOT:
|
case OPCode.PUSH_LOOK_BEHIND_NOT:
|
||||||
|
@ -603,7 +463,7 @@ class ByteCodePrinter {
|
||||||
bp += OPSize.RELADDR;
|
bp += OPSize.RELADDR;
|
||||||
len = code[bp];
|
len = code[bp];
|
||||||
bp += OPSize.LENGTH;
|
bp += OPSize.LENGTH;
|
||||||
sb.append(":" + len + ":(" + addr + ")");
|
sb.append(":").append(len).append(":(").append(addr).append(")");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPCode.STATE_CHECK_PUSH:
|
case OPCode.STATE_CHECK_PUSH:
|
||||||
|
@ -612,7 +472,7 @@ class ByteCodePrinter {
|
||||||
bp += OPSize.STATE_CHECK_NUM;
|
bp += OPSize.STATE_CHECK_NUM;
|
||||||
addr = code[bp];
|
addr = code[bp];
|
||||||
bp += OPSize.RELADDR;
|
bp += OPSize.RELADDR;
|
||||||
sb.append(":" + scn + ":(" + addr + ")");
|
sb.append(":").append(scn).append(":(").append(addr).append(")");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -623,14 +483,16 @@ class ByteCodePrinter {
|
||||||
sb.append("]");
|
sb.append("]");
|
||||||
|
|
||||||
// @opcode_address(opcode_size)
|
// @opcode_address(opcode_size)
|
||||||
if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")");
|
if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
|
||||||
|
sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
|
||||||
|
}
|
||||||
|
|
||||||
return bp;
|
return bp;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String compiledByteCodeListToString() {
|
private String compiledByteCodeListToString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append("code length: " + codeLength + "\n");
|
sb.append("code length: ").append(codeLength).append("\n");
|
||||||
|
|
||||||
int ncode = 0;
|
int ncode = 0;
|
||||||
int bp = 0;
|
int bp = 0;
|
||||||
|
|
|
@ -22,25 +22,34 @@ package jdk.nashorn.internal.runtime.regexp.joni;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
||||||
|
|
||||||
public final class CodeRangeBuffer {
|
public final class CodeRangeBuffer implements Cloneable {
|
||||||
private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
|
private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
|
||||||
private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
|
private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
|
||||||
|
|
||||||
int[]p;
|
int[] p;
|
||||||
int used;
|
int used;
|
||||||
|
|
||||||
public CodeRangeBuffer(int[]ranges) {
|
|
||||||
p = ranges;
|
|
||||||
used = ranges[0] + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CodeRangeBuffer() {
|
public CodeRangeBuffer() {
|
||||||
p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
|
p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
|
||||||
writeCodePoint(0, 0);
|
writeCodePoint(0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int[]getCodeRange() {
|
// CodeRange.isInCodeRange
|
||||||
return p;
|
public boolean isInCodeRange(int code) {
|
||||||
|
int low = 0;
|
||||||
|
int n = p[0];
|
||||||
|
int high = n;
|
||||||
|
|
||||||
|
while (low < high) {
|
||||||
|
int x = (low + high) >> 1;
|
||||||
|
if (code > p[(x << 1) + 2]) {
|
||||||
|
low = x + 1;
|
||||||
|
} else {
|
||||||
|
high = x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return low < n && code >= p[(low << 1) + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
private CodeRangeBuffer(CodeRangeBuffer orig) {
|
private CodeRangeBuffer(CodeRangeBuffer orig) {
|
||||||
|
@ -52,12 +61,12 @@ public final class CodeRangeBuffer {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder buf = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
buf.append("CodeRange");
|
buf.append("CodeRange");
|
||||||
buf.append("\n used: " + used);
|
buf.append("\n used: ").append(used);
|
||||||
buf.append("\n code point: " + p[0]);
|
buf.append("\n code point: ").append(p[0]);
|
||||||
buf.append("\n ranges: ");
|
buf.append("\n ranges: ");
|
||||||
|
|
||||||
for (int i=0; i<p[0]; i++) {
|
for (int i=0; i<p[0]; i++) {
|
||||||
buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]");
|
buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]");
|
||||||
if (i > 0 && i % 6 == 0) buf.append("\n ");
|
if (i > 0 && i % 6 == 0) buf.append("\n ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,6 +117,7 @@ public final class CodeRangeBuffer {
|
||||||
if (used < u) used = u;
|
if (used < u) used = u;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public CodeRangeBuffer clone() {
|
public CodeRangeBuffer clone() {
|
||||||
return new CodeRangeBuffer(this);
|
return new CodeRangeBuffer(this);
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,23 +132,6 @@ public class EncodingHelper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// CodeRange.isInCodeRange
|
|
||||||
public static boolean isInCodeRange(int[]p, int code) {
|
|
||||||
int low = 0;
|
|
||||||
int n = p[0];
|
|
||||||
int high = n;
|
|
||||||
|
|
||||||
while (low < high) {
|
|
||||||
int x = (low + high) >> 1;
|
|
||||||
if (code > p[(x << 1) + 2]) {
|
|
||||||
low = x + 1;
|
|
||||||
} else {
|
|
||||||
high = x;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return low < n && code >= p[(low << 1) + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
|
public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
|
||||||
sbOut.value = 0x100; // use bitset for codes smaller than 256
|
sbOut.value = 0x100; // use bitset for codes smaller than 256
|
||||||
int[] range = null;
|
int[] range = null;
|
||||||
|
@ -188,7 +171,7 @@ public class EncodingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
// CodeRange.isInCodeRange
|
// CodeRange.isInCodeRange
|
||||||
public static boolean isInCodeRange(int[]p, int offset, int code) {
|
public static boolean isInCodeRange(int[] p, int offset, int code) {
|
||||||
int low = 0;
|
int low = 0;
|
||||||
int n = p[offset];
|
int n = p[offset];
|
||||||
int high = n ;
|
int high = n ;
|
||||||
|
|
|
@ -28,6 +28,8 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
|
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
||||||
|
|
||||||
class Lexer extends ScannerSupport {
|
class Lexer extends ScannerSupport {
|
||||||
protected final ScanEnvironment env;
|
protected final ScanEnvironment env;
|
||||||
|
@ -52,20 +54,24 @@ class Lexer extends ScannerSupport {
|
||||||
if (synAllow) {
|
if (synAllow) {
|
||||||
return 1; /* "....{" : OK! */
|
return 1; /* "....{" : OK! */
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
|
throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!synAllow) {
|
if (!synAllow) {
|
||||||
c = peek();
|
c = peek();
|
||||||
if (c == ')' || c == '(' || c == '|') {
|
if (c == ')' || c == '(' || c == '|') {
|
||||||
newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
|
throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int low = scanUnsignedNumber();
|
int low = scanUnsignedNumber();
|
||||||
if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
if (low < 0) {
|
||||||
if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
||||||
|
}
|
||||||
|
if (low > Config.MAX_REPEAT_NUM) {
|
||||||
|
throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
||||||
|
}
|
||||||
|
|
||||||
boolean nonLow = false;
|
boolean nonLow = false;
|
||||||
if (p == _p) { /* can't read low */
|
if (p == _p) { /* can't read low */
|
||||||
|
@ -85,8 +91,12 @@ class Lexer extends ScannerSupport {
|
||||||
if (c == ',') {
|
if (c == ',') {
|
||||||
int prev = p; // ??? last
|
int prev = p; // ??? last
|
||||||
up = scanUnsignedNumber();
|
up = scanUnsignedNumber();
|
||||||
if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
if (up < 0) {
|
||||||
if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
||||||
|
}
|
||||||
|
if (up > Config.MAX_REPEAT_NUM) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
|
||||||
|
}
|
||||||
|
|
||||||
if (p == prev) {
|
if (p == prev) {
|
||||||
if (nonLow) return invalidRangeQuantifier(synAllow);
|
if (nonLow) return invalidRangeQuantifier(synAllow);
|
||||||
|
@ -110,7 +120,7 @@ class Lexer extends ScannerSupport {
|
||||||
if (c != '}') return invalidRangeQuantifier(synAllow);
|
if (c != '}') return invalidRangeQuantifier(synAllow);
|
||||||
|
|
||||||
if (!isRepeatInfinite(up) && low > up) {
|
if (!isRepeatInfinite(up) && low > up) {
|
||||||
newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
|
throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
token.type = TokenType.INTERVAL;
|
token.type = TokenType.INTERVAL;
|
||||||
|
@ -125,24 +135,31 @@ class Lexer extends ScannerSupport {
|
||||||
restore();
|
restore();
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
|
throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
|
||||||
return 0; // not reached
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* \M-, \C-, \c, or \... */
|
/* \M-, \C-, \c, or \... */
|
||||||
private int fetchEscapedValue() {
|
private int fetchEscapedValue() {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
|
|
||||||
switch(c) {
|
switch(c) {
|
||||||
|
|
||||||
case 'M':
|
case 'M':
|
||||||
if (syntax.op2EscCapitalMBarMeta()) {
|
if (syntax.op2EscCapitalMBarMeta()) {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_META);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX);
|
if (c != '-') {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
|
throw new SyntaxException(ERR_META_CODE_SYNTAX);
|
||||||
|
}
|
||||||
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_META);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
if (c == syntax.metaCharTable.esc) {
|
if (c == syntax.metaCharTable.esc) {
|
||||||
c = fetchEscapedValue();
|
c = fetchEscapedValue();
|
||||||
|
@ -155,9 +172,13 @@ class Lexer extends ScannerSupport {
|
||||||
|
|
||||||
case 'C':
|
case 'C':
|
||||||
if (syntax.op2EscCapitalCBarControl()) {
|
if (syntax.op2EscCapitalCBarControl()) {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX);
|
if (c != '-') {
|
||||||
|
throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX);
|
||||||
|
}
|
||||||
fetchEscapedValueControl();
|
fetchEscapedValueControl();
|
||||||
} else {
|
} else {
|
||||||
fetchEscapedValueBackSlash();
|
fetchEscapedValueBackSlash();
|
||||||
|
@ -182,7 +203,9 @@ class Lexer extends ScannerSupport {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fetchEscapedValueControl() {
|
private void fetchEscapedValueControl() {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
if (c == '?') {
|
if (c == '?') {
|
||||||
c = 0177;
|
c = 0177;
|
||||||
|
@ -205,115 +228,6 @@ class Lexer extends ScannerSupport {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
|
|
||||||
/*
|
|
||||||
\k<name+n>, \k<name-n>
|
|
||||||
\k<num+n>, \k<num-n>
|
|
||||||
\k<-num+n>, \k<-num-n>
|
|
||||||
*/
|
|
||||||
|
|
||||||
// #else USE_NAMED_GROUP
|
|
||||||
// make it return nameEnd!
|
|
||||||
private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
|
|
||||||
int src = p;
|
|
||||||
value = 0;
|
|
||||||
|
|
||||||
int isNum = 0;
|
|
||||||
int sign = 1;
|
|
||||||
|
|
||||||
int endCode = nameEndCodePoint(startCode);
|
|
||||||
int pnumHead = p;
|
|
||||||
int nameEnd = stop;
|
|
||||||
|
|
||||||
String err = null;
|
|
||||||
if (!left()) {
|
|
||||||
newValueException(ERR_EMPTY_GROUP_NAME);
|
|
||||||
} else {
|
|
||||||
fetch();
|
|
||||||
if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
|
|
||||||
|
|
||||||
if (EncodingHelper.isDigit(c)) {
|
|
||||||
isNum = 1;
|
|
||||||
} else if (c == '-') {
|
|
||||||
isNum = 2;
|
|
||||||
sign = -1;
|
|
||||||
pnumHead = p;
|
|
||||||
} else {
|
|
||||||
err = ERR_INVALID_CHAR_IN_GROUP_NAME;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while(left()) {
|
|
||||||
nameEnd = p;
|
|
||||||
|
|
||||||
fetch();
|
|
||||||
if (c == endCode || c == ')') break;
|
|
||||||
if (!EncodingHelper.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err == null && c != endCode) {
|
|
||||||
err = ERR_INVALID_GROUP_NAME;
|
|
||||||
nameEnd = stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err == null) {
|
|
||||||
mark();
|
|
||||||
p = pnumHead;
|
|
||||||
int backNum = scanUnsignedNumber();
|
|
||||||
restore();
|
|
||||||
if (backNum < 0) {
|
|
||||||
newValueException(ERR_TOO_BIG_NUMBER);
|
|
||||||
} else if (backNum == 0){
|
|
||||||
newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
|
|
||||||
}
|
|
||||||
backNum *= sign;
|
|
||||||
|
|
||||||
value = nameEnd;
|
|
||||||
return backNum;
|
|
||||||
} else {
|
|
||||||
newValueException(err, src, nameEnd);
|
|
||||||
return 0; // not reached
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected final int fetchName(int startCode, boolean ref) {
|
|
||||||
return fetchNameForNoNamedGroup(startCode, ref);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
|
|
||||||
int p = this.p;
|
|
||||||
int to = this.stop;
|
|
||||||
|
|
||||||
boolean inEsc = false;
|
|
||||||
int i=0;
|
|
||||||
while(p < to) {
|
|
||||||
if (inEsc) {
|
|
||||||
inEsc = false;
|
|
||||||
p ++;
|
|
||||||
} else {
|
|
||||||
int x = chars[p];
|
|
||||||
int q = p + 1;
|
|
||||||
if (x == s[0]) {
|
|
||||||
for (i=1; i<n && q < to; i++) {
|
|
||||||
x = chars[q];
|
|
||||||
if (x != s[i]) break;
|
|
||||||
q++;
|
|
||||||
}
|
|
||||||
if (i >= n) return true;
|
|
||||||
p++;
|
|
||||||
} else {
|
|
||||||
x = chars[p];
|
|
||||||
if (x == bad) return false;
|
|
||||||
else if (x == syntax.metaCharTable.esc) inEsc = true;
|
|
||||||
p = q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final int send[] = new int[]{':', ']'};
|
|
||||||
|
|
||||||
private void fetchTokenInCCFor_charType(boolean flag, int type) {
|
private void fetchTokenInCCFor_charType(boolean flag, int type) {
|
||||||
token.type = TokenType.CHAR_TYPE;
|
token.type = TokenType.CHAR_TYPE;
|
||||||
token.setPropCType(type);
|
token.setPropCType(type);
|
||||||
|
@ -327,16 +241,19 @@ class Lexer extends ScannerSupport {
|
||||||
if (peekIs('{') && syntax.opEscXBraceHex8()) {
|
if (peekIs('{') && syntax.opEscXBraceHex8()) {
|
||||||
inc();
|
inc();
|
||||||
int num = scanUnsignedHexadecimalNumber(8);
|
int num = scanUnsignedHexadecimalNumber(8);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
|
||||||
|
}
|
||||||
if (left()) {
|
if (left()) {
|
||||||
int c2 = peek();
|
int c2 = peek();
|
||||||
if (EncodingHelper.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
|
if (EncodingHelper.isXDigit(c2)) {
|
||||||
|
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p > last + 1 && left() && peekIs('}')) {
|
if (p > last + 1 && left() && peekIs('}')) {
|
||||||
inc();
|
inc();
|
||||||
token.type = TokenType.CODE_POINT;
|
token.type = TokenType.CODE_POINT;
|
||||||
token.base = 16;
|
|
||||||
token.setCode(num);
|
token.setCode(num);
|
||||||
} else {
|
} else {
|
||||||
/* can't read nothing or invalid format */
|
/* can't read nothing or invalid format */
|
||||||
|
@ -344,12 +261,13 @@ class Lexer extends ScannerSupport {
|
||||||
}
|
}
|
||||||
} else if (syntax.opEscXHex2()) {
|
} else if (syntax.opEscXHex2()) {
|
||||||
int num = scanUnsignedHexadecimalNumber(2);
|
int num = scanUnsignedHexadecimalNumber(2);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.RAW_BYTE;
|
token.type = TokenType.RAW_BYTE;
|
||||||
token.base = 16;
|
|
||||||
token.setC(num);
|
token.setC(num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -360,12 +278,13 @@ class Lexer extends ScannerSupport {
|
||||||
|
|
||||||
if (syntax.op2EscUHex4()) {
|
if (syntax.op2EscUHex4()) {
|
||||||
int num = scanUnsignedHexadecimalNumber(4);
|
int num = scanUnsignedHexadecimalNumber(4);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.CODE_POINT;
|
token.type = TokenType.CODE_POINT;
|
||||||
token.base = 16;
|
|
||||||
token.setCode(num);
|
token.setCode(num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -375,12 +294,13 @@ class Lexer extends ScannerSupport {
|
||||||
unfetch();
|
unfetch();
|
||||||
int last = p;
|
int last = p;
|
||||||
int num = scanUnsignedOctalNumber(3);
|
int num = scanUnsignedOctalNumber(3);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.RAW_BYTE;
|
token.type = TokenType.RAW_BYTE;
|
||||||
token.base = 8;
|
|
||||||
token.setC(num);
|
token.setC(num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -400,7 +320,6 @@ class Lexer extends ScannerSupport {
|
||||||
|
|
||||||
fetch();
|
fetch();
|
||||||
token.type = TokenType.CHAR;
|
token.type = TokenType.CHAR;
|
||||||
token.base = 0;
|
|
||||||
token.setC(c);
|
token.setC(c);
|
||||||
token.escaped = false;
|
token.escaped = false;
|
||||||
|
|
||||||
|
@ -410,7 +329,9 @@ class Lexer extends ScannerSupport {
|
||||||
token.type = TokenType.CC_RANGE;
|
token.type = TokenType.CC_RANGE;
|
||||||
} else if (c == syntax.metaCharTable.esc) {
|
} else if (c == syntax.metaCharTable.esc) {
|
||||||
if (!syntax.backSlashEscapeInCC()) return token.type;
|
if (!syntax.backSlashEscapeInCC()) return token.type;
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
token.escaped = true;
|
token.escaped = true;
|
||||||
token.setC(c);
|
token.setC(c);
|
||||||
|
@ -508,9 +429,13 @@ class Lexer extends ScannerSupport {
|
||||||
if (peekIs('{') && syntax.opEscXBraceHex8()) {
|
if (peekIs('{') && syntax.opEscXBraceHex8()) {
|
||||||
inc();
|
inc();
|
||||||
int num = scanUnsignedHexadecimalNumber(8);
|
int num = scanUnsignedHexadecimalNumber(8);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
|
||||||
|
}
|
||||||
if (left()) {
|
if (left()) {
|
||||||
if (EncodingHelper.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
|
if (EncodingHelper.isXDigit(peek())) {
|
||||||
|
throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p > last + 1 && left() && peekIs('}')) {
|
if (p > last + 1 && left() && peekIs('}')) {
|
||||||
|
@ -523,12 +448,13 @@ class Lexer extends ScannerSupport {
|
||||||
}
|
}
|
||||||
} else if (syntax.opEscXHex2()) {
|
} else if (syntax.opEscXHex2()) {
|
||||||
int num = scanUnsignedHexadecimalNumber(2);
|
int num = scanUnsignedHexadecimalNumber(2);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.RAW_BYTE;
|
token.type = TokenType.RAW_BYTE;
|
||||||
token.base = 16;
|
|
||||||
token.setC(num);
|
token.setC(num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -539,12 +465,13 @@ class Lexer extends ScannerSupport {
|
||||||
|
|
||||||
if (syntax.op2EscUHex4()) {
|
if (syntax.op2EscUHex4()) {
|
||||||
int num = scanUnsignedHexadecimalNumber(4);
|
int num = scanUnsignedHexadecimalNumber(4);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.CODE_POINT;
|
token.type = TokenType.CODE_POINT;
|
||||||
token.base = 16;
|
|
||||||
token.setCode(num);
|
token.setCode(num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -556,12 +483,12 @@ class Lexer extends ScannerSupport {
|
||||||
if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
|
if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
|
||||||
} else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
|
} else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
|
||||||
if (syntax.strictCheckBackref()) {
|
if (syntax.strictCheckBackref()) {
|
||||||
if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
|
if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) {
|
||||||
|
throw new ValueException(ERR_INVALID_BACKREF);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
token.type = TokenType.BACKREF;
|
token.type = TokenType.BACKREF;
|
||||||
token.setBackrefNum(1);
|
token.setBackrefRef(num);
|
||||||
token.setBackrefRef1(num);
|
|
||||||
token.setBackrefByName(false);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,40 +506,19 @@ class Lexer extends ScannerSupport {
|
||||||
if (syntax.opEscOctal3()) {
|
if (syntax.opEscOctal3()) {
|
||||||
int last = p;
|
int last = p;
|
||||||
int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
|
int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
|
||||||
if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
|
if (num < 0) {
|
||||||
|
throw new ValueException(ERR_TOO_BIG_NUMBER);
|
||||||
|
}
|
||||||
if (p == last) { /* can't read nothing. */
|
if (p == last) { /* can't read nothing. */
|
||||||
num = 0; /* but, it's not error */
|
num = 0; /* but, it's not error */
|
||||||
}
|
}
|
||||||
token.type = TokenType.RAW_BYTE;
|
token.type = TokenType.RAW_BYTE;
|
||||||
token.base = 8;
|
|
||||||
token.setC(num);
|
token.setC(num);
|
||||||
} else if (c != '0') {
|
} else if (c != '0') {
|
||||||
inc();
|
inc();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fetchTokenFor_subexpCall() {
|
|
||||||
if (syntax.op2EscGSubexpCall()) {
|
|
||||||
if (left()) {
|
|
||||||
fetch();
|
|
||||||
if (c == '<' || c == '\'') {
|
|
||||||
int last = p;
|
|
||||||
int gNum = fetchName(c, true);
|
|
||||||
int nameEnd = value;
|
|
||||||
token.type = TokenType.CALL;
|
|
||||||
token.setCallNameP(last);
|
|
||||||
token.setCallNameEnd(nameEnd);
|
|
||||||
token.setCallGNum(gNum);
|
|
||||||
} else {
|
|
||||||
unfetch();
|
|
||||||
syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void fetchTokenFor_metaChars() {
|
private void fetchTokenFor_metaChars() {
|
||||||
if (c == syntax.metaCharTable.anyChar) {
|
if (c == syntax.metaCharTable.anyChar) {
|
||||||
token.type = TokenType.ANYCHAR;
|
token.type = TokenType.ANYCHAR;
|
||||||
|
@ -638,13 +544,14 @@ class Lexer extends ScannerSupport {
|
||||||
}
|
}
|
||||||
|
|
||||||
token.type = TokenType.STRING;
|
token.type = TokenType.STRING;
|
||||||
token.base = 0;
|
|
||||||
token.backP = p;
|
token.backP = p;
|
||||||
|
|
||||||
fetch();
|
fetch();
|
||||||
|
|
||||||
if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
|
if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
|
||||||
|
}
|
||||||
|
|
||||||
token.backP = p;
|
token.backP = p;
|
||||||
fetch();
|
fetch();
|
||||||
|
@ -800,7 +707,9 @@ class Lexer extends ScannerSupport {
|
||||||
if (peekIs('#')) {
|
if (peekIs('#')) {
|
||||||
fetch();
|
fetch();
|
||||||
while (true) {
|
while (true) {
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
if (c == syntax.metaCharTable.esc) {
|
if (c == syntax.metaCharTable.esc) {
|
||||||
if (left()) fetch();
|
if (left()) fetch();
|
||||||
|
|
|
@ -40,6 +40,9 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
|
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
|
||||||
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
||||||
|
|
||||||
class Parser extends Lexer {
|
class Parser extends Lexer {
|
||||||
|
|
||||||
|
@ -94,7 +97,9 @@ class Parser extends Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (token.type == TokenType.CC_CLOSE) {
|
if (token.type == TokenType.CC_CLOSE) {
|
||||||
if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
|
if (!codeExistCheck(']', true)) {
|
||||||
|
throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
|
||||||
|
}
|
||||||
env.ccEscWarn("]");
|
env.ccEscWarn("]");
|
||||||
token.type = TokenType.CHAR; /* allow []...] */
|
token.type = TokenType.CHAR; /* allow []...] */
|
||||||
}
|
}
|
||||||
|
@ -187,7 +192,7 @@ class Parser extends Lexer {
|
||||||
parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
|
parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
|
throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -216,10 +221,10 @@ class Parser extends Lexer {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EOT:
|
case EOT:
|
||||||
newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
|
throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newInternalException(ERR_PARSER_BUG);
|
throw new InternalException(ERR_PARSER_BUG);
|
||||||
} // switch
|
} // switch
|
||||||
|
|
||||||
if (!fetched) fetchTokenInCC();
|
if (!fetched) fetchTokenInCC();
|
||||||
|
@ -280,13 +285,17 @@ class Parser extends Lexer {
|
||||||
private Node parseEnclose(TokenType term) {
|
private Node parseEnclose(TokenType term) {
|
||||||
Node node = null;
|
Node node = null;
|
||||||
|
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
|
||||||
|
}
|
||||||
|
|
||||||
int option = env.option;
|
int option = env.option;
|
||||||
|
|
||||||
if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
|
if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
|
||||||
inc();
|
inc();
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
|
||||||
|
}
|
||||||
|
|
||||||
boolean listCapture = false;
|
boolean listCapture = false;
|
||||||
|
|
||||||
|
@ -315,18 +324,20 @@ class Parser extends Lexer {
|
||||||
} else if (c == '!') {
|
} else if (c == '!') {
|
||||||
node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
|
node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case '@':
|
case '@':
|
||||||
if (syntax.op2AtMarkCaptureHistory()) {
|
if (syntax.op2AtMarkCaptureHistory()) {
|
||||||
EncloseNode en = new EncloseNode(); // node_new_enclose_memory
|
EncloseNode en = new EncloseNode(); // node_new_enclose_memory
|
||||||
int num = env.addMemEntry();
|
int num = env.addMemEntry();
|
||||||
if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
|
if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
|
||||||
|
throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
|
||||||
|
}
|
||||||
en.regNum = num;
|
en.regNum = num;
|
||||||
node = en;
|
node = en;
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -355,7 +366,7 @@ class Parser extends Lexer {
|
||||||
if (syntax.op2OptionPerl()) {
|
if (syntax.op2OptionPerl()) {
|
||||||
option = bsOnOff(option, Option.MULTILINE, neg);
|
option = bsOnOff(option, Option.MULTILINE, neg);
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
|
@ -364,7 +375,7 @@ class Parser extends Lexer {
|
||||||
} else if (syntax.op2OptionRuby()) {
|
} else if (syntax.op2OptionRuby()) {
|
||||||
option = bsOnOff(option, Option.MULTILINE, neg);
|
option = bsOnOff(option, Option.MULTILINE, neg);
|
||||||
} else {
|
} else {
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
// case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
|
// case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
|
||||||
|
@ -372,7 +383,7 @@ class Parser extends Lexer {
|
||||||
// break;
|
// break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
} // switch
|
} // switch
|
||||||
|
|
||||||
if (c == ')') {
|
if (c == ')') {
|
||||||
|
@ -392,12 +403,14 @@ class Parser extends Lexer {
|
||||||
returnCode = 0;
|
returnCode = 0;
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
|
if (!left()) {
|
||||||
|
throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
|
||||||
|
}
|
||||||
fetch();
|
fetch();
|
||||||
} // while
|
} // while
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
|
||||||
} // switch
|
} // switch
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -458,7 +471,9 @@ class Parser extends Lexer {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SUBEXP_CLOSE:
|
case SUBEXP_CLOSE:
|
||||||
if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
|
if (!syntax.allowUnmatchedCloseSubexp()) {
|
||||||
|
throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
|
||||||
|
}
|
||||||
if (token.escaped) {
|
if (token.escaped) {
|
||||||
return parseExpTkRawByte(group); // goto tk_raw_byte
|
return parseExpTkRawByte(group); // goto tk_raw_byte
|
||||||
} else {
|
} else {
|
||||||
|
@ -499,7 +514,7 @@ class Parser extends Lexer {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newInternalException(ERR_PARSER_BUG);
|
throw new InternalException(ERR_PARSER_BUG);
|
||||||
|
|
||||||
} // inner switch
|
} // inner switch
|
||||||
break;
|
break;
|
||||||
|
@ -529,13 +544,8 @@ class Parser extends Lexer {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BACKREF:
|
case BACKREF:
|
||||||
int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
|
int backRef = token.getBackrefRef();
|
||||||
node = new BackRefNode(token.getBackrefNum(),
|
node = new BackRefNode(backRef, env);
|
||||||
backRefs,
|
|
||||||
token.getBackrefByName(),
|
|
||||||
token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
|
|
||||||
token.getBackrefLevel(), // ...
|
|
||||||
env);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ANCHOR:
|
case ANCHOR:
|
||||||
|
@ -546,7 +556,7 @@ class Parser extends Lexer {
|
||||||
case INTERVAL:
|
case INTERVAL:
|
||||||
if (syntax.contextIndepRepeatOps()) {
|
if (syntax.contextIndepRepeatOps()) {
|
||||||
if (syntax.contextInvalidRepeatOps()) {
|
if (syntax.contextInvalidRepeatOps()) {
|
||||||
newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
|
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
|
||||||
} else {
|
} else {
|
||||||
node = StringNode.EMPTY; // node_new_empty
|
node = StringNode.EMPTY; // node_new_empty
|
||||||
}
|
}
|
||||||
|
@ -556,7 +566,7 @@ class Parser extends Lexer {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
newInternalException(ERR_PARSER_BUG);
|
throw new InternalException(ERR_PARSER_BUG);
|
||||||
} //switch
|
} //switch
|
||||||
|
|
||||||
//targetp = node;
|
//targetp = node;
|
||||||
|
@ -599,7 +609,9 @@ class Parser extends Lexer {
|
||||||
|
|
||||||
private Node parseExpRepeat(Node target, boolean group) {
|
private Node parseExpRepeat(Node target, boolean group) {
|
||||||
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
|
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
|
||||||
if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
|
if (target.isInvalidQuantifier()) {
|
||||||
|
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
|
||||||
|
}
|
||||||
|
|
||||||
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
|
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
|
||||||
token.getRepeatUpper(),
|
token.getRepeatUpper(),
|
||||||
|
@ -631,7 +643,9 @@ class Parser extends Lexer {
|
||||||
|
|
||||||
private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
|
private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
|
||||||
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
|
while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
|
||||||
if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
|
if (target.car.isInvalidQuantifier()) {
|
||||||
|
throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
|
||||||
|
}
|
||||||
|
|
||||||
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
|
QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
|
||||||
token.getRepeatUpper(),
|
token.getRepeatUpper(),
|
||||||
|
@ -709,9 +723,9 @@ class Parser extends Lexer {
|
||||||
|
|
||||||
private void parseSubExpError(TokenType term) {
|
private void parseSubExpError(TokenType term) {
|
||||||
if (term == TokenType.SUBEXP_CLOSE) {
|
if (term == TokenType.SUBEXP_CLOSE) {
|
||||||
newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
|
throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
|
||||||
} else {
|
} else {
|
||||||
newInternalException(ERR_PARSER_BUG);
|
throw new InternalException(ERR_PARSER_BUG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,19 +19,9 @@
|
||||||
*/
|
*/
|
||||||
package jdk.nashorn.internal.runtime.regexp.joni;
|
package jdk.nashorn.internal.runtime.regexp.joni;
|
||||||
|
|
||||||
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
|
|
||||||
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
|
|
||||||
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
|
|
||||||
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
|
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
||||||
|
|
||||||
public final class Regex implements RegexState {
|
public final class Regex implements RegexState {
|
||||||
|
@ -39,31 +29,26 @@ public final class Regex implements RegexState {
|
||||||
int[] code; /* compiled pattern */
|
int[] code; /* compiled pattern */
|
||||||
int codeLength;
|
int codeLength;
|
||||||
boolean stackNeeded;
|
boolean stackNeeded;
|
||||||
Object[]operands; /* e.g. shared CClassNode */
|
Object[] operands; /* e.g. shared CClassNode */
|
||||||
int operandLength;
|
int operandLength;
|
||||||
|
|
||||||
int state; /* normal, searching, compiling */ // remove
|
|
||||||
int numMem; /* used memory(...) num counted from 1 */
|
int numMem; /* used memory(...) num counted from 1 */
|
||||||
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||||
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
|
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
|
||||||
int numCall; /* number of subexp call */
|
|
||||||
int captureHistory; /* (?@...) flag (1-31) */
|
int captureHistory; /* (?@...) flag (1-31) */
|
||||||
int btMemStart; /* need backtrack flag */
|
int btMemStart; /* need backtrack flag */
|
||||||
int btMemEnd; /* need backtrack flag */
|
int btMemEnd; /* need backtrack flag */
|
||||||
|
|
||||||
int stackPopLevel;
|
int stackPopLevel;
|
||||||
|
|
||||||
int[]repeatRangeLo;
|
int[] repeatRangeLo;
|
||||||
int[]repeatRangeHi;
|
int[] repeatRangeHi;
|
||||||
|
|
||||||
WarnCallback warnings;
|
WarnCallback warnings;
|
||||||
MatcherFactory factory;
|
MatcherFactory factory;
|
||||||
protected Analyser analyser;
|
protected Analyser analyser;
|
||||||
|
|
||||||
int options;
|
int options;
|
||||||
int userOptions;
|
|
||||||
Object userObject;
|
|
||||||
//final Syntax syntax;
|
|
||||||
final int caseFoldFlag;
|
final int caseFoldFlag;
|
||||||
|
|
||||||
/* optimization info (string search, char-map and anchors) */
|
/* optimization info (string search, char-map and anchors) */
|
||||||
|
@ -247,46 +232,48 @@ public final class Regex implements RegexState {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String optimizeInfoToString() {
|
public String optimizeInfoToString() {
|
||||||
String s = "";
|
StringBuilder s = new StringBuilder();
|
||||||
s += "optimize: " + searchAlgorithm.getName() + "\n";
|
s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
|
||||||
s += " anchor: " + OptAnchorInfo.anchorToString(anchor);
|
s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
|
||||||
|
|
||||||
if ((anchor & AnchorType.END_BUF_MASK) != 0) {
|
if ((anchor & AnchorType.END_BUF_MASK) != 0) {
|
||||||
s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax);
|
s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
|
||||||
}
|
}
|
||||||
|
|
||||||
s += "\n";
|
s.append("\n");
|
||||||
|
|
||||||
if (searchAlgorithm != SearchAlgorithm.NONE) {
|
if (searchAlgorithm != SearchAlgorithm.NONE) {
|
||||||
s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
|
s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
s += "dmin: " + dMin + " dmax: " + dMax + "\n";
|
s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
|
||||||
s += "threshold length: " + thresholdLength + "\n";
|
s.append("threshold length: ").append(thresholdLength).append("\n");
|
||||||
|
|
||||||
if (exact != null) {
|
if (exact != null) {
|
||||||
s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
|
s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
|
||||||
} else if (searchAlgorithm == SearchAlgorithm.MAP) {
|
} else if (searchAlgorithm == SearchAlgorithm.MAP) {
|
||||||
int n=0;
|
int n=0;
|
||||||
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
|
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
|
||||||
|
|
||||||
s += "map: n = " + n + "\n";
|
s.append("map: n = ").append(n).append("\n");
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
int c=0;
|
int c=0;
|
||||||
s += "[";
|
s.append("[");
|
||||||
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
|
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
|
||||||
if (map[i] != 0) {
|
if (map[i] != 0) {
|
||||||
if (c > 0) s += ", ";
|
if (c > 0) {
|
||||||
|
s.append(", ");
|
||||||
|
}
|
||||||
c++;
|
c++;
|
||||||
// TODO if (enc.isPrint(i)
|
// TODO if (enc.isPrint(i)
|
||||||
s += ((char)i);
|
s.append((char)i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s += "]\n";
|
s.append("]\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return s;
|
return s.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getOptions() {
|
public int getOptions() {
|
||||||
|
|
|
@ -39,13 +39,10 @@ public final class ScanEnvironment {
|
||||||
|
|
||||||
final public Regex reg;
|
final public Regex reg;
|
||||||
|
|
||||||
int numCall;
|
|
||||||
public int numMem;
|
public int numMem;
|
||||||
|
|
||||||
public Node memNodes[];
|
public Node memNodes[];
|
||||||
|
|
||||||
int currMaxRegNum;
|
|
||||||
boolean hasRecursion;
|
|
||||||
|
|
||||||
public ScanEnvironment(Regex regex, Syntax syntax) {
|
public ScanEnvironment(Regex regex, Syntax syntax) {
|
||||||
this.reg = regex;
|
this.reg = regex;
|
||||||
|
@ -60,13 +57,8 @@ public final class ScanEnvironment {
|
||||||
btMemEnd = bsClear();
|
btMemEnd = bsClear();
|
||||||
backrefedMem = bsClear();
|
backrefedMem = bsClear();
|
||||||
|
|
||||||
numCall = 0;
|
|
||||||
numMem = 0;
|
numMem = 0;
|
||||||
|
|
||||||
memNodes = null;
|
memNodes = null;
|
||||||
|
|
||||||
currMaxRegNum = 0;
|
|
||||||
hasRecursion = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int addMemEntry() {
|
public int addMemEntry() {
|
||||||
|
@ -117,11 +109,4 @@ public final class ScanEnvironment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void closeBracketWithoutEscapeWarn(String s) {
|
|
||||||
if (Config.USE_WARN) {
|
|
||||||
if (syntax.warnCCOpNotEscaped()) {
|
|
||||||
reg.warnings.warn("regular expression has '" + s + "' without escape");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,24 +155,4 @@ abstract class ScannerSupport extends IntHolder implements ErrorMessages {
|
||||||
return p < stop;
|
return p < stop;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void newSyntaxException(String message) {
|
|
||||||
throw new SyntaxException(message);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void newValueException(String message) {
|
|
||||||
throw new ValueException(message);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void newValueException(String message, String str) {
|
|
||||||
throw new ValueException(message, str);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void newValueException(String message, int p, int end) {
|
|
||||||
throw new ValueException(message, new String(chars, p, end - p));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void newInternalException(String message) {
|
|
||||||
throw new InternalException(message);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,12 +24,10 @@ import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
|
||||||
final class Token {
|
final class Token {
|
||||||
TokenType type;
|
TokenType type;
|
||||||
boolean escaped;
|
boolean escaped;
|
||||||
int base; /* is number: 8, 16 (used in [....]) */
|
|
||||||
int backP;
|
int backP;
|
||||||
|
|
||||||
// union fields
|
// union fields
|
||||||
private int INT1, INT2, INT3, INT4, INT5;
|
private int INT1, INT2, INT3, INT4;
|
||||||
private int []INTA1;
|
|
||||||
|
|
||||||
// union accessors
|
// union accessors
|
||||||
int getC() {
|
int getC() {
|
||||||
|
@ -53,13 +51,6 @@ final class Token {
|
||||||
INT1 = anchor;
|
INT1 = anchor;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getSubtype() {
|
|
||||||
return INT1;
|
|
||||||
}
|
|
||||||
void setSubtype(int subtype) {
|
|
||||||
INT1 = subtype;
|
|
||||||
}
|
|
||||||
|
|
||||||
// repeat union member
|
// repeat union member
|
||||||
int getRepeatLower() {
|
int getRepeatLower() {
|
||||||
return INT1;
|
return INT1;
|
||||||
|
@ -89,72 +80,13 @@ final class Token {
|
||||||
INT4 = possessive ? 1 : 0;
|
INT4 = possessive ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// backref union member
|
int getBackrefRef() {
|
||||||
int getBackrefNum() {
|
|
||||||
return INT1;
|
|
||||||
}
|
|
||||||
void setBackrefNum(int num) {
|
|
||||||
INT1 = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getBackrefRef1() {
|
|
||||||
return INT2;
|
return INT2;
|
||||||
}
|
}
|
||||||
void setBackrefRef1(int ref1) {
|
void setBackrefRef(int ref1) {
|
||||||
INT2 = ref1;
|
INT2 = ref1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int[]getBackrefRefs() {
|
|
||||||
return INTA1;
|
|
||||||
}
|
|
||||||
void setBackrefRefs(int[]refs) {
|
|
||||||
INTA1 = refs;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean getBackrefByName() {
|
|
||||||
return INT3 != 0;
|
|
||||||
}
|
|
||||||
void setBackrefByName(boolean byName) {
|
|
||||||
INT3 = byName ? 1 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// USE_BACKREF_AT_LEVEL
|
|
||||||
boolean getBackrefExistLevel() {
|
|
||||||
return INT4 != 0;
|
|
||||||
}
|
|
||||||
void setBackrefExistLevel(boolean existLevel) {
|
|
||||||
INT4 = existLevel ? 1 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getBackrefLevel() {
|
|
||||||
return INT5;
|
|
||||||
}
|
|
||||||
void setBackrefLevel(int level) {
|
|
||||||
INT5 = level;
|
|
||||||
}
|
|
||||||
|
|
||||||
// call union member
|
|
||||||
int getCallNameP() {
|
|
||||||
return INT1;
|
|
||||||
}
|
|
||||||
void setCallNameP(int nameP) {
|
|
||||||
INT1 = nameP;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getCallNameEnd() {
|
|
||||||
return INT2;
|
|
||||||
}
|
|
||||||
void setCallNameEnd(int nameEnd) {
|
|
||||||
INT2 = nameEnd;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getCallGNum() {
|
|
||||||
return INT3;
|
|
||||||
}
|
|
||||||
void setCallGNum(int gnum) {
|
|
||||||
INT3 = gnum;
|
|
||||||
}
|
|
||||||
|
|
||||||
// prop union member
|
// prop union member
|
||||||
int getPropCType() {
|
int getPropCType() {
|
||||||
return INT1;
|
return INT1;
|
||||||
|
|
|
@ -20,41 +20,15 @@
|
||||||
package jdk.nashorn.internal.runtime.regexp.joni.ast;
|
package jdk.nashorn.internal.runtime.regexp.joni.ast;
|
||||||
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
|
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
|
|
||||||
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
|
|
||||||
|
|
||||||
public final class BackRefNode extends StateNode {
|
public final class BackRefNode extends StateNode {
|
||||||
//private static int NODE_BACKREFS_SIZE = 6;
|
public final int backRef;
|
||||||
|
|
||||||
//int state;
|
public BackRefNode(int backRef, ScanEnvironment env) {
|
||||||
public int backNum;
|
this.backRef = backRef;
|
||||||
public int back[];
|
|
||||||
|
|
||||||
public int nestLevel;
|
if (backRef <= env.numMem && env.memNodes[backRef] == null) {
|
||||||
|
|
||||||
public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
|
|
||||||
this.backNum = backNum;
|
|
||||||
if (byName) setNameRef();
|
|
||||||
|
|
||||||
for (int i=0; i<backNum; i++) {
|
|
||||||
if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
|
|
||||||
setRecursion(); /* /...(\1).../ */
|
setRecursion(); /* /...(\1).../ */
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
back = new int[backNum];
|
|
||||||
System.arraycopy(backRefs, 0, back, 0, backNum); // shall we really dup it ???
|
|
||||||
}
|
|
||||||
|
|
||||||
// #ifdef USE_BACKREF_AT_LEVEL
|
|
||||||
public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
|
|
||||||
this(backNum, backRefs, byName, env);
|
|
||||||
|
|
||||||
if (existLevel) {
|
|
||||||
//state |= NST_NEST_LEVEL;
|
|
||||||
setNestLevel();
|
|
||||||
this.nestLevel = nestLevel;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,28 +45,7 @@ public final class BackRefNode extends StateNode {
|
||||||
@Override
|
@Override
|
||||||
public String toString(int level) {
|
public String toString(int level) {
|
||||||
StringBuilder value = new StringBuilder(super.toString(level));
|
StringBuilder value = new StringBuilder(super.toString(level));
|
||||||
value.append("\n backNum: " + backNum);
|
value.append("\n back: ").append(backRef);
|
||||||
String backs = "";
|
|
||||||
for (int i=0; i<back.length; i++) backs += back[i] + ", ";
|
|
||||||
value.append("\n back: " + backs);
|
|
||||||
value.append("\n nextLevel: " + nestLevel);
|
|
||||||
return value.toString();
|
return value.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void renumber(int[]map) {
|
|
||||||
if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
|
|
||||||
|
|
||||||
int oldNum = backNum;
|
|
||||||
|
|
||||||
int pos = 0;
|
|
||||||
for (int i=0; i<oldNum; i++) {
|
|
||||||
int n = map[back[i]];
|
|
||||||
if (n > 0) {
|
|
||||||
back[pos] = n;
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
backNum = pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,39 +77,12 @@ public final class CClassNode extends Node {
|
||||||
// node_new_cclass
|
// node_new_cclass
|
||||||
public CClassNode() {}
|
public CClassNode() {}
|
||||||
|
|
||||||
public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
|
|
||||||
this(not, sbOut, ranges);
|
|
||||||
this.ctype = ctype;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
bs.clear();
|
bs.clear();
|
||||||
flags = 0;
|
flags = 0;
|
||||||
mbuf = null;
|
mbuf = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// node_new_cclass_by_codepoint_range, only used by shared Char Classes
|
|
||||||
public CClassNode(boolean not, int sbOut, int[]ranges) {
|
|
||||||
if (not) setNot();
|
|
||||||
// bs.clear();
|
|
||||||
|
|
||||||
if (sbOut > 0 && ranges != null) {
|
|
||||||
int n = ranges[0];
|
|
||||||
for (int i=0; i<n; i++) {
|
|
||||||
int from = ranges[i * 2 + 1];
|
|
||||||
int to = ranges[i * 2 + 2];
|
|
||||||
for (int j=from; j<=to; j++) {
|
|
||||||
if (j >= sbOut) {
|
|
||||||
setupBuffer(ranges);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
bs.set(j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
setupBuffer(ranges);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getType() {
|
public int getType() {
|
||||||
return CCLASS;
|
return CCLASS;
|
||||||
|
@ -156,13 +129,6 @@ public final class CClassNode extends Node {
|
||||||
return flags.toString();
|
return flags.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setupBuffer(int[]ranges) {
|
|
||||||
if (ranges != null) {
|
|
||||||
if (ranges[0] == 0) return;
|
|
||||||
mbuf = new CodeRangeBuffer(ranges);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return mbuf == null && bs.isEmpty();
|
return mbuf == null && bs.isEmpty();
|
||||||
}
|
}
|
||||||
|
@ -531,11 +497,7 @@ public final class CClassNode extends Node {
|
||||||
boolean found;
|
boolean found;
|
||||||
|
|
||||||
if (code > 0xff) {
|
if (code > 0xff) {
|
||||||
if (mbuf == null) {
|
found = mbuf != null && mbuf.isInCodeRange(code);
|
||||||
found = false;
|
|
||||||
} else {
|
|
||||||
found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
found = bs.at(code);
|
found = bs.at(code);
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,6 @@ public abstract class StateNode extends Node implements NodeStatus {
|
||||||
if (isRecursion()) states.append("RECURSION ");
|
if (isRecursion()) states.append("RECURSION ");
|
||||||
if (isCalled()) states.append("CALLED ");
|
if (isCalled()) states.append("CALLED ");
|
||||||
if (isAddrFixed()) states.append("ADDR_FIXED ");
|
if (isAddrFixed()) states.append("ADDR_FIXED ");
|
||||||
if (isNameRef()) states.append("NAME_REF ");
|
|
||||||
if (isInRepeat()) states.append("IN_REPEAT ");
|
if (isInRepeat()) states.append("IN_REPEAT ");
|
||||||
if (isNestLevel()) states.append("NEST_LEVEL ");
|
if (isNestLevel()) states.append("NEST_LEVEL ");
|
||||||
if (isByNumber()) states.append("BY_NUMBER ");
|
if (isByNumber()) states.append("BY_NUMBER ");
|
||||||
|
@ -132,14 +131,6 @@ public abstract class StateNode extends Node implements NodeStatus {
|
||||||
state |= NST_ADDR_FIXED;
|
state |= NST_ADDR_FIXED;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isNameRef() {
|
|
||||||
return (state & NST_NAME_REF) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setNameRef() {
|
|
||||||
state |= NST_NAME_REF;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isInRepeat() {
|
public boolean isInRepeat() {
|
||||||
return (state & NST_IN_REPEAT) != 0;
|
return (state & NST_IN_REPEAT) != 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,7 +96,7 @@ public final class StringNode extends Node implements StringType {
|
||||||
if (chars[i] >= 0x20 && chars[i] < 0x7f) {
|
if (chars[i] >= 0x20 && chars[i] < 0x7f) {
|
||||||
value.append(chars[i]);
|
value.append(chars[i]);
|
||||||
} else {
|
} else {
|
||||||
value.append(String.format("[0x%04x]", chars[i]));
|
value.append(String.format("[0x%04x]", (int)chars[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
value.append("'");
|
value.append("'");
|
||||||
|
@ -122,10 +122,7 @@ public final class StringNode extends Node implements StringType {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canBeSplit() {
|
public boolean canBeSplit() {
|
||||||
if (end > p) {
|
return end > p && 1 < (end - p);
|
||||||
return 1 < (end - p);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void set(char[] chars, int p, int end) {
|
public void set(char[] chars, int p, int end) {
|
||||||
|
|
|
@ -29,12 +29,6 @@ public interface OPCode {
|
||||||
final int EXACT4 = 5; /* single byte, N = 4 */
|
final int EXACT4 = 5; /* single byte, N = 4 */
|
||||||
final int EXACT5 = 6; /* single byte, N = 5 */
|
final int EXACT5 = 6; /* single byte, N = 5 */
|
||||||
final int EXACTN = 7; /* single byte */
|
final int EXACTN = 7; /* single byte */
|
||||||
final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
|
|
||||||
final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
|
|
||||||
final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
|
|
||||||
final int EXACTMB2N = 11; /* mb-length = 2 */
|
|
||||||
final int EXACTMB3N = 12; /* mb-length = 3 */
|
|
||||||
final int EXACTMBN = 13; /* other length */
|
|
||||||
|
|
||||||
final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
|
final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
|
||||||
final int EXACTN_IC = 15; /* single byte, ignore case */
|
final int EXACTN_IC = 15; /* single byte, ignore case */
|
||||||
|
@ -125,28 +119,4 @@ public interface OPCode {
|
||||||
final int SET_OPTION_PUSH = 86; /* set option and push recover option */
|
final int SET_OPTION_PUSH = 86; /* set option and push recover option */
|
||||||
final int SET_OPTION = 87; /* set option */
|
final int SET_OPTION = 87; /* set option */
|
||||||
|
|
||||||
// single byte versions
|
|
||||||
final int ANYCHAR_SB = 88; /* "." */
|
|
||||||
final int ANYCHAR_ML_SB = 89; /* "." multi-line */
|
|
||||||
final int ANYCHAR_STAR_SB = 90; /* ".*" */
|
|
||||||
final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
|
|
||||||
final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
|
|
||||||
final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
|
|
||||||
final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
|
|
||||||
final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
|
|
||||||
|
|
||||||
final int CCLASS_SB = 96;
|
|
||||||
final int CCLASS_NOT_SB = 97;
|
|
||||||
final int WORD_SB = 98;
|
|
||||||
final int NOT_WORD_SB = 99;
|
|
||||||
final int WORD_BOUND_SB = 100;
|
|
||||||
final int NOT_WORD_BOUND_SB = 101;
|
|
||||||
final int WORD_BEGIN_SB = 102;
|
|
||||||
final int WORD_END_SB = 103;
|
|
||||||
|
|
||||||
final int LOOK_BEHIND_SB = 104;
|
|
||||||
|
|
||||||
final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
|
|
||||||
final int EXACTN_IC_SB = 106; /* single byte, ignore case */
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue