8142900: Xerces Update: Xerces XPath

Reviewed-by: lancea
This commit is contained in:
Joe Wang 2015-11-16 14:12:06 -08:00
parent 64682e8faa
commit 0204e737b3
16 changed files with 9608 additions and 234 deletions

View file

@ -25,9 +25,12 @@ import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLSymbols;
import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
import com.sun.org.apache.xerces.internal.xni.QName;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.stream.Collectors;
/**
* Bare minimum XPath parser.
@ -47,20 +50,18 @@ public class XPath {
private static final boolean DEBUG_XPATH_PARSE = DEBUG_ALL || false;
private static final boolean DEBUG_ANY = DEBUG_XPATH_PARSE;
//
// Data
//
/** Expression. */
protected String fExpression;
protected final String fExpression;
/** Symbol table. */
protected SymbolTable fSymbolTable;
protected final SymbolTable fSymbolTable;
/** Location paths. */
protected LocationPath[] fLocationPaths;
protected final LocationPath[] fLocationPaths;
//
// Constructors
@ -72,7 +73,7 @@ public class XPath {
throws XPathException {
fExpression = xpath;
fSymbolTable = symbolTable;
parseExpression(context);
fLocationPaths = parseExpression(context);
} // <init>(String,SymbolTable,NamespaceContext)
//
@ -101,15 +102,14 @@ public class XPath {
//
/**
* Returns a string representation of this object: the string form of each
* entry of {@code fLocationPaths}, in array order, separated by "|".
*
* NOTE(review): two bodies appear back to back below, a StringBuffer loop
* followed by a stream pipeline. Presumably the first is the pre-change text
* and the second its replacement; confirm against the repository before
* treating either one as current.
*
* @return the "|"-joined string forms of the location paths
*/
@Override
public String toString() {
// Variant 1: manual join. The separator is emitted before every entry except the first.
StringBuffer buf=new StringBuffer();
for (int i=0;i<fLocationPaths.length;i++){
if (i>0){
buf.append("|");
}
buf.append(fLocationPaths[i].toString());
}
return buf.toString();
// Variant 2: the same join, expressed with a stream and Collectors.joining("|").
final List<LocationPath> l = Arrays.asList(fLocationPaths);
final String s = l.stream()
.map(aPath -> aPath.toString())
.collect(Collectors.joining("|"));
return s;
} // toString():String
//
@ -132,12 +132,12 @@ public class XPath {
* to build a {@link LocationPath} object from the accumulated
* {@link Step}s.
*/
private LocationPath buildLocationPath( Vector stepsVector ) throws XPathException {
private LocationPath buildLocationPath( ArrayList<Step> stepsVector ) throws XPathException {
// NOTE(review): the two signature lines above, and the copyInto/removeAllElements
// versus toArray/clear pairs below, look like the Vector-based and ArrayList-based
// forms of the same statements shown together; confirm which is current.
int size = stepsVector.size();
// An empty step list is passed to check() as a failed condition; presumably
// check() throws XPathException in that case -- its body is not visible here.
check(size!=0);
Step[] steps = new Step[size];
// Copy the accumulated steps into the array, then empty the list that was passed in.
stepsVector.copyInto(steps);
stepsVector.removeAllElements();
steps = stepsVector.toArray(steps);
stepsVector.clear();
return new LocationPath(steps);
}
@ -146,7 +146,7 @@ public class XPath {
* This method is implemented by using the XPathExprScanner and
* examining the list of tokens that it returns.
*/
private void parseExpression(final NamespaceContext context)
private LocationPath[] parseExpression(final NamespaceContext context)
throws XPathException {
// tokens
@ -184,8 +184,8 @@ public class XPath {
throw new XPathException("c-general-xpath");
//fTokens.dumpTokens();
Vector stepsVector = new Vector();
Vector locationPathsVector= new Vector();
ArrayList<Step> stepsVector = new ArrayList<>();
ArrayList<LocationPath> locationPathsVector= new ArrayList<>();
// true when the next token should be 'Step' (as defined in
// the production rule [3] of XML Schema P1 section 3.11.6
@ -194,28 +194,39 @@ public class XPath {
// this is to make sure we can detect a token list like
// 'abc' '/' '/' 'def' 'ghi'
boolean expectingStep = true;
boolean expectingDoubleColon = false;
while(xtokens.hasMore()) {
while (xtokens.hasMore()) {
final int token = xtokens.nextToken();
switch (token) {
case XPath.Tokens.EXPRTOKEN_OPERATOR_UNION :{
check(!expectingStep);
locationPathsVector.addElement(buildLocationPath(stepsVector));
locationPathsVector.add(buildLocationPath(stepsVector));
expectingStep=true;
break;
}
case XPath.Tokens.EXPRTOKEN_ATSIGN: {
check(expectingStep);
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=false;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE: {
check(expectingStep);
// If we got here we're expecting attribute::
if (xtokens.nextToken() != XPath.Tokens.EXPRTOKEN_DOUBLE_COLON) {
throw new XPathException("c-general-xpath");
}
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.add(step);
expectingStep = false;
break;
}
case XPath.Tokens.EXPRTOKEN_NAMETEST_ANY:
case XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE:
case XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME: {
@ -223,11 +234,23 @@ public class XPath {
Step step = new Step(
new Axis(Axis.CHILD),
parseNodeTest(token,xtokens,context));
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=false;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD: {
check(expectingStep);
// If we got here we're expecting child::
if (xtokens.nextToken() != XPath.Tokens.EXPRTOKEN_DOUBLE_COLON) {
throw new XPathException("c-general-xpath");
}
Step step = new Step(
new Axis(Axis.CHILD),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.add(step);
expectingStep = false;
break;
}
case XPath.Tokens.EXPRTOKEN_PERIOD: {
check(expectingStep);
expectingStep=false;
@ -237,12 +260,12 @@ public class XPath {
// This amounts to shorten "a/././b/./c" to "a/b/c".
// Also, the matcher fails to work correctly if XPath
// has those redundant dots.
if (stepsVector.size()==0) {
if (stepsVector.isEmpty()) {
// build step
Axis axis = new Axis(Axis.SELF);
NodeTest nodeTest = new NodeTest(NodeTest.NODE);
Step step = new Step(axis, nodeTest);
stepsVector.addElement(step);
stepsVector.add(step);
if( xtokens.hasMore()
&& xtokens.peekToken() == XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH){
@ -253,67 +276,41 @@ public class XPath {
axis = new Axis(Axis.DESCENDANT);
nodeTest = new NodeTest(NodeTest.NODE);
step = new Step(axis, nodeTest);
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=true;
}
}
break;
}
case XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH:{
// this cannot appear in arbitrary position.
// this cannot appear in an arbitrary position.
// it is only allowed right after '.' when
// '.' is the first token of a location path.
throw new XPathException("c-general-xpath");
}
case XPath.Tokens.EXPRTOKEN_DOUBLE_COLON: {
// :: cannot appear in an arbitrary position.
// We only expect this token if the xpath
// contains child:: or attribute::
throw new XPathException("c-general-xpath");
}
case XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH: {
check(!expectingStep);
expectingStep=true;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE: {
check(expectingStep);
expectingDoubleColon = true;
if (xtokens.nextToken() == XPath.Tokens.EXPRTOKEN_DOUBLE_COLON){
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.addElement(step);
expectingStep=false;
expectingDoubleColon = false;
}
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD:{
check(expectingStep);
expectingDoubleColon = true;
break;
}
case XPath.Tokens.EXPRTOKEN_DOUBLE_COLON :{
check(expectingStep);
check(expectingDoubleColon);
expectingDoubleColon = false;
break;
}
default:
// we should have covered all the tokens that we can possibly see.
throw new XPathException("c-general-xpath");
}
throw new InternalError();
}
}
check(!expectingStep);
locationPathsVector.addElement(buildLocationPath(stepsVector));
locationPathsVector.add(buildLocationPath(stepsVector));
// save location path
fLocationPaths=new LocationPath[locationPathsVector.size()];
locationPathsVector.copyInto(fLocationPaths);
if (DEBUG_XPATH_PARSE) {
System.out.println(">>> "+fLocationPaths);
}
// return location path
return locationPathsVector.toArray(new LocationPath[locationPathsVector.size()]);
} // parseExpression(SymbolTable,NamespaceContext)
@ -378,7 +375,7 @@ public class XPath {
//
/** List of steps. */
public Step[] steps;
public final Step[] steps;
//
// Constructors
@ -445,10 +442,10 @@ public class XPath {
//
/** Axis. */
public Axis axis;
public final Axis axis;
/** Node test. */
public NodeTest nodeTest;
public final NodeTest nodeTest;
//
// Constructors
@ -525,7 +522,7 @@ public class XPath {
//
/** Axis type. */
public short type;
public final short type;
//
// Constructors
@ -594,7 +591,7 @@ public class XPath {
//
/** Node test type. */
public short type;
public final short type;
/** Node qualified name. */
public final QName name = new QName();
@ -856,13 +853,13 @@ public class XPath {
private int[] fTokens = new int[INITIAL_TOKEN_COUNT];
private int fTokenCount = 0; // for writing
private SymbolTable fSymbolTable;
private final SymbolTable fSymbolTable;
// REVISIT: Code something better here. -Ac
private Map<String, Integer> fSymbolMapping = new HashMap<>();
private final Map<String, Integer> fSymbolMapping = new HashMap<>();
// REVISIT: Code something better here. -Ac
private Map<Integer, String> fTokenNames = new HashMap<>();
private final Map<Integer, String> fTokenNames = new HashMap<>();
/**
* Current position in the token list.
@ -1888,6 +1885,10 @@ public class XPath {
tokens.addToken(nameHandle);
}
break;
default:
// CHARTYPE_INVALID or CHARTYPE_OTHER
// We're not expecting to find either of these in a valid expression.
return false;
}
}
if (XPath.Tokens.DUMP_TOKENS) {

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 2001, 2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -37,7 +38,7 @@ public class XPathException
// Data
// hold the value of the key this Exception refers to.
private String fKey;
private final String fKey;
//
// Constructors
//

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -29,9 +30,9 @@ import java.text.CharacterIterator;
*
*/
public class BMPattern {
char[] pattern;
int[] shiftTable;
boolean ignoreCase;
final char[] pattern;
final int[] shiftTable;
final boolean ignoreCase;
public BMPattern(String pat, boolean ignoreCase) {
this(pat, 256, ignoreCase);

View file

@ -24,32 +24,27 @@ package com.sun.org.apache.xerces.internal.impl.xpath.regex;
/**
*/
public class CaseInsensitiveMap {
final class CaseInsensitiveMap {
private static int CHUNK_SHIFT = 10; /* 2^10 = 1k */
private static int CHUNK_SIZE = (1<<CHUNK_SHIFT);
private static int CHUNK_MASK = (CHUNK_SIZE-1);
private static int INITIAL_CHUNK_COUNT = 64; /* up to 0xFFFF */
private static final int CHUNK_SHIFT = 10; /* 2^10 = 1k */
private static final int CHUNK_SIZE = (1<<CHUNK_SHIFT);
private static final int CHUNK_MASK = (CHUNK_SIZE-1);
private static final int INITIAL_CHUNK_COUNT = 64; /* up to 0xFFFF */
private static int[][][] caseInsensitiveMap;
private static Boolean mapBuilt = Boolean.FALSE;
private static int LOWER_CASE_MATCH = 1;
private static int UPPER_CASE_MATCH = 2;
private static final int LOWER_CASE_MATCH = 1;
private static final int UPPER_CASE_MATCH = 2;
static {
buildCaseInsensitiveMap();
}
/**
* Return a list of code point characters (not including the input value)
* that can be substituted in a case insensitive match.
*
* @param codePoint the code point to look up
* @return the result of {@code getMapping(codePoint)} when
*         {@code codePoint < 0x10000}; {@code null} for any larger value
*/
static public int[] get(int codePoint) {
// Build the map here if mapBuilt is still Boolean.FALSE; the flag is tested
// once outside and once inside the synchronized block.
// NOTE(review): a static initializer that calls buildCaseInsensitiveMap() is
// visible just above this method, so this guarded call looks like the
// pre-change form of the method -- confirm which is current.
if (mapBuilt == Boolean.FALSE) {
synchronized (mapBuilt) {
if (mapBuilt == Boolean.FALSE) {
buildCaseInsensitiveMap();
}
} // synchronized
} // if mapBuilt
// Code points at or above 0x10000 are not looked up; null is returned for them.
return (codePoint < 0x10000) ? getMapping(codePoint) : null;
}
@ -61,11 +56,7 @@ public class CaseInsensitiveMap {
}
private static void buildCaseInsensitiveMap() {
caseInsensitiveMap = new int[INITIAL_CHUNK_COUNT][][];
for (int i=0; i<INITIAL_CHUNK_COUNT; i++) {
caseInsensitiveMap[i] = new int[CHUNK_SIZE][];
}
caseInsensitiveMap = new int[INITIAL_CHUNK_COUNT][CHUNK_SIZE][];
int lc, uc;
for (int i=0; i<0x10000; i++) {
lc = Character.toLowerCase(i);
@ -100,8 +91,6 @@ public class CaseInsensitiveMap {
set(i, map);
}
}
mapBuilt = Boolean.TRUE;
}
private static int[] expandMap(int[] srcMap, int expandBy) {

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -20,7 +21,7 @@
package com.sun.org.apache.xerces.internal.impl.xpath.regex;
import java.util.Vector;
import java.util.ArrayList;
/**
* @xerces.internal
@ -126,7 +127,7 @@ class Op {
return op;
}
int type;
final int type;
Op next = null;
protected Op(int type) {
@ -158,7 +159,7 @@ class Op {
// ================================================================
static class CharOp extends Op {
int charData;
final int charData;
CharOp(int type, int data) {
super(type);
this.charData = data;
@ -170,19 +171,19 @@ class Op {
// ================================================================
/**
* Op that holds an ordered list of branch Ops.
*
* NOTE(review): each Vector-based line below is immediately followed by an
* ArrayList-based equivalent. Presumably these are the before and after forms
* of the same statement in this change; confirm against the repository.
*/
static class UnionOp extends Op {
// Branch operations, in the order they were added.
Vector branches;
final ArrayList<Op> branches;
// Creates the op with the given type; size is handed to the list constructor
// (the initial capacity for both Vector and ArrayList).
UnionOp(int type, int size) {
super(type);
this.branches = new Vector(size);
this.branches = new ArrayList<>(size);
}
// Appends a branch to the end of the list.
void addElement(Op op) {
this.branches.addElement(op);
this.branches.add(op);
}
// Number of branches currently held.
int size() {
return this.branches.size();
}
// Returns the branch stored at the given zero-based index.
Op elementAt(int index) {
return (Op)this.branches.elementAt(index);
return this.branches.get(index);
}
}
@ -201,8 +202,8 @@ class Op {
}
// ================================================================
static class ModifierOp extends ChildOp {
int v1;
int v2;
final int v1;
final int v2;
ModifierOp(int type, int v1, int v2) {
super(type);
this.v1 = v1;
@ -217,7 +218,7 @@ class Op {
}
// ================================================================
static class RangeOp extends Op {
Token tok;
final Token tok;
RangeOp(int type, Token tok) {
super(type);
this.tok = tok;
@ -228,7 +229,7 @@ class Op {
}
// ================================================================
static class StringOp extends Op {
String string;
final String string;
StringOp(int type, String literal) {
super(type);
this.string = literal;
@ -239,10 +240,10 @@ class Op {
}
// ================================================================
static class ConditionOp extends Op {
int refNumber;
Op condition;
Op yes;
Op no;
final int refNumber;
final Op condition;
final Op yes;
final Op no;
ConditionOp(int type, int refno, Op conditionflow, Op yesflow, Op noflow) {
super(type);
this.refNumber = refno;

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -30,7 +31,7 @@ public class ParseException extends RuntimeException {
/** Serialization version. */
static final long serialVersionUID = -7012400318097691370L;
int location;
final int location;
/*
public ParseException(String mes) {

View file

@ -252,7 +252,7 @@ class ParserForXMLSchema extends RegexParser {
if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
if (c == '-' && this.chardata != ']' && !firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
}
if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && firstloop) { // Here is no '-'.
if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
tok.addRange(c, c);
}
@ -382,17 +382,20 @@ class ParserForXMLSchema extends RegexParser {
ranges2.put("xml:isSpace", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, DIGITS);
setupRange(tok, DIGITS_INT);
setupRange(tok, DIGITS_INTS);
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
/*
* \w is defined by the XML Schema specification to be:
* [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
*/
tok = Token.createRange();
setupRange(tok, LETTERS);
setupRange(tok, LETTERS_INT);
tok.mergeRanges(ranges.get("xml:isDigit"));
ranges.put("xml:isWord", tok);
ranges2.put("xml:isWord", Token.complementRanges(tok));
tok.mergeRanges(Token.getRange("P", true));
tok.mergeRanges(Token.getRange("Z", true));
tok.mergeRanges(Token.getRange("C", true));
ranges2.put("xml:isWord", tok);
ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, NAMECHARS);
@ -401,6 +404,7 @@ class ParserForXMLSchema extends RegexParser {
tok = Token.createRange();
setupRange(tok, LETTERS);
setupRange(tok, LETTERS_INT);
tok.addRange('_', '_');
tok.addRange(':', ':');
ranges.put("xml:isInitialNameChar", tok);
@ -502,11 +506,12 @@ class ParserForXMLSchema extends RegexParser {
private static final int[] LETTERS_INT = {0x1d790, 0x1d7a8, 0x1d7aa, 0x1d7c9, 0x2fa1b, 0x2fa1d};
private static final String DIGITS =
"\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
+"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
+"\u0F20\u0F29\u1040\u1049\u1369\u1371\u17E0\u17E9\u1810\u1819\uFF10\uFF19";
private static final int[] DIGITS_INT = {0x1D7CE, 0x1D7FF};
private static final int[] DIGITS_INTS = {
0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
0x1D7CE, 0x1D7FF
};
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -110,27 +110,27 @@ public final class REUtil {
}
/**
* Builds the option-letter string for an option bit mask.
*
* For each RegularExpression flag that is set in {@code options}, one
* character is appended, always in this order: F, H, X, i, m, s, u, w, x
* and ','.
*
* NOTE(review): every statement that differs between the StringBuffer/cast
* form and the StringBuilder/no-cast form appears twice below. Presumably the
* first of each pair is the pre-change text; confirm against the repository.
*
* @param options bit mask of RegularExpression option flags
* @return the interned string of option letters (empty when no listed flag is set)
*/
static final String createOptionString(int options) {
// Initial capacity is 9 although ten flags are tested below; the buffer grows if needed.
StringBuffer sb = new StringBuffer(9);
StringBuilder sb = new StringBuilder(9);
if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
sb.append((char)'F');
sb.append('F');
if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
sb.append((char)'H');
sb.append('H');
if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
sb.append((char)'X');
sb.append('X');
if ((options & RegularExpression.IGNORE_CASE) != 0)
sb.append((char)'i');
sb.append('i');
if ((options & RegularExpression.MULTIPLE_LINES) != 0)
sb.append((char)'m');
sb.append('m');
if ((options & RegularExpression.SINGLE_LINE) != 0)
sb.append((char)'s');
sb.append('s');
if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
sb.append((char)'u');
sb.append('u');
if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
sb.append((char)'w');
sb.append('w');
if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
sb.append((char)'x');
sb.append('x');
if ((options & RegularExpression.SPECIAL_COMMA) != 0)
sb.append((char)',');
sb.append(',');
// intern() returns the canonical instance, so equal option strings share one object.
return sb.toString().intern();
}
@ -138,13 +138,19 @@ public final class REUtil {
static String stripExtendedComment(String regex) {
int len = regex.length();
StringBuffer buffer = new StringBuffer(len);
StringBuilder buffer = new StringBuilder(len);
int offset = 0;
int charClass = 0;
while (offset < len) {
int ch = regex.charAt(offset++);
// Skips a white space.
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') {
// if we are inside a character class, we keep the white space
if (charClass > 0) {
buffer.append((char)ch);
}
continue;
}
if (ch == '#') { // Skips characters between '#' and a line end.
while (offset < len) {
@ -163,12 +169,36 @@ public final class REUtil {
buffer.append((char)next);
offset ++;
} else { // Other escaped character.
buffer.append((char)'\\');
buffer.append('\\');
buffer.append((char)next);
offset ++;
}
} else // As is.
}
else if (ch == '[') {
charClass++;
buffer.append((char)ch);
if (offset < len) {
next = regex.charAt(offset);
if (next == '[' || next ==']') {
buffer.append((char)next);
offset ++;
}
else if (next == '^' && offset + 1 < len) {
next = regex.charAt(offset + 1);
if (next == '[' || next ==']') {
buffer.append('^');
buffer.append((char)next);
offset += 2;
}
}
}
}
else {
if (charClass > 0 && ch == ']') {
--charClass;
}
buffer.append((char)ch);
}
}
return buffer.toString();
}
@ -307,15 +337,15 @@ public final class REUtil {
*/
public static String quoteMeta(String literal) {
int len = literal.length();
StringBuffer buffer = null;
StringBuilder buffer = null;
for (int i = 0; i < len; i ++) {
int ch = literal.charAt(i);
if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
if (buffer == null) {
buffer = new StringBuffer(i+(len-i)*2);
buffer = new StringBuilder(i+(len-i)*2);
if (i > 0) buffer.append(literal.substring(0, i));
}
buffer.append((char)'\\');
buffer.append('\\');
buffer.append((char)ch);
} else if (buffer != null)
buffer.append((char)ch);

View file

@ -1,6 +1,5 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -43,7 +42,7 @@ final class RangeToken extends Token implements java.io.Serializable {
this.setSorted(false);
}
// for RANGE or NRANGE
// for RANGE or NRANGE
protected void addRange(int start, int end) {
this.icaseCache = null;
//System.err.println("Token#addRange(): "+start+" "+end);
@ -560,7 +559,7 @@ final class RangeToken extends Token implements java.io.Serializable {
sb.append(escapeCharInCharClass(this.ranges[i]));
} else {
sb.append(escapeCharInCharClass(this.ranges[i]));
sb.append((char)'-');
sb.append('-');
sb.append(escapeCharInCharClass(this.ranges[i+1]));
}
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -24,7 +24,7 @@ import com.sun.org.apache.xerces.internal.utils.SecuritySupport;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.Vector;
import java.util.ArrayList;
/**
* A Regular Expression Parser.
@ -82,8 +82,7 @@ class RegexParser {
int parenOpened = 1;
int parennumber = 1;
boolean hasBackReferences;
Vector references = null;
int parenCount = 0;
ArrayList<ReferencePosition> references = null;
public RegexParser() {
this.setLocale(Locale.getDefault());
@ -115,7 +114,7 @@ class RegexParser {
return (this.options & flag) == flag;
}
synchronized Token parse(String regex, int options) throws ParseException {
Token parse(String regex, int options) throws ParseException {
this.options = options;
this.offset = 0;
this.setContext(S_NORMAL);
@ -132,15 +131,16 @@ class RegexParser {
Token ret = this.parseRegex();
if (this.offset != this.regexlen)
throw ex("parser.parse.1", this.offset);
if (parenCount < 0)
throw ex("parser.factor.0", this.offset);
if (this.read() != T_EOF) {
throw ex("parser.parse.1", this.offset-1);
}
if (this.references != null) {
for (int i = 0; i < this.references.size(); i ++) {
ReferencePosition position = (ReferencePosition)this.references.elementAt(i);
ReferencePosition position = this.references.get(i);
if (this.parennumber <= position.refNumber)
throw ex("parser.parse.2", position.position);
}
this.references.removeAllElements();
this.references.clear();
}
return ret;
}
@ -160,6 +160,7 @@ class RegexParser {
return this.nexttoken;
}
@SuppressWarnings("fallthrough")
final void next() {
if (this.offset >= this.regexlen) {
this.chardata = -1;
@ -239,7 +240,6 @@ class RegexParser {
break;
case '(':
ret = T_LPAREN;
parenCount++;
if (this.offset >= this.regexlen)
break;
if (this.regex.charAt(this.offset) != '?')
@ -328,11 +328,10 @@ class RegexParser {
*/
Token parseTerm() throws ParseException {
int ch = this.read();
Token tok = null;
if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) {
tok = Token.createEmpty();
return Token.createEmpty();
} else {
tok = this.parseFactor();
Token tok = this.parseFactor();
Token concat = null;
while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) {
if (concat == null) {
@ -343,11 +342,8 @@ class RegexParser {
concat.addChild(this.parseFactor());
//tok = Token.createConcat(tok, this.parseFactor());
}
return tok;
}
if (ch == T_RPAREN) {
parenCount--;
}
return tok;
}
// ----------------------------------------------------------------
@ -482,7 +478,7 @@ class RegexParser {
while (this.offset + 1 < this.regexlen) {
ch = this.regex.charAt(this.offset + 1);
if ('1' <= ch && ch <= '9') {
if ('0' <= ch && ch <= '9') {
refno = (refno * 10) + (ch - '0');
if (refno < this.parennumber) {
finalRefno= refno;
@ -498,8 +494,8 @@ class RegexParser {
}
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
this.references.addElement(new ReferencePosition(finalRefno, this.offset));
if (this.references == null) this.references = new ArrayList<>();
this.references.add(new ReferencePosition(finalRefno, this.offset));
this.offset ++;
if (this.regex.charAt(this.offset) != ')') throw ex("parser.factor.1", this.offset);
this.offset ++;
@ -615,7 +611,7 @@ class RegexParser {
while (this.offset < this.regexlen) {
final int ch = this.regex.charAt(this.offset);
if ('1' <= ch && ch <= '9') {
if ('0' <= ch && ch <= '9') {
refnum = (refnum * 10) + (ch - '0');
if (refnum < this.parennumber) {
++this.offset;
@ -633,8 +629,8 @@ class RegexParser {
Token tok = Token.createBackReference(finalRefnum);
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
this.references.addElement(new ReferencePosition(finalRefnum, this.offset-2));
if (this.references == null) this.references = new ArrayList<>();
this.references.add(new ReferencePosition(finalRefnum, this.offset-2));
this.next();
return tok;
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2002,2004,2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -1041,9 +1041,10 @@ public class RegularExpression implements java.io.Serializable {
/**
* @return -1 when not match; offset of the end of matched string when match.
*/
@SuppressWarnings("fallthrough")
private int match(Context con, Op op, int offset, int dx, int opts) {
final ExpressionTarget target = con.target;
final Stack opStack = new Stack();
final Stack<Op> opStack = new Stack<>();
final IntStack dataStack = new IntStack();
final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE);
int retValue = -1;
@ -1322,7 +1323,7 @@ public class RegularExpression implements java.io.Serializable {
return retValue;
}
op = (Op) opStack.pop();
op = opStack.pop();
offset = dataStack.pop();
switch (op.type) {

View file

@ -859,7 +859,7 @@ class Token implements java.io.Serializable {
buffer.append("Is");
if (n.indexOf(' ') >= 0) {
for (int ci = 0; ci < n.length(); ci ++)
if (n.charAt(ci) != ' ') buffer.append((char)n.charAt(ci));
if (n.charAt(ci) != ' ') buffer.append(n.charAt(ci));
}
else {
buffer.append(n);
@ -995,8 +995,8 @@ class Token implements java.io.Serializable {
}
private static void setAlias(String newName, String name, boolean positive) {
Token t1 = (Token)Token.categories.get(name);
Token t2 = (Token)Token.categories2.get(name);
Token t1 = Token.categories.get(name);
Token t2 = Token.categories2.get(name);
if (positive) {
Token.categories.put(newName, t1);
Token.categories2.put(newName, t2);
@ -1525,7 +1525,7 @@ class Token implements java.io.Serializable {
this.children.stream().forEach((children1) -> {
sb.append((children1).toString(options));
});
ret = new String(sb);
ret = sb.toString();
}
return ret;
}
@ -1538,10 +1538,10 @@ class Token implements java.io.Serializable {
StringBuilder sb = new StringBuilder();
sb.append((this.children.get(0)).toString(options));
for (int i = 1; i < this.children.size(); i ++) {
sb.append((char)'|');
sb.append('|');
sb.append((this.children.get(i)).toString(options));
}
ret = new String(sb);
ret = sb.toString();
}
return ret;
}
@ -1557,7 +1557,7 @@ class Token implements java.io.Serializable {
ObjectOutputStream.PutField pf = out.putFields();
pf.put("children", vChildren);
out.writeFields();
}
}
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in)