This commit is contained in:
Alejandro Murillo 2015-11-16 22:36:41 -08:00
commit 8756c98eb8
16 changed files with 9608 additions and 234 deletions

View file

@ -25,9 +25,12 @@ import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLSymbols;
import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
import com.sun.org.apache.xerces.internal.xni.QName;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.stream.Collectors;
/**
* Bare minimum XPath parser.
@ -47,20 +50,18 @@ public class XPath {
private static final boolean DEBUG_XPATH_PARSE = DEBUG_ALL || false;
private static final boolean DEBUG_ANY = DEBUG_XPATH_PARSE;
//
// Data
//
/** Expression. */
protected String fExpression;
protected final String fExpression;
/** Symbol table. */
protected SymbolTable fSymbolTable;
protected final SymbolTable fSymbolTable;
/** Location paths. */
protected LocationPath[] fLocationPaths;
protected final LocationPath[] fLocationPaths;
//
// Constructors
@ -72,7 +73,7 @@ public class XPath {
throws XPathException {
fExpression = xpath;
fSymbolTable = symbolTable;
parseExpression(context);
fLocationPaths = parseExpression(context);
} // <init>(String,SymbolTable,NamespaceContext)
//
@ -101,15 +102,14 @@ public class XPath {
//
/**
 * Returns a string representation of this object.
 *
 * @return the string forms of all location paths, in array order,
 *         joined with {@code |}; the empty string when there are none
 */
@Override
public String toString() {
    // One pass over the array; Collectors.joining inserts the separator
    // only between elements, exactly like the former index-based loop.
    return Arrays.stream(fLocationPaths)
            .map(LocationPath::toString)
            .collect(Collectors.joining("|"));
} // toString():String
//
@ -132,12 +132,12 @@ public class XPath {
* to build a {@link LocationPath} object from the accumulated
* {@link Step}s.
*/
private LocationPath buildLocationPath( Vector stepsVector ) throws XPathException {
private LocationPath buildLocationPath( ArrayList<Step> stepsVector ) throws XPathException {
int size = stepsVector.size();
check(size!=0);
Step[] steps = new Step[size];
stepsVector.copyInto(steps);
stepsVector.removeAllElements();
steps = stepsVector.toArray(steps);
stepsVector.clear();
return new LocationPath(steps);
}
@ -146,7 +146,7 @@ public class XPath {
* This method is implemented by using the XPathExprScanner and
* examining the list of tokens that it returns.
*/
private void parseExpression(final NamespaceContext context)
private LocationPath[] parseExpression(final NamespaceContext context)
throws XPathException {
// tokens
@ -184,8 +184,8 @@ public class XPath {
throw new XPathException("c-general-xpath");
//fTokens.dumpTokens();
Vector stepsVector = new Vector();
Vector locationPathsVector= new Vector();
ArrayList<Step> stepsVector = new ArrayList<>();
ArrayList<LocationPath> locationPathsVector= new ArrayList<>();
// true when the next token should be 'Step' (as defined in
// the production rule [3] of XML Schema P1 section 3.11.6
@ -194,28 +194,39 @@ public class XPath {
// this is to make sure we can detect a token list like
// 'abc' '/' '/' 'def' 'ghi'
boolean expectingStep = true;
boolean expectingDoubleColon = false;
while(xtokens.hasMore()) {
while (xtokens.hasMore()) {
final int token = xtokens.nextToken();
switch (token) {
case XPath.Tokens.EXPRTOKEN_OPERATOR_UNION :{
check(!expectingStep);
locationPathsVector.addElement(buildLocationPath(stepsVector));
locationPathsVector.add(buildLocationPath(stepsVector));
expectingStep=true;
break;
}
case XPath.Tokens.EXPRTOKEN_ATSIGN: {
check(expectingStep);
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=false;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE: {
check(expectingStep);
// If we got here we're expecting attribute::
if (xtokens.nextToken() != XPath.Tokens.EXPRTOKEN_DOUBLE_COLON) {
throw new XPathException("c-general-xpath");
}
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.add(step);
expectingStep = false;
break;
}
case XPath.Tokens.EXPRTOKEN_NAMETEST_ANY:
case XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE:
case XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME: {
@ -223,11 +234,23 @@ public class XPath {
Step step = new Step(
new Axis(Axis.CHILD),
parseNodeTest(token,xtokens,context));
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=false;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD: {
check(expectingStep);
// If we got here we're expecting child::
if (xtokens.nextToken() != XPath.Tokens.EXPRTOKEN_DOUBLE_COLON) {
throw new XPathException("c-general-xpath");
}
Step step = new Step(
new Axis(Axis.CHILD),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.add(step);
expectingStep = false;
break;
}
case XPath.Tokens.EXPRTOKEN_PERIOD: {
check(expectingStep);
expectingStep=false;
@ -237,12 +260,12 @@ public class XPath {
// This amounts to shorten "a/././b/./c" to "a/b/c".
// Also, the matcher fails to work correctly if XPath
// has those redundant dots.
if (stepsVector.size()==0) {
if (stepsVector.isEmpty()) {
// build step
Axis axis = new Axis(Axis.SELF);
NodeTest nodeTest = new NodeTest(NodeTest.NODE);
Step step = new Step(axis, nodeTest);
stepsVector.addElement(step);
stepsVector.add(step);
if( xtokens.hasMore()
&& xtokens.peekToken() == XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH){
@ -253,67 +276,41 @@ public class XPath {
axis = new Axis(Axis.DESCENDANT);
nodeTest = new NodeTest(NodeTest.NODE);
step = new Step(axis, nodeTest);
stepsVector.addElement(step);
stepsVector.add(step);
expectingStep=true;
}
}
break;
}
case XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH:{
// this cannot appear in arbitrary position.
// this cannot appear in an arbitrary position.
// it is only allowed right after '.' when
// '.' is the first token of a location path.
throw new XPathException("c-general-xpath");
}
case XPath.Tokens.EXPRTOKEN_DOUBLE_COLON: {
// :: cannot appear in an arbitrary position.
// We only expect this token if the xpath
// contains child:: or attribute::
throw new XPathException("c-general-xpath");
}
case XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH: {
check(!expectingStep);
expectingStep=true;
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE: {
check(expectingStep);
expectingDoubleColon = true;
if (xtokens.nextToken() == XPath.Tokens.EXPRTOKEN_DOUBLE_COLON){
Step step = new Step(
new Axis(Axis.ATTRIBUTE),
parseNodeTest(xtokens.nextToken(),xtokens,context));
stepsVector.addElement(step);
expectingStep=false;
expectingDoubleColon = false;
}
break;
}
case XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD:{
check(expectingStep);
expectingDoubleColon = true;
break;
}
case XPath.Tokens.EXPRTOKEN_DOUBLE_COLON :{
check(expectingStep);
check(expectingDoubleColon);
expectingDoubleColon = false;
break;
}
default:
// we should have covered all the tokens that we can possibly see.
throw new XPathException("c-general-xpath");
}
throw new InternalError();
}
}
check(!expectingStep);
locationPathsVector.addElement(buildLocationPath(stepsVector));
locationPathsVector.add(buildLocationPath(stepsVector));
// save location path
fLocationPaths=new LocationPath[locationPathsVector.size()];
locationPathsVector.copyInto(fLocationPaths);
if (DEBUG_XPATH_PARSE) {
System.out.println(">>> "+fLocationPaths);
}
// return location path
return locationPathsVector.toArray(new LocationPath[locationPathsVector.size()]);
} // parseExpression(SymbolTable,NamespaceContext)
@ -378,7 +375,7 @@ public class XPath {
//
/** List of steps. */
public Step[] steps;
public final Step[] steps;
//
// Constructors
@ -445,10 +442,10 @@ public class XPath {
//
/** Axis. */
public Axis axis;
public final Axis axis;
/** Node test. */
public NodeTest nodeTest;
public final NodeTest nodeTest;
//
// Constructors
@ -525,7 +522,7 @@ public class XPath {
//
/** Axis type. */
public short type;
public final short type;
//
// Constructors
@ -594,7 +591,7 @@ public class XPath {
//
/** Node test type. */
public short type;
public final short type;
/** Node qualified name. */
public final QName name = new QName();
@ -856,13 +853,13 @@ public class XPath {
private int[] fTokens = new int[INITIAL_TOKEN_COUNT];
private int fTokenCount = 0; // for writing
private SymbolTable fSymbolTable;
private final SymbolTable fSymbolTable;
// REVISIT: Code something better here. -Ac
private Map<String, Integer> fSymbolMapping = new HashMap<>();
private final Map<String, Integer> fSymbolMapping = new HashMap<>();
// REVISIT: Code something better here. -Ac
private Map<Integer, String> fTokenNames = new HashMap<>();
private final Map<Integer, String> fTokenNames = new HashMap<>();
/**
* Current position in the token list.
@ -1888,6 +1885,10 @@ public class XPath {
tokens.addToken(nameHandle);
}
break;
default:
// CHARTYPE_INVALID or CHARTYPE_OTHER
// We're not expecting to find either of these in a valid expression.
return false;
}
}
if (XPath.Tokens.DUMP_TOKENS) {

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 2001, 2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -37,7 +38,7 @@ public class XPathException
// Data
// hold the value of the key this Exception refers to.
private String fKey;
private final String fKey;
//
// Constructors
//

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -29,9 +30,9 @@ import java.text.CharacterIterator;
*
*/
public class BMPattern {
char[] pattern;
int[] shiftTable;
boolean ignoreCase;
final char[] pattern;
final int[] shiftTable;
final boolean ignoreCase;
public BMPattern(String pat, boolean ignoreCase) {
this(pat, 256, ignoreCase);

View file

@ -24,32 +24,27 @@ package com.sun.org.apache.xerces.internal.impl.xpath.regex;
/**
*/
public class CaseInsensitiveMap {
final class CaseInsensitiveMap {
private static int CHUNK_SHIFT = 10; /* 2^10 = 1k */
private static int CHUNK_SIZE = (1<<CHUNK_SHIFT);
private static int CHUNK_MASK = (CHUNK_SIZE-1);
private static int INITIAL_CHUNK_COUNT = 64; /* up to 0xFFFF */
private static final int CHUNK_SHIFT = 10; /* 2^10 = 1k */
private static final int CHUNK_SIZE = (1<<CHUNK_SHIFT);
private static final int CHUNK_MASK = (CHUNK_SIZE-1);
private static final int INITIAL_CHUNK_COUNT = 64; /* up to 0xFFFF */
private static int[][][] caseInsensitiveMap;
private static Boolean mapBuilt = Boolean.FALSE;
private static int LOWER_CASE_MATCH = 1;
private static int UPPER_CASE_MATCH = 2;
private static final int LOWER_CASE_MATCH = 1;
private static final int UPPER_CASE_MATCH = 2;
static {
buildCaseInsensitiveMap();
}
/**
 * Return a list of code point characters (not including the input value)
 * that can be substituted in a case insensitive match.
 *
 * <p>The lookup table is populated by this class's static initializer,
 * so no lazy initialization or locking is performed here.
 *
 * @param codePoint the code point to look up
 * @return the result of {@code getMapping(codePoint)} for code points
 *         below {@code 0x10000}; {@code null} for any code point at or
 *         above {@code 0x10000}
 */
static public int[] get(int codePoint) {
    return (codePoint < 0x10000) ? getMapping(codePoint) : null;
}
@ -61,11 +56,7 @@ public class CaseInsensitiveMap {
}
private static void buildCaseInsensitiveMap() {
caseInsensitiveMap = new int[INITIAL_CHUNK_COUNT][][];
for (int i=0; i<INITIAL_CHUNK_COUNT; i++) {
caseInsensitiveMap[i] = new int[CHUNK_SIZE][];
}
caseInsensitiveMap = new int[INITIAL_CHUNK_COUNT][CHUNK_SIZE][];
int lc, uc;
for (int i=0; i<0x10000; i++) {
lc = Character.toLowerCase(i);
@ -100,8 +91,6 @@ public class CaseInsensitiveMap {
set(i, map);
}
}
mapBuilt = Boolean.TRUE;
}
private static int[] expandMap(int[] srcMap, int expandBy) {

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -20,7 +21,7 @@
package com.sun.org.apache.xerces.internal.impl.xpath.regex;
import java.util.Vector;
import java.util.ArrayList;
/**
* @xerces.internal
@ -126,7 +127,7 @@ class Op {
return op;
}
int type;
final int type;
Op next = null;
protected Op(int type) {
@ -158,7 +159,7 @@ class Op {
// ================================================================
static class CharOp extends Op {
int charData;
final int charData;
CharOp(int type, int data) {
super(type);
this.charData = data;
@ -170,19 +171,19 @@ class Op {
// ================================================================
/**
 * An {@link Op} that owns an ordered, growable list of branch ops.
 */
static class UnionOp extends Op {
    /** Branch ops, in the order they were added. */
    final ArrayList<Op> branches;

    /**
     * @param type the op type passed through to {@link Op}
     * @param size initial capacity of the branch list
     */
    UnionOp(int type, int size) {
        super(type);
        this.branches = new ArrayList<>(size);
    }

    /** Appends {@code op} as the last branch. */
    void addElement(Op op) {
        this.branches.add(op);
    }

    /** Returns the number of branches added so far. */
    int size() {
        return this.branches.size();
    }

    /** Returns the branch at position {@code index} (0-based). */
    Op elementAt(int index) {
        return this.branches.get(index);
    }
}
@ -201,8 +202,8 @@ class Op {
}
// ================================================================
static class ModifierOp extends ChildOp {
int v1;
int v2;
final int v1;
final int v2;
ModifierOp(int type, int v1, int v2) {
super(type);
this.v1 = v1;
@ -217,7 +218,7 @@ class Op {
}
// ================================================================
static class RangeOp extends Op {
Token tok;
final Token tok;
RangeOp(int type, Token tok) {
super(type);
this.tok = tok;
@ -228,7 +229,7 @@ class Op {
}
// ================================================================
static class StringOp extends Op {
String string;
final String string;
StringOp(int type, String literal) {
super(type);
this.string = literal;
@ -239,10 +240,10 @@ class Op {
}
// ================================================================
static class ConditionOp extends Op {
int refNumber;
Op condition;
Op yes;
Op no;
final int refNumber;
final Op condition;
final Op yes;
final Op no;
ConditionOp(int type, int refno, Op conditionflow, Op yesflow, Op noflow) {
super(type);
this.refNumber = refno;

View file

@ -3,11 +3,12 @@
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -30,7 +31,7 @@ public class ParseException extends RuntimeException {
/** Serialization version. */
static final long serialVersionUID = -7012400318097691370L;
int location;
final int location;
/*
public ParseException(String mes) {

View file

@ -252,7 +252,7 @@ class ParserForXMLSchema extends RegexParser {
if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
if (c == '-' && this.chardata != ']' && !firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
}
if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && firstloop) { // Here is no '-'.
if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
tok.addRange(c, c);
}
@ -382,17 +382,20 @@ class ParserForXMLSchema extends RegexParser {
ranges2.put("xml:isSpace", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, DIGITS);
setupRange(tok, DIGITS_INT);
setupRange(tok, DIGITS_INTS);
ranges.put("xml:isDigit", tok);
ranges2.put("xml:isDigit", Token.complementRanges(tok));
/*
* \w is defined by the XML Schema specification to be:
* [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
*/
tok = Token.createRange();
setupRange(tok, LETTERS);
setupRange(tok, LETTERS_INT);
tok.mergeRanges(ranges.get("xml:isDigit"));
ranges.put("xml:isWord", tok);
ranges2.put("xml:isWord", Token.complementRanges(tok));
tok.mergeRanges(Token.getRange("P", true));
tok.mergeRanges(Token.getRange("Z", true));
tok.mergeRanges(Token.getRange("C", true));
ranges2.put("xml:isWord", tok);
ranges.put("xml:isWord", Token.complementRanges(tok));
tok = Token.createRange();
setupRange(tok, NAMECHARS);
@ -401,6 +404,7 @@ class ParserForXMLSchema extends RegexParser {
tok = Token.createRange();
setupRange(tok, LETTERS);
setupRange(tok, LETTERS_INT);
tok.addRange('_', '_');
tok.addRange(':', ':');
ranges.put("xml:isInitialNameChar", tok);
@ -502,11 +506,12 @@ class ParserForXMLSchema extends RegexParser {
private static final int[] LETTERS_INT = {0x1d790, 0x1d7a8, 0x1d7aa, 0x1d7c9, 0x2fa1b, 0x2fa1d};
private static final String DIGITS =
"\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
+"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
+"\u0F20\u0F29\u1040\u1049\u1369\u1371\u17E0\u17E9\u1810\u1819\uFF10\uFF19";
private static final int[] DIGITS_INT = {0x1D7CE, 0x1D7FF};
private static final int[] DIGITS_INTS = {
0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
0x1D7CE, 0x1D7FF
};
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -110,27 +110,27 @@ public final class REUtil {
}
/**
 * Builds the option string for a set of {@code RegularExpression} option
 * bits. Each flag that is set contributes exactly one character, always in
 * the fixed order {@code F H X i m s u w x ,}.
 *
 * @param options bitwise OR of {@code RegularExpression} option constants
 * @return the interned option string; empty when no known flag is set
 */
static final String createOptionString(int options) {
    StringBuilder sb = new StringBuilder(9);
    if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) {
        sb.append('F');
    }
    if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) {
        sb.append('H');
    }
    if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) {
        sb.append('X');
    }
    if ((options & RegularExpression.IGNORE_CASE) != 0) {
        sb.append('i');
    }
    if ((options & RegularExpression.MULTIPLE_LINES) != 0) {
        sb.append('m');
    }
    if ((options & RegularExpression.SINGLE_LINE) != 0) {
        sb.append('s');
    }
    if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) {
        sb.append('u');
    }
    if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) {
        sb.append('w');
    }
    if ((options & RegularExpression.EXTENDED_COMMENT) != 0) {
        sb.append('x');
    }
    if ((options & RegularExpression.SPECIAL_COMMA) != 0) {
        sb.append(',');
    }
    return sb.toString().intern();
}
@ -138,13 +138,19 @@ public final class REUtil {
static String stripExtendedComment(String regex) {
int len = regex.length();
StringBuffer buffer = new StringBuffer(len);
StringBuilder buffer = new StringBuilder(len);
int offset = 0;
int charClass = 0;
while (offset < len) {
int ch = regex.charAt(offset++);
// Skips a white space.
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') {
// if we are inside a character class, we keep the white space
if (charClass > 0) {
buffer.append((char)ch);
}
continue;
}
if (ch == '#') { // Skips characters between '#' and a line end.
while (offset < len) {
@ -163,12 +169,36 @@ public final class REUtil {
buffer.append((char)next);
offset ++;
} else { // Other escaped character.
buffer.append((char)'\\');
buffer.append('\\');
buffer.append((char)next);
offset ++;
}
} else // As is.
}
else if (ch == '[') {
charClass++;
buffer.append((char)ch);
if (offset < len) {
next = regex.charAt(offset);
if (next == '[' || next ==']') {
buffer.append((char)next);
offset ++;
}
else if (next == '^' && offset + 1 < len) {
next = regex.charAt(offset + 1);
if (next == '[' || next ==']') {
buffer.append('^');
buffer.append((char)next);
offset += 2;
}
}
}
}
else {
if (charClass > 0 && ch == ']') {
--charClass;
}
buffer.append((char)ch);
}
}
return buffer.toString();
}
@ -307,15 +337,15 @@ public final class REUtil {
*/
public static String quoteMeta(String literal) {
int len = literal.length();
StringBuffer buffer = null;
StringBuilder buffer = null;
for (int i = 0; i < len; i ++) {
int ch = literal.charAt(i);
if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
if (buffer == null) {
buffer = new StringBuffer(i+(len-i)*2);
buffer = new StringBuilder(i+(len-i)*2);
if (i > 0) buffer.append(literal.substring(0, i));
}
buffer.append((char)'\\');
buffer.append('\\');
buffer.append((char)ch);
} else if (buffer != null)
buffer.append((char)ch);

View file

@ -1,6 +1,5 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -43,7 +42,7 @@ final class RangeToken extends Token implements java.io.Serializable {
this.setSorted(false);
}
// for RANGE or NRANGE
// for RANGE or NRANGE
protected void addRange(int start, int end) {
this.icaseCache = null;
//System.err.println("Token#addRange(): "+start+" "+end);
@ -560,7 +559,7 @@ final class RangeToken extends Token implements java.io.Serializable {
sb.append(escapeCharInCharClass(this.ranges[i]));
} else {
sb.append(escapeCharInCharClass(this.ranges[i]));
sb.append((char)'-');
sb.append('-');
sb.append(escapeCharInCharClass(this.ranges[i+1]));
}
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -24,7 +24,7 @@ import com.sun.org.apache.xerces.internal.utils.SecuritySupport;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.Vector;
import java.util.ArrayList;
/**
* A Regular Expression Parser.
@ -82,8 +82,7 @@ class RegexParser {
int parenOpened = 1;
int parennumber = 1;
boolean hasBackReferences;
Vector references = null;
int parenCount = 0;
ArrayList<ReferencePosition> references = null;
public RegexParser() {
this.setLocale(Locale.getDefault());
@ -115,7 +114,7 @@ class RegexParser {
return (this.options & flag) == flag;
}
synchronized Token parse(String regex, int options) throws ParseException {
Token parse(String regex, int options) throws ParseException {
this.options = options;
this.offset = 0;
this.setContext(S_NORMAL);
@ -132,15 +131,16 @@ class RegexParser {
Token ret = this.parseRegex();
if (this.offset != this.regexlen)
throw ex("parser.parse.1", this.offset);
if (parenCount < 0)
throw ex("parser.factor.0", this.offset);
if (this.read() != T_EOF) {
throw ex("parser.parse.1", this.offset-1);
}
if (this.references != null) {
for (int i = 0; i < this.references.size(); i ++) {
ReferencePosition position = (ReferencePosition)this.references.elementAt(i);
ReferencePosition position = this.references.get(i);
if (this.parennumber <= position.refNumber)
throw ex("parser.parse.2", position.position);
}
this.references.removeAllElements();
this.references.clear();
}
return ret;
}
@ -160,6 +160,7 @@ class RegexParser {
return this.nexttoken;
}
@SuppressWarnings("fallthrough")
final void next() {
if (this.offset >= this.regexlen) {
this.chardata = -1;
@ -239,7 +240,6 @@ class RegexParser {
break;
case '(':
ret = T_LPAREN;
parenCount++;
if (this.offset >= this.regexlen)
break;
if (this.regex.charAt(this.offset) != '?')
@ -328,11 +328,10 @@ class RegexParser {
*/
Token parseTerm() throws ParseException {
int ch = this.read();
Token tok = null;
if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) {
tok = Token.createEmpty();
return Token.createEmpty();
} else {
tok = this.parseFactor();
Token tok = this.parseFactor();
Token concat = null;
while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) {
if (concat == null) {
@ -343,11 +342,8 @@ class RegexParser {
concat.addChild(this.parseFactor());
//tok = Token.createConcat(tok, this.parseFactor());
}
return tok;
}
if (ch == T_RPAREN) {
parenCount--;
}
return tok;
}
// ----------------------------------------------------------------
@ -482,7 +478,7 @@ class RegexParser {
while (this.offset + 1 < this.regexlen) {
ch = this.regex.charAt(this.offset + 1);
if ('1' <= ch && ch <= '9') {
if ('0' <= ch && ch <= '9') {
refno = (refno * 10) + (ch - '0');
if (refno < this.parennumber) {
finalRefno= refno;
@ -498,8 +494,8 @@ class RegexParser {
}
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
this.references.addElement(new ReferencePosition(finalRefno, this.offset));
if (this.references == null) this.references = new ArrayList<>();
this.references.add(new ReferencePosition(finalRefno, this.offset));
this.offset ++;
if (this.regex.charAt(this.offset) != ')') throw ex("parser.factor.1", this.offset);
this.offset ++;
@ -615,7 +611,7 @@ class RegexParser {
while (this.offset < this.regexlen) {
final int ch = this.regex.charAt(this.offset);
if ('1' <= ch && ch <= '9') {
if ('0' <= ch && ch <= '9') {
refnum = (refnum * 10) + (ch - '0');
if (refnum < this.parennumber) {
++this.offset;
@ -633,8 +629,8 @@ class RegexParser {
Token tok = Token.createBackReference(finalRefnum);
this.hasBackReferences = true;
if (this.references == null) this.references = new Vector();
this.references.addElement(new ReferencePosition(finalRefnum, this.offset-2));
if (this.references == null) this.references = new ArrayList<>();
this.references.add(new ReferencePosition(finalRefnum, this.offset-2));
this.next();
return tok;
}

View file

@ -1,13 +1,13 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 1999-2002,2004,2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@ -1041,9 +1041,10 @@ public class RegularExpression implements java.io.Serializable {
/**
* @return -1 when not match; offset of the end of matched string when match.
*/
@SuppressWarnings("fallthrough")
private int match(Context con, Op op, int offset, int dx, int opts) {
final ExpressionTarget target = con.target;
final Stack opStack = new Stack();
final Stack<Op> opStack = new Stack<>();
final IntStack dataStack = new IntStack();
final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE);
int retValue = -1;
@ -1322,7 +1323,7 @@ public class RegularExpression implements java.io.Serializable {
return retValue;
}
op = (Op) opStack.pop();
op = opStack.pop();
offset = dataStack.pop();
switch (op.type) {

View file

@ -859,7 +859,7 @@ class Token implements java.io.Serializable {
buffer.append("Is");
if (n.indexOf(' ') >= 0) {
for (int ci = 0; ci < n.length(); ci ++)
if (n.charAt(ci) != ' ') buffer.append((char)n.charAt(ci));
if (n.charAt(ci) != ' ') buffer.append(n.charAt(ci));
}
else {
buffer.append(n);
@ -995,8 +995,8 @@ class Token implements java.io.Serializable {
}
private static void setAlias(String newName, String name, boolean positive) {
Token t1 = (Token)Token.categories.get(name);
Token t2 = (Token)Token.categories2.get(name);
Token t1 = Token.categories.get(name);
Token t2 = Token.categories2.get(name);
if (positive) {
Token.categories.put(newName, t1);
Token.categories2.put(newName, t2);
@ -1525,7 +1525,7 @@ class Token implements java.io.Serializable {
this.children.stream().forEach((children1) -> {
sb.append((children1).toString(options));
});
ret = new String(sb);
ret = sb.toString();
}
return ret;
}
@ -1538,10 +1538,10 @@ class Token implements java.io.Serializable {
StringBuilder sb = new StringBuilder();
sb.append((this.children.get(0)).toString(options));
for (int i = 1; i < this.children.size(); i ++) {
sb.append((char)'|');
sb.append('|');
sb.append((this.children.get(i)).toString(options));
}
ret = new String(sb);
ret = sb.toString();
}
return ret;
}
@ -1557,7 +1557,7 @@ class Token implements java.io.Serializable {
ObjectOutputStream.PutField pf = out.putFields();
pf.put("children", vChildren);
out.writeFields();
}
}
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in)

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package validation.tck;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.XMLConstants;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.testng.annotations.Test;
import org.xml.sax.SAXException;
/*
* @bug 8142900
* @summary Verifies that all characters except the set of "punctuation",
* "separator" and "other" characters are accepted by \w [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}]
* @author Joe Wang
*/
/**
 * Regression test for bug 8142900: validates a document of word characters
 * against a schema whose only constraint is the pattern {@code [\w]}.
 */
public class RegexWord {
    static final String SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    static final String SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";

    /*
        The original reZ003v.xml contains a full list of word characters that \w should accept.
        However, U+2308..U+230B were changed from Sm to either Ps or Pe in Unicode 7.0.
        They are therefore excluded from the test.
        The test throws an Exception (and fails) if it fails to recognize any of the characters.
    */
    /**
     * Compiles {@code reZ003.xsd} and validates {@code reZ003vExc23082309.xml} against it.
     *
     * @throws SAXException if the schema cannot be compiled or the instance document is
     *         rejected by the validator
     * @throws IOException if a test resource is missing or cannot be read
     */
    @Test
    public void test() throws SAXException, IOException {
        SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        // StreamSource does not take ownership of the streams it wraps, so close them here.
        try (InputStream xsd = openResource("reZ003.xsd");
                InputStream xml = openResource("reZ003vExc23082309.xml")) {
            Schema schema = schemaFactory.newSchema(new StreamSource(xsd));
            Validator validator = schema.newValidator();
            validator.validate(new StreamSource(xml));
        }
    }

    /**
     * Opens a resource located next to this class.
     *
     * @param name resource name, resolved relative to this class
     * @return an open stream; the caller is responsible for closing it
     * @throws IOException if the resource is not found, so that a misconfigured test
     *         fails with a clear message instead of passing a null stream downstream
     */
    private static InputStream openResource(String name) throws IOException {
        InputStream in = RegexWord.class.getResourceAsStream(name);
        if (in == null) {
            throw new IOException("Test resource not found on classpath: " + name);
        }
        return in;
    }
}

View file

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8" ?>
<!--
Schema used by the RegexWord test.
A valid document has a root "doc" element holding one or more "value" elements.
Each "value" is an xs:string constrained by the pattern [\w]; XSD patterns match
the whole value, so every "value" must consist of exactly one word character.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="doc">
<xs:complexType>
<xs:sequence>
<!-- minOccurs is left at its default of 1, so at least one "value" is required. -->
<xs:element name="value" maxOccurs='unbounded'>
<xs:simpleType>
<xs:restriction base="xs:string">
<!-- The \w class under test, wrapped in a character group. -->
<xs:pattern value="[\w]"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>

File diff suppressed because it is too large Load diff