8187443: Forest Consolidation: Move files to unified layout

Reviewed-by: darcy, ihse
This commit is contained in:
Erik Joelsson 2017-09-12 19:03:39 +02:00
parent 270fe13182
commit 3789983e89
56923 changed files with 3 additions and 15727 deletions

View file

@ -0,0 +1,274 @@
/*
* Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
/**
 * ASCII-only character classification modelled on the standard C ctype
 * functions.
 *
 * <p>Every code point in the range 0x00-0x7F maps to an {@code int}: the bits
 * from 0x100 upwards are category flags ({@link #UPPER}, {@link #DIGIT}, ...),
 * and the low six bits hold the value of the character when read as a digit
 * (0-9 for {@code '0'-'9'}, 10-35 for {@code 'A'-'Z'} and {@code 'a'-'z'},
 * 0 for everything else).
 *
 * @author Hong Zhang
 */
final class ASCII {

    // Category flags. All of them are compile-time constants.
    static final int UPPER = 1 << 8;
    static final int LOWER = 1 << 9;
    static final int DIGIT = 1 << 10;
    static final int SPACE = 1 << 11;
    static final int PUNCT = 1 << 12;
    static final int CNTRL = 1 << 13;
    static final int BLANK = 1 << 14;
    static final int HEX   = 1 << 15;
    static final int UNDER = 1 << 16;
    static final int ASCII = 0xFF00;

    // Composite categories.
    static final int ALPHA  = UPPER | LOWER;
    static final int ALNUM  = ALPHA | DIGIT;
    static final int GRAPH  = PUNCT | ALNUM;
    static final int WORD   = ALNUM | UNDER;
    static final int XDIGIT = HEX;

    /** Flags plus digit value for each of the 128 ASCII code points. */
    private static final int[] ctype = buildTable();

    /**
     * Builds the classification table, one entry per ASCII code point.
     */
    private static int[] buildTable() {
        int[] table = new int[128];

        // 0x00-0x1F and DEL are control characters.
        for (int c = 0x00; c <= 0x1F; c++) {
            table[c] = CNTRL;
        }
        table[0x7F] = CNTRL;

        // HT, LF, VT, FF and CR are also white space; only HT is blank.
        for (int c = 0x09; c <= 0x0D; c++) {
            table[c] |= SPACE;
        }
        table[0x09] |= BLANK;
        table[0x20] = SPACE | BLANK;

        // Start with every visible character as punctuation; digits and
        // letters are overwritten below.
        for (int c = 0x21; c <= 0x7E; c++) {
            table[c] = PUNCT;
        }
        table['_'] |= UNDER;

        for (int c = '0'; c <= '9'; c++) {
            table[c] = DIGIT | HEX | (c - '0');
        }

        // Letters carry the values 10..35; only A-F / a-f are hex digits.
        for (int i = 0; i < 26; i++) {
            int hexFlag = (i < 6) ? HEX : 0;
            int value = 10 + i;
            table['A' + i] = UPPER | hexFlag | value;
            table['a' + i] = LOWER | hexFlag | value;
        }
        return table;
    }

    /**
     * Returns the table entry for {@code ch}, or 0 when {@code ch} is outside
     * the range 0x00-0x7F.
     */
    static int getType(int ch) {
        return isAscii(ch) ? ctype[ch] : 0;
    }

    /** Tells whether {@code ch} has at least one of the bits in {@code type}. */
    static boolean isType(int ch, int type) {
        return (getType(ch) & type) != 0;
    }

    /** Tells whether {@code ch} lies in the range 0x00-0x7F. */
    static boolean isAscii(int ch) {
        return (ch & ~0x7F) == 0;
    }

    static boolean isAlpha(int ch) {
        return isType(ch, ALPHA);
    }

    static boolean isDigit(int ch) {
        return '0' <= ch && ch <= '9';
    }

    static boolean isAlnum(int ch) {
        return isType(ch, ALNUM);
    }

    static boolean isGraph(int ch) {
        return isType(ch, GRAPH);
    }

    static boolean isPrint(int ch) {
        return 0x20 <= ch && ch <= 0x7E;
    }

    static boolean isPunct(int ch) {
        return isType(ch, PUNCT);
    }

    static boolean isSpace(int ch) {
        return isType(ch, SPACE);
    }

    static boolean isHexDigit(int ch) {
        return isType(ch, HEX);
    }

    static boolean isOctDigit(int ch) {
        return '0' <= ch && ch <= '7';
    }

    static boolean isCntrl(int ch) {
        return isType(ch, CNTRL);
    }

    static boolean isLower(int ch) {
        return 'a' <= ch && ch <= 'z';
    }

    static boolean isUpper(int ch) {
        return 'A' <= ch && ch <= 'Z';
    }

    static boolean isWord(int ch) {
        return isType(ch, WORD);
    }

    /**
     * Returns the digit value stored in the low six bits of the table entry.
     * Only the low seven bits of {@code ch} are used for the lookup.
     */
    static int toDigit(int ch) {
        return ctype[ch & 0x7F] & 0x3F;
    }

    /** Maps {@code 'A'-'Z'} to {@code 'a'-'z'}; other values are returned as is. */
    static int toLower(int ch) {
        if (isUpper(ch)) {
            return ch + 0x20;
        }
        return ch;
    }

    /** Maps {@code 'a'-'z'} to {@code 'A'-'Z'}; other values are returned as is. */
    static int toUpper(int ch) {
        if (isLower(ch)) {
            return ch - 0x20;
        }
        return ch;
    }
}

View file

@ -0,0 +1,398 @@
/*
* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Pattern.CharPredicate;
import java.util.regex.Pattern.BmpCharPredicate;
/**
 * Factory methods for the {@code CharPredicate}s behind named character
 * classes: Unicode binary properties, POSIX classes, general categories,
 * scripts, blocks and the {@code java*} classes backed by
 * {@link Character} methods.
 *
 * <p>The name lookups return {@code null} when the name is not recognized.
 */
class CharPredicates {

    // General-category masks: bit n is set when the Character.getType()
    // value n belongs to the group.
    private static final int SEPARATOR_MASK =
        (1 << Character.SPACE_SEPARATOR) |
        (1 << Character.LINE_SEPARATOR) |
        (1 << Character.PARAGRAPH_SEPARATOR);

    private static final int PUNCTUATION_MASK =
        (1 << Character.CONNECTOR_PUNCTUATION) |
        (1 << Character.DASH_PUNCTUATION) |
        (1 << Character.START_PUNCTUATION) |
        (1 << Character.END_PUNCTUATION) |
        (1 << Character.OTHER_PUNCTUATION) |
        (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
        (1 << Character.FINAL_QUOTE_PUNCTUATION);

    private static final int MARK_MASK =
        (1 << Character.NON_SPACING_MARK) |
        (1 << Character.ENCLOSING_MARK) |
        (1 << Character.COMBINING_SPACING_MARK);

    private static final int CASED_LETTER_MASK =
        (1 << Character.UPPERCASE_LETTER) |
        (1 << Character.LOWERCASE_LETTER) |
        (1 << Character.TITLECASE_LETTER);

    private static final int LETTER_MASK =
        CASED_LETTER_MASK |
        (1 << Character.MODIFIER_LETTER) |
        (1 << Character.OTHER_LETTER);

    static final CharPredicate ALPHABETIC() {
        return Character::isAlphabetic;
    }

    // \p{gc=Decimal_Number}
    static final CharPredicate DIGIT() {
        return Character::isDigit;
    }

    static final CharPredicate LETTER() {
        return Character::isLetter;
    }

    static final CharPredicate IDEOGRAPHIC() {
        return Character::isIdeographic;
    }

    static final CharPredicate LOWERCASE() {
        return Character::isLowerCase;
    }

    static final CharPredicate UPPERCASE() {
        return Character::isUpperCase;
    }

    static final CharPredicate TITLECASE() {
        return Character::isTitleCase;
    }

    // \p{Whitespace}: the separator categories plus U+0009..U+000D and U+0085
    static final CharPredicate WHITE_SPACE() {
        return ch ->
            ((SEPARATOR_MASK >> Character.getType(ch)) & 1) != 0
            || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
    }

    // \p{gc=Control}
    static final CharPredicate CONTROL() {
        return ch -> Character.getType(ch) == Character.CONTROL;
    }

    // \p{gc=Punctuation}
    static final CharPredicate PUNCTUATION() {
        return category(PUNCTUATION_MASK);
    }

    // Union of:
    //   \p{gc=Decimal_Number}
    //   \p{Hex_Digit} -> PropList.txt: Hex_Digit
    static final CharPredicate HEX_DIGIT() {
        return DIGIT().union(ch -> (ch >= 0x0030 && ch <= 0x0039) ||
                                   (ch >= 0x0041 && ch <= 0x0046) ||
                                   (ch >= 0x0061 && ch <= 0x0066) ||
                                   (ch >= 0xFF10 && ch <= 0xFF19) ||
                                   (ch >= 0xFF21 && ch <= 0xFF26) ||
                                   (ch >= 0xFF41 && ch <= 0xFF46));
    }

    static final CharPredicate ASSIGNED() {
        return ch -> Character.getType(ch) != Character.UNASSIGNED;
    }

    // PropList.txt:Noncharacter_Code_Point
    static final CharPredicate NONCHARACTER_CODE_POINT() {
        return ch -> (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
    }

    // Union of:
    //   \p{alpha}
    //   \p{digit}
    static final CharPredicate ALNUM() {
        return ALPHABETIC().union(DIGIT());
    }

    // Specified as \p{Whitespace} minus
    //   [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}   -> 0xa, 0xb, 0xc, 0xd, 0x85
    //    \p{gc=Line_Separator}
    //    \p{gc=Paragraph_Separator}]
    // which leaves gc=Space_Separator plus \N{HT}; that is what is tested.
    static final CharPredicate BLANK() {
        return ch ->
            Character.getType(ch) == Character.SPACE_SEPARATOR ||
            ch == 0x9; // \N{HT}
    }

    // Specified as
    //   [^
    //    \p{space}
    //    \p{gc=Control}
    //    \p{gc=Surrogate}
    //    \p{gc=Unassigned}]
    // The test below is on the general category only.
    static final CharPredicate GRAPH() {
        final int excluded = SEPARATOR_MASK |
                             (1 << Character.CONTROL) |
                             (1 << Character.SURROGATE) |
                             (1 << Character.UNASSIGNED);
        return ch -> ((excluded >> Character.getType(ch)) & 1) == 0;
    }

    // (\p{graph} union \p{blank}) minus \p{cntrl}
    static final CharPredicate PRINT() {
        return GRAPH().union(BLANK()).and(CONTROL().negate());
    }

    // 200C..200D PropList.txt:Join_Control
    static final CharPredicate JOIN_CONTROL() {
        return ch -> ch == 0x200C || ch == 0x200D;
    }

    // Union of:
    //   \p{alpha}
    //   \p{gc=Mark}
    //   \p{digit}
    //   \p{gc=Connector_Punctuation}
    //   \p{Join_Control} 200C..200D
    static final CharPredicate WORD() {
        return ALPHABETIC().union(
            category(MARK_MASK |
                     (1 << Character.DECIMAL_DIGIT_NUMBER) |
                     (1 << Character.CONNECTOR_PUNCTUATION)),
            JOIN_CONTROL());
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Maps an upper-case POSIX class name to its Unicode-aware predicate.
     *
     * @param name the class name, already upper-cased
     * @return the predicate, or {@code null} if the name is not known
     */
    private static CharPredicate getPosixPredicate(String name) {
        switch (name) {
            case "ALPHA": return ALPHABETIC();
            case "LOWER": return LOWERCASE();
            case "UPPER": return UPPERCASE();
            case "SPACE": return WHITE_SPACE();
            case "PUNCT": return PUNCTUATION();
            case "XDIGIT": return HEX_DIGIT();
            case "ALNUM": return ALNUM();
            case "CNTRL": return CONTROL();
            case "DIGIT": return DIGIT();
            case "BLANK": return BLANK();
            case "GRAPH": return GRAPH();
            case "PRINT": return PRINT();
            default: return null;
        }
    }

    /**
     * Maps an upper-case Unicode binary property name to its predicate.
     * Multi-word names are accepted with and without underscores.
     *
     * @param name the property name, already upper-cased
     * @return the predicate, or {@code null} if the name is not known
     */
    private static CharPredicate getUnicodePredicate(String name) {
        switch (name) {
            case "ALPHABETIC": return ALPHABETIC();
            case "ASSIGNED": return ASSIGNED();
            case "CONTROL": return CONTROL();
            case "HEXDIGIT": return HEX_DIGIT();
            case "IDEOGRAPHIC": return IDEOGRAPHIC();
            case "JOINCONTROL": return JOIN_CONTROL();
            case "LETTER": return LETTER();
            case "LOWERCASE": return LOWERCASE();
            case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT();
            case "TITLECASE": return TITLECASE();
            case "PUNCTUATION": return PUNCTUATION();
            case "UPPERCASE": return UPPERCASE();
            case "WHITESPACE": return WHITE_SPACE();
            case "WORD": return WORD();
            case "WHITE_SPACE": return WHITE_SPACE();
            case "HEX_DIGIT": return HEX_DIGIT();
            case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
            case "JOIN_CONTROL": return JOIN_CONTROL();
            default: return null;
        }
    }

    /**
     * Looks up a predicate by Unicode binary property name, falling back to
     * the POSIX class names. The lookup is case-insensitive.
     *
     * @param propName the property name
     * @return the predicate, or {@code null} if the name is not known
     */
    public static CharPredicate forUnicodeProperty(String propName) {
        propName = propName.toUpperCase(Locale.ROOT);
        CharPredicate p = getUnicodePredicate(propName);
        if (p != null)
            return p;
        return getPosixPredicate(propName);
    }

    /**
     * Looks up the Unicode-aware predicate for a POSIX class name. The lookup
     * is case-insensitive.
     *
     * @param propName the POSIX class name
     * @return the predicate, or {@code null} if the name is not known
     */
    public static CharPredicate forPOSIXName(String propName) {
        // Locale.ROOT, as in forUnicodeProperty: the names are plain
        // identifiers and must not be subject to locale-specific case rules.
        return getPosixPredicate(propName.toUpperCase(Locale.ROOT));
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Returns a predicate matching all characters that belong to a named
     * UnicodeScript.
     *
     * @param name the script name, as accepted by
     *             {@link Character.UnicodeScript#forName}
     * @return the predicate, or {@code null} if the name is not a valid script
     */
    static CharPredicate forUnicodeScript(String name) {
        try {
            final Character.UnicodeScript script =
                Character.UnicodeScript.forName(name);
            return ch -> script == Character.UnicodeScript.of(ch);
        } catch (IllegalArgumentException ignored) {
            // Not a script name: reported to the caller as null.
            return null;
        }
    }

    /**
     * Returns a predicate matching all characters in a UnicodeBlock.
     *
     * @param name the block name, as accepted by
     *             {@link Character.UnicodeBlock#forName}
     * @return the predicate, or {@code null} if the name is not a valid block
     */
    static CharPredicate forUnicodeBlock(String name) {
        try {
            final Character.UnicodeBlock block =
                Character.UnicodeBlock.forName(name);
            return ch -> block == Character.UnicodeBlock.of(ch);
        } catch (IllegalArgumentException ignored) {
            // Not a block name: reported to the caller as null.
            return null;
        }
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Looks up a predicate by its case-sensitive name: a Unicode general
     * category alias, an ASCII-only POSIX class, or a {@code java*} class
     * backed by a {@link Character} method.
     *
     * @param name the class name
     * @return the predicate, or {@code null} if the name is not known
     */
    static CharPredicate forProperty(String name) {
        // Unicode character property aliases, defined in
        // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
        switch (name) {
            case "Cn": return category(1 << Character.UNASSIGNED);
            case "Lu": return category(1 << Character.UPPERCASE_LETTER);
            case "Ll": return category(1 << Character.LOWERCASE_LETTER);
            case "Lt": return category(1 << Character.TITLECASE_LETTER);
            case "Lm": return category(1 << Character.MODIFIER_LETTER);
            case "Lo": return category(1 << Character.OTHER_LETTER);
            case "Mn": return category(1 << Character.NON_SPACING_MARK);
            case "Me": return category(1 << Character.ENCLOSING_MARK);
            case "Mc": return category(1 << Character.COMBINING_SPACING_MARK);
            case "Nd": return category(1 << Character.DECIMAL_DIGIT_NUMBER);
            case "Nl": return category(1 << Character.LETTER_NUMBER);
            case "No": return category(1 << Character.OTHER_NUMBER);
            case "Zs": return category(1 << Character.SPACE_SEPARATOR);
            case "Zl": return category(1 << Character.LINE_SEPARATOR);
            case "Zp": return category(1 << Character.PARAGRAPH_SEPARATOR);
            case "Cc": return category(1 << Character.CONTROL);
            case "Cf": return category(1 << Character.FORMAT);
            case "Co": return category(1 << Character.PRIVATE_USE);
            case "Cs": return category(1 << Character.SURROGATE);
            case "Pd": return category(1 << Character.DASH_PUNCTUATION);
            case "Ps": return category(1 << Character.START_PUNCTUATION);
            case "Pe": return category(1 << Character.END_PUNCTUATION);
            case "Pc": return category(1 << Character.CONNECTOR_PUNCTUATION);
            case "Po": return category(1 << Character.OTHER_PUNCTUATION);
            case "Sm": return category(1 << Character.MATH_SYMBOL);
            case "Sc": return category(1 << Character.CURRENCY_SYMBOL);
            case "Sk": return category(1 << Character.MODIFIER_SYMBOL);
            case "So": return category(1 << Character.OTHER_SYMBOL);
            case "Pi": return category(1 << Character.INITIAL_QUOTE_PUNCTUATION);
            case "Pf": return category(1 << Character.FINAL_QUOTE_PUNCTUATION);
            case "L": return category(LETTER_MASK);
            case "M": return category(MARK_MASK);
            case "N": return category((1 << Character.DECIMAL_DIGIT_NUMBER) |
                                      (1 << Character.LETTER_NUMBER) |
                                      (1 << Character.OTHER_NUMBER));
            case "Z": return category(SEPARATOR_MASK);
            case "C": return category((1 << Character.CONTROL) |
                                      (1 << Character.FORMAT) |
                                      (1 << Character.PRIVATE_USE) |
                                      (1 << Character.SURROGATE) |
                                      (1 << Character.UNASSIGNED)); // Other
            case "P": return category(PUNCTUATION_MASK);
            case "S": return category((1 << Character.MATH_SYMBOL) |
                                      (1 << Character.CURRENCY_SYMBOL) |
                                      (1 << Character.MODIFIER_SYMBOL) |
                                      (1 << Character.OTHER_SYMBOL));
            case "LC": return category(CASED_LETTER_MASK);
            case "LD": return category(LETTER_MASK |
                                       (1 << Character.DECIMAL_DIGIT_NUMBER));
            case "L1": return range(0x00, 0xFF); // Latin-1
            case "all": return Pattern.ALL();
            // Posix regular expression character classes, defined in
            // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
            case "ASCII": return range(0x00, 0x7F);    // ASCII
            case "Alnum": return ctype(ASCII.ALNUM);   // Alphanumeric characters
            case "Alpha": return ctype(ASCII.ALPHA);   // Alphabetic characters
            case "Blank": return ctype(ASCII.BLANK);   // Space and tab characters
            case "Cntrl": return ctype(ASCII.CNTRL);   // Control characters
            case "Digit": return range('0', '9');      // Numeric characters
            case "Graph": return ctype(ASCII.GRAPH);   // printable and visible
            case "Lower": return range('a', 'z');      // Lower-case alphabetic
            case "Print": return range(0x20, 0x7E);    // Printable characters
            case "Punct": return ctype(ASCII.PUNCT);   // Punctuation characters
            case "Space": return ctype(ASCII.SPACE);   // Space characters
            case "Upper": return range('A', 'Z');      // Upper-case alphabetic
            case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
            // Java character properties, defined by methods in Character.java
            case "javaLowerCase": return Character::isLowerCase;
            case "javaUpperCase": return Character::isUpperCase;
            case "javaAlphabetic": return Character::isAlphabetic;
            case "javaIdeographic": return Character::isIdeographic;
            case "javaTitleCase": return Character::isTitleCase;
            case "javaDigit": return Character::isDigit;
            case "javaDefined": return Character::isDefined;
            case "javaLetter": return Character::isLetter;
            case "javaLetterOrDigit": return Character::isLetterOrDigit;
            case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart;
            case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart;
            case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart;
            case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart;
            case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable;
            case "javaSpaceChar": return Character::isSpaceChar;
            case "javaWhitespace": return Character::isWhitespace;
            case "javaISOControl": return Character::isISOControl;
            case "javaMirrored": return Character::isMirrored;
            default: return null;
        }
    }

    /**
     * Returns a predicate matching characters whose general category, as
     * reported by {@link Character#getType(int)}, has its bit set in
     * {@code typeMask}.
     */
    private static CharPredicate category(final int typeMask) {
        return ch -> (typeMask & (1 << Character.getType(ch))) != 0;
    }

    /** Returns a predicate matching {@code lower <= ch <= upper}. */
    private static CharPredicate range(final int lower, final int upper) {
        return (BmpCharPredicate)ch -> lower <= ch && ch <= upper;
    }

    /**
     * Returns a predicate matching ASCII characters that carry at least one
     * of the {@link ASCII} flags in {@code ctype}.
     */
    private static CharPredicate ctype(final int ctype) {
        return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype);
    }

    /////////////////////////////////////////////////////////////////////////////

    /**
     * Posix ASCII variants, not in the lookup map
     */
    static final BmpCharPredicate ASCII_DIGIT() {
        return ch -> ch < 128 && ASCII.isDigit(ch);
    }

    static final BmpCharPredicate ASCII_WORD() {
        return ch -> ch < 128 && ASCII.isWord(ch);
    }

    static final BmpCharPredicate ASCII_SPACE() {
        return ch -> ch < 128 && ASCII.isSpace(ch);
    }
}

View file

@ -0,0 +1,205 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
/**
 * Extended grapheme cluster boundary detection between two adjacent code
 * points, driven by a table indexed with the grapheme break type of each
 * side.
 */
final class Grapheme {

    /**
     * Determines if there is an extended grapheme cluster boundary between two
     * continuing characters {@code cp1} and {@code cp2}.
     * <p>
     * See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
     * for the extended grapheme cluster boundary rules
     *
     * @param cp1 the code point before the candidate boundary
     * @param cp2 the code point after the candidate boundary
     * @return {@code true} if a cluster boundary lies between the two
     */
    static boolean isBoundary(int cp1, int cp2) {
        return rules[getType(cp1)][getType(cp2)];
    }

    // types
    private static final int OTHER = 0;
    private static final int CR = 1;
    private static final int LF = 2;
    private static final int CONTROL = 3;
    private static final int EXTEND = 4;
    private static final int RI = 5;
    private static final int PREPEND = 6;
    private static final int SPACINGMARK = 7;
    private static final int L = 8;
    private static final int V = 9;
    private static final int T = 10;
    private static final int LV = 11;
    private static final int LVT = 12;

    private static final int FIRST_TYPE = 0;
    private static final int LAST_TYPE = 12;

    /** {@code rules[a][b]} is true when a boundary lies between types a and b. */
    private static final boolean[][] rules;

    // The assignments below are order dependent: later rules overwrite
    // earlier ones, so GB 4/5 win over GB 9/9a/9b and GB 3 wins over GB 4/5.
    static {
        rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
        // default, any + any
        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
            for (int j = FIRST_TYPE; j <= LAST_TYPE; j++)
                rules[i][j] = true;
        // GB 6 L x (L | V | LV | LVT)
        rules[L][L] = false;
        rules[L][V] = false;
        rules[L][LV] = false;
        rules[L][LVT] = false;
        // GB 7 (LV | V) x (V | T)
        rules[LV][V] = false;
        rules[LV][T] = false;
        rules[V][V] = false;
        rules[V][T] = false;
        // GB 8 (LVT | T) x T
        rules[LVT][T] = false;
        rules[T][T] = false;
        // GB 8a RI x RI
        rules[RI][RI] = false;
        // GB 9 x Extend
        // GB 9a x Spacing Mark
        // GB 9b Prepend x
        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++) {
            rules[i][EXTEND] = false;
            rules[i][SPACINGMARK] = false;
            rules[PREPEND][i] = false;
        }
        // GB 4 (Control | CR | LF) +
        // GB 5 + (Control | CR | LF)
        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
            for (int j = CR; j <= CONTROL; j++) {
                rules[i][j] = true;
                rules[j][i] = true;
            }
        // GB 3 CR x LF
        rules[CR][LF] = false;
        // GB 10 Any + Any -> default
    }

    // Hangul syllables
    private static final int SYLLABLE_BASE = 0xAC00;
    private static final int LCOUNT = 19;
    private static final int VCOUNT = 21;
    private static final int TCOUNT = 28;
    private static final int NCOUNT = VCOUNT * TCOUNT; // 588
    private static final int SCOUNT = LCOUNT * NCOUNT; // 11172

    // #tr29: SpacingMark exceptions: The following (which have
    // General_Category = Spacing_Mark and would otherwise be included)
    // are specifically excluded
    private static boolean isExcludedSpacingMark(int cp) {
        return cp == 0x102B || cp == 0x102C || cp == 0x1038 ||
               cp >= 0x1062 && cp <= 0x1064 ||
               cp >= 0x1067 && cp <= 0x106D ||   // 0x1065/0x1066 are not on the list
               cp == 0x1083 ||
               cp >= 0x1087 && cp <= 0x108C ||
               cp == 0x108F ||
               cp >= 0x109A && cp <= 0x109C ||
               cp == 0x1A61 || cp == 0x1A63 || cp == 0x1A64 ||
               cp == 0xAA7B || cp == 0xAA7D;
    }

    /**
     * Maps a code point to one of the grapheme break types declared above,
     * starting from its general category.
     */
    @SuppressWarnings("fallthrough")
    private static int getType(int cp) {
        int type = Character.getType(cp);
        switch (type) {
        case Character.CONTROL:
            if (cp == 0x000D)
                return CR;
            if (cp == 0x000A)
                return LF;
            return CONTROL;
        case Character.UNASSIGNED:
            // NOTE: #tr29 lists "Unassigned and Default_Ignorable_Code_Point" as Control
            // but GraphemeBreakTest.txt lists u+0378/reserved-0378 as "Other"
            // so type it as "Other" to make the test happy
            if (cp == 0x0378)
                return OTHER;
            // every other unassigned code point deliberately falls through
        case Character.LINE_SEPARATOR:
        case Character.PARAGRAPH_SEPARATOR:
        case Character.SURROGATE:
            return CONTROL;
        case Character.FORMAT:
            // ZWNJ and ZWJ
            if (cp == 0x200C || cp == 0x200D)
                return EXTEND;
            return CONTROL;
        case Character.NON_SPACING_MARK:
        case Character.ENCLOSING_MARK:
            // NOTE:
            // #tr29 "plus a few General_Category = Spacing_Mark needed for
            // canonical equivalence."
            // but for "extended grapheme clusters" support, there is no
            // need to distinguish "extend" from "spacingmark" given GB9, GB9a
            return EXTEND;
        case Character.COMBINING_SPACING_MARK:
            if (isExcludedSpacingMark(cp))
                return OTHER;
            // NOTE:
            // 0x11720 and 0x11721 are mentioned in #tr29 as
            // OTHER_LETTER but it appears their category has been updated to
            // COMBINING_SPACING_MARK already (verified in ver.8)
            return SPACINGMARK;
        case Character.OTHER_SYMBOL:
            if (cp >= 0x1F1E6 && cp <= 0x1F1FF)
                return RI;
            return OTHER;
        case Character.MODIFIER_LETTER:
            // WARNING:
            // not mentioned in #tr29 but listed in GraphemeBreakProperty.txt
            if (cp == 0xFF9E || cp == 0xFF9F)
                return EXTEND;
            return OTHER;
        case Character.OTHER_LETTER:
            if (cp == 0x0E33 || cp == 0x0EB3)
                return SPACINGMARK;
            // hangul jamo
            if (cp >= 0x1100 && cp <= 0x11FF) {
                if (cp <= 0x115F)
                    return L;
                if (cp <= 0x11A7)
                    return V;
                return T;
            }
            // hangul syllables: offsets that are a multiple of TCOUNT are LV
            int sindex = cp - SYLLABLE_BASE;
            if (sindex >= 0 && sindex < SCOUNT) {
                if (sindex % TCOUNT == 0)
                    return LV;
                return LVT;
            }
            // hangul jamo_extended A
            if (cp >= 0xA960 && cp <= 0xA97C)
                return L;
            // hangul jamo_extended B
            if (cp >= 0xD7B0 && cp <= 0xD7C6)
                return V;
            if (cp >= 0xD7CB && cp <= 0xD7FB)
                return T;
        }
        return OTHER;
    }
}

View file

@ -0,0 +1,98 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.Arrays;
/**
 * A lightweight hash set of {@code int} values. Not safe for concurrent
 * access.
 *
 * <p>Elements live in {@code entries} as consecutive pairs
 * {@code (value, next)}, where {@code next} is the index of the next pair in
 * the same bucket or {@code -1} at the end of the chain. {@code hashes}
 * holds, per bucket, the index of the first pair or {@code -1} when the
 * bucket is empty.
 *
 * <p>Any {@code int} may be stored. Non-negative values hash to
 * {@code value % bucketCount}; negative values are folded into the
 * non-negative range first so that the bucket index is always valid.
 */
class IntHashSet {
    private static final int INITIAL_CAPACITY = 16;
    private static final int NONE = -1;

    private int[] entries;
    private int[] hashes;
    private int pos = 0;    // index of the next free slot in entries

    /** Creates an empty set with room for 16 elements before it grows. */
    public IntHashSet() {
        this.entries = new int[INITIAL_CAPACITY << 1];
        this.hashes = new int[(INITIAL_CAPACITY / 2) | 1]; // odd -> fewer collisions
        Arrays.fill(this.entries, NONE);
        Arrays.fill(this.hashes, NONE);
    }

    /**
     * Returns the bucket index of {@code value} in a table of
     * {@code bucketCount} buckets. Clearing the sign bit keeps the result
     * non-negative and leaves non-negative values unchanged.
     */
    private static int bucketOf(int value, int bucketCount) {
        return (value & Integer.MAX_VALUE) % bucketCount;
    }

    /**
     * Tells whether {@code i} has been added to this set.
     *
     * @param i the value to look for
     * @return {@code true} if the set contains {@code i}
     */
    public boolean contains(int i) {
        int h = hashes[bucketOf(i, hashes.length)];
        while (h != NONE) {
            if (entries[h] == i)
                return true;
            h = entries[h + 1];
        }
        return false;
    }

    /**
     * Adds {@code i} to this set; does nothing if it is already present.
     *
     * @param i the value to add
     */
    public void add(int i) {
        int h0 = bucketOf(i, hashes.length);
        int next = hashes[h0];
        // if invoker guarantees contains(i) checked before add(i)
        // the following check is not needed.
        int next0 = next;
        while (next0 != NONE) {
            if (entries[next0] == i)
                return;
            next0 = entries[next0 + 1];
        }
        // The new pair becomes the head of its bucket's chain.
        hashes[h0] = pos;
        entries[pos++] = i;
        entries[pos++] = next;
        if (pos == entries.length)
            expand();
    }

    /** Removes all elements; the current capacity is kept. */
    public void clear() {
        Arrays.fill(this.entries, NONE);
        Arrays.fill(this.hashes, NONE);
        pos = 0;
    }

    /**
     * Doubles the entry storage and re-hashes every element into a bucket
     * table of {@code (oldEntriesLength / 2) | 1} buckets.
     */
    private void expand() {
        int[] old = entries;
        int[] es = new int[old.length << 1];
        int hlen = (old.length / 2) | 1;
        int[] hs = new int[hlen];
        Arrays.fill(es, NONE);
        Arrays.fill(hs, NONE);
        for (int n = 0; n < pos;) { // re-hashing
            int i = old[n];
            int hsh = bucketOf(i, hlen);
            int next = hs[hsh];
            hs[hsh] = n;
            es[n++] = i;
            es[n++] = next;
        }
        this.entries = es;
        this.hashes = hs;
    }
}

View file

@ -0,0 +1,189 @@
/*
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
/**
 * A read-only view of the outcome of a match operation.
 *
 * <p>The query methods declared here report the results of matching a
 * regular expression against some input: the boundaries of the overall
 * match, the captured groups, and the boundaries of each group. A
 * {@code MatchResult} lets these values be inspected but offers no way
 * to change them.
 *
 * @author  Michael McCloskey
 * @see Matcher
 * @since 1.5
 */
public interface MatchResult {

    /**
     * Reports where the match begins.
     *
     * @return  The index of the first character matched
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     */
    int start();

    /**
     * Reports where the subsequence captured by the given group begins
     * within this match.
     *
     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
     * one, left to right. Group zero stands for the whole pattern, which
     * makes <i>m.</i>{@code start(0)} equivalent to
     * <i>m.</i>{@code start()}. </p>
     *
     * @param  group
     *         The index of a capturing group in this matcher's pattern
     *
     * @return  The index of the first character captured by the group,
     *          or {@code -1} if the match was successful but the group
     *          itself did not match anything
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     *
     * @throws  IndexOutOfBoundsException
     *          If there is no capturing group in the pattern
     *          with the given index
     */
    int start(int group);

    /**
     * Reports the offset just past the final character of the match.
     *
     * @return  The offset after the last character matched
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     */
    int end();

    /**
     * Reports the offset just past the final character of the subsequence
     * captured by the given group within this match.
     *
     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
     * one, left to right. Group zero stands for the whole pattern, which
     * makes <i>m.</i>{@code end(0)} equivalent to
     * <i>m.</i>{@code end()}. </p>
     *
     * @param  group
     *         The index of a capturing group in this matcher's pattern
     *
     * @return  The offset after the last character captured by the group,
     *          or {@code -1} if the match was successful
     *          but the group itself did not match anything
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     *
     * @throws  IndexOutOfBoundsException
     *          If there is no capturing group in the pattern
     *          with the given index
     */
    int end(int group);

    /**
     * Retrieves the portion of the input that the previous match covered.
     *
     * <p> Given a matcher <i>m</i> operating on an input sequence <i>s</i>,
     * <i>m.</i>{@code group()} is equivalent to
     * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>{@code end())}.
     * </p>
     *
     * <p> Some patterns, such as {@code a*}, can match the empty string.
     * When the pattern successfully matches the empty string in the input,
     * this method returns the empty string. </p>
     *
     * @return  The (possibly empty) subsequence matched by the previous match,
     *          in string form
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     */
    String group();

    /**
     * Retrieves the portion of the input that the given group captured
     * during the previous match operation.
     *
     * <p> Given a matcher <i>m</i>, an input sequence <i>s</i>, and a group
     * index <i>g</i>, <i>m.</i>{@code group(}<i>g</i>{@code )} is equivalent to
     * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
     * ),}&nbsp;<i>m.</i>{@code end(}<i>g</i>{@code ))}.
     * </p>
     *
     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
     * one, left to right. Group zero stands for the whole pattern, which
     * makes {@code m.group(0)} equivalent to {@code m.group()}.
     * </p>
     *
     * <p> When the match succeeded but the requested group did not match
     * any part of the input sequence, the result is {@code null}. Some
     * groups, such as {@code (a*)}, can match the empty string; when such
     * a group successfully matches the empty string in the input, this
     * method returns the empty string. </p>
     *
     * @param  group
     *         The index of a capturing group in this matcher's pattern
     *
     * @return  The (possibly empty) subsequence captured by the group
     *          during the previous match, or {@code null} if the group
     *          failed to match part of the input
     *
     * @throws  IllegalStateException
     *          If no match has yet been attempted,
     *          or if the previous match operation failed
     *
     * @throws  IndexOutOfBoundsException
     *          If there is no capturing group in the pattern
     *          with the given index
     */
    String group(int group);

    /**
     * Reports how many capturing groups this match result's pattern has.
     *
     * <p> By convention group zero denotes the entire pattern; it is not
     * counted here. </p>
     *
     * <p> Every non-negative integer less than or equal to the value
     * returned by this method is guaranteed to be a valid group index for
     * this matcher. </p>
     *
     * @return The number of capturing groups in this matcher's pattern
     */
    int groupCount();
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,123 @@
/*
* Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import sun.security.action.GetPropertyAction;
/**
 * Unchecked exception thrown to indicate a syntax error in a
 * regular-expression pattern.
 *
 * @author  unascribed
 * @since 1.4
 * @spec JSR-51
 */
public class PatternSyntaxException
    extends IllegalArgumentException
{
    private static final long serialVersionUID = -3864639126226059218L;

    // Field names are part of the serialized form; do not rename.
    private final String desc;
    private final String pattern;
    private final int index;

    /**
     * Constructs a new instance of this class.
     *
     * @param  desc
     *         A description of the error
     *
     * @param  regex
     *         The erroneous pattern
     *
     * @param  index
     *         The approximate index in the pattern of the error,
     *         or {@code -1} if the index is not known
     */
    public PatternSyntaxException(String desc, String regex, int index) {
        this.desc = desc;
        this.pattern = regex;
        this.index = index;
    }

    /**
     * Retrieves the error index.
     *
     * @return  The approximate index in the pattern of the error,
     *         or {@code -1} if the index is not known
     */
    public int getIndex() {
        return index;
    }

    /**
     * Retrieves the description of the error.
     *
     * @return  The description of the error
     */
    public String getDescription() {
        return desc;
    }

    /**
     * Retrieves the erroneous regular-expression pattern.
     *
     * @return  The erroneous pattern
     */
    public String getPattern() {
        return pattern;
    }

    /**
     * Returns a multi-line string containing the description of the syntax
     * error and its index, the erroneous regular-expression pattern, and a
     * visual indication of the error index within the pattern.
     *
     * <p> The first line holds the description, followed by
     * {@code " near index N"} when the index is non-negative. The second
     * line holds the pattern. A third line, consisting of {@code index}
     * spaces followed by a caret, is added only when the index is
     * non-negative and lies within the pattern or exactly at its end; an
     * index beyond the pattern (or a {@code null} pattern) has no position
     * to point at, and padding out to it could build an arbitrarily large
     * string.
     *
     * @return  The full detail message
     */
    @Override
    public String getMessage() {
        String nl = System.lineSeparator();
        StringBuilder sb = new StringBuilder();
        sb.append(desc);
        if (index >= 0) {
            sb.append(" near index ");
            sb.append(index);
        }
        sb.append(nl);
        sb.append(pattern);
        // Bound the padding by the pattern length so that a bogus index
        // supplied to the public constructor cannot exhaust memory.
        if (index >= 0 && pattern != null && index <= pattern.length()) {
            sb.append(nl);
            for (int i = 0; i < index; i++) {
                sb.append(' ');
            }
            sb.append('^');
        }
        return sb.toString();
    }
}

View file

@ -0,0 +1,219 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
import java.util.HashMap;
import java.util.regex.Pattern.CharPredicate;
import static java.util.regex.ASCII.*;
/**
 * A utility class to print out the pattern node tree.
 *
 * <p> {@link #main} compiles its first argument as a regular expression and
 * {@link #walk} dumps the compiled node graph to {@code System.err}, one
 * node per line, indented by nesting depth. Each node is given a sequence
 * number the first time it is printed; when a node's {@code next} link
 * points at a node that has already been numbered, the line is suffixed
 * with {@code (=>id)} to show the back reference.
 */
class PrintPattern {

    /** Sequence numbers handed out to nodes in the order they are first printed. */
    private static final HashMap<Pattern.Node, Integer> ids = new HashMap<>();

    /**
     * Prints one numbered line for {@code node}: its id, {@code depth * 2}
     * columns of padding, and {@code text} in angle brackets, followed by
     * {@code (=>id)} if {@code node.next} has already been numbered.
     *
     * @param node  the node being described; registered in {@code ids} if new
     * @param text  the label to show between angle brackets
     * @param depth the nesting depth used to compute the padding width
     */
    private static void print(Pattern.Node node, String text, int depth) {
        if (!ids.containsKey(node)) {
            ids.put(node, ids.size());
        }
        print("%6d:%" + (depth==0? "": depth<<1) + "s<%s>", ids.get(node), "", text);
        if (ids.containsKey(node.next)) {
            // The cast is required: with a bare Integer argument, overload
            // resolution picks print(String, int) by unboxing before it ever
            // considers the varargs formatter, so the id would be treated as
            // an indent depth and the text "(=>%d)" printed literally.
            print(" (=>%d)", (Object) ids.get(node.next));
        }
        print("%n");
    }

    /**
     * Prints an unnumbered line: {@code depth * 2} columns of padding and
     * {@code s} in angle brackets, terminated by a newline.
     *
     * @param s     the label to show between angle brackets
     * @param depth the nesting depth used to compute the padding width
     */
    private static void print(String s, int depth) {
        print(" %" + (depth==0?"":depth<<1) + "s<%s>%n", "", s);
    }

    /**
     * Formats and writes to {@code System.err}.
     *
     * @param fmt  a {@code printf}-style format string
     * @param args the arguments referenced by the format string
     */
    private static void print(String fmt, Object ... args) {
        System.err.printf(fmt, args);
    }

    /**
     * Renders an array of code points by concatenating
     * {@link #toStringCP(int)} of each element.
     */
    private static String toStringCPS(int[] cps) {
        StringBuilder sb = new StringBuilder(cps.length);
        for (int cp : cps) {
            sb.append(toStringCP(cp));
        }
        return sb.toString();
    }

    /**
     * Renders one code point: the character itself when {@code isPrint(cp)}
     * holds, otherwise {@code \\u} followed by the value in hexadecimal
     * (not zero-padded).
     */
    private static String toStringCP(int cp) {
        return (isPrint(cp) ? "" + (char)cp
                            : "\\u" + Integer.toString(cp, 16));
    }

    /**
     * Renders a repetition range. When {@code max} equals
     * {@code Pattern.MAX_REPS} the range is shown as {@code *} (min 0),
     * {@code +} (min 1) or <code>{min, max}</code> with the literal word
     * "max"; otherwise both bounds are shown numerically.
     */
    private static String toStringRange(int min, int max) {
        if (max == Pattern.MAX_REPS) {
            if (min == 0)
                return " * ";
            else if (min == 1)
                return " + ";
            return "{" + min + ", max}";
        }
        return "{" + min + ", " + max + "}";
    }

    /**
     * Maps an {@code ASCII} ctype constant to a readable name, or
     * {@code "ASCII ?"} for a value that has no case here.
     * Not referenced elsewhere in this class.
     */
    private static String toStringCtype(int type) {
        switch(type) {
        case UPPER:  return "ASCII.UPPER";
        case LOWER:  return "ASCII.LOWER";
        case DIGIT:  return "ASCII.DIGIT";
        case SPACE:  return "ASCII.SPACE";
        case PUNCT:  return "ASCII.PUNCT";
        case CNTRL:  return "ASCII.CNTRL";
        case BLANK:  return "ASCII.BLANK";
        case UNDER:  return "ASCII.UNDER";
        case ASCII:  return "ASCII.ASCII";
        case ALPHA:  return "ASCII.ALPHA";
        case ALNUM:  return "ASCII.ALNUM";
        case GRAPH:  return "ASCII.GRAPH";
        case WORD:   return "ASCII.WORD";
        case XDIGIT: return "ASCII.XDIGIT";
        default: return "ASCII ?";
        }
    }

    /**
     * Returns the node's class name with everything up to and including
     * the last {@code '$'} removed, i.e. the nested-class simple name.
     */
    private static String toString(Pattern.Node node) {
        String name = node.getClass().getName();
        return name.substring(name.lastIndexOf('$') + 1);
    }

    /** Display names for predicates that can be looked up by identity. */
    static HashMap<CharPredicate, String> pmap;

    static {
        pmap = new HashMap<>();
        pmap.put(Pattern.ALL(), "All");
        pmap.put(Pattern.DOT(), "Dot");
        pmap.put(Pattern.UNIXDOT(), "UnixDot");
        pmap.put(Pattern.VertWS(), "VertWS");
        pmap.put(Pattern.HorizWS(), "HorizWS");
        pmap.put(CharPredicates.ASCII_DIGIT(), "ASCII.DIGIT");
        pmap.put(CharPredicates.ASCII_WORD(), "ASCII.WORD");
        pmap.put(CharPredicates.ASCII_SPACE(), "ASCII.SPACE");
    }

    /**
     * Prints the chain of nodes starting at {@code node}, following
     * {@code next} links and recursing into nested sub-graphs (loop bodies,
     * quantified atoms, group contents, branch alternatives) one level
     * deeper. The walk of a chain stops at {@code null}, at
     * {@code Pattern.accept}, or at a node that closes an enclosing
     * construct ({@code Loop}, {@code GroupTail}, {@code BranchConn}),
     * which the enclosing call prints itself.
     *
     * @param node  the first node of the chain to print; may be {@code null}
     * @param depth the nesting depth of the caller; this chain is printed
     *              at {@code depth + 1}
     */
    static void walk(Pattern.Node node, int depth) {
        depth++;
        while(node != null) {
            String name = toString(node);
            String str;
            if (node instanceof Pattern.Prolog) {
                print(node, name, depth);
                // print the loop here
                Pattern.Loop loop = ((Pattern.Prolog)node).loop;
                name = toString(loop);
                str = name + " " + toStringRange(loop.cmin, loop.cmax);
                print(loop, str, depth);
                walk(loop.body, depth);
                print("/" + name, depth);
                // continue with whatever follows the loop node
                node = loop;
            } else if (node instanceof Pattern.Loop) {
                return;  // stop here, body.next -> loop
            } else if (node instanceof Pattern.Curly) {
                Pattern.Curly c = (Pattern.Curly)node;
                str = "Curly " + c.type + " " + toStringRange(c.cmin, c.cmax);
                print(node, str, depth);
                walk(c.atom, depth);
                print("/Curly", depth);
            } else if (node instanceof Pattern.GroupCurly) {
                Pattern.GroupCurly gc = (Pattern.GroupCurly)node;
                // NOTE(review): groupIndex is halved for display; presumably
                // each group occupies two slots -- confirm against Pattern.
                str = "GroupCurly " + gc.groupIndex / 2 +
                      ", " + gc.type + " " + toStringRange(gc.cmin, gc.cmax);
                print(node, str, depth);
                walk(gc.atom, depth);
                print("/GroupCurly", depth);
            } else if (node instanceof Pattern.GroupHead) {
                Pattern.GroupHead head = (Pattern.GroupHead)node;
                Pattern.GroupTail tail = head.tail;
                print(head, "Group.head " + (tail.groupIndex / 2), depth);
                walk(head.next, depth);
                print(tail, "/Group.tail " + (tail.groupIndex / 2), depth);
                // continue with whatever follows the group's tail
                node = tail;
            } else if (node instanceof Pattern.GroupTail) {
                return;  // stopper
            } else if (node instanceof Pattern.Ques) {
                print(node, "Ques " + ((Pattern.Ques)node).type, depth);
                walk(((Pattern.Ques)node).atom, depth);
                print("/Ques", depth);
            } else if (node instanceof Pattern.Branch) {
                Pattern.Branch b = (Pattern.Branch)node;
                print(b, name, depth);
                int i = 0;
                while (true) {
                    if (b.atoms[i] != null) {
                        walk(b.atoms[i], depth);
                    } else {
                        // a null alternative is reported rather than walked
                        print("  (accepted)", depth);
                    }
                    if (++i == b.size)
                        break;
                    print("-branch.separator-", depth);
                }
                // continue with whatever follows the branch's join node
                node = b.conn;
                print(node, "/Branch", depth);
            } else if (node instanceof Pattern.BranchConn) {
                return;
            } else if (node instanceof Pattern.CharProperty) {
                str = pmap.get(((Pattern.CharProperty)node).predicate);
                if (str == null)
                    str = toString(node);
                else
                    str = "Single \"" + str + "\"";
                print(node, str, depth);
            } else if (node instanceof Pattern.SliceNode) {
                str = name + "  \"" +
                      toStringCPS(((Pattern.SliceNode)node).buffer) + "\"";
                print(node, str, depth);
            } else if (node instanceof Pattern.CharPropertyGreedy) {
                Pattern.CharPropertyGreedy gcp = (Pattern.CharPropertyGreedy)node;
                String pstr = pmap.get(gcp.predicate);
                if (pstr == null)
                    pstr = gcp.predicate.toString();
                else
                    pstr = "Single \"" + pstr + "\"";
                str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+");
                print(node, str, depth);
            } else if (node instanceof Pattern.BackRef) {
                str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;
                print(node, str, depth);
            } else if (node instanceof Pattern.LastNode) {
                print(node, "END", depth);
            } else if (node == Pattern.accept) {
                return;
            } else {
                print(node, name, depth);
            }
            node = node.next;
        }
    }

    /**
     * Compiles {@code args[0]} as a regular expression, echoes the pattern
     * to {@code System.out}, and dumps its node tree to {@code System.err}.
     *
     * @param args command-line arguments; the first is the regex to print
     */
    public static void main(String[] args) {
        Pattern p = Pattern.compile(args[0]);
        System.out.println("   Pattern: " + p);
        walk(p.root, 0);
    }
}

View file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2000, 2006, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* Classes for matching character sequences against patterns specified
* by regular expressions.
*
* <p> An instance of the {@link java.util.regex.Pattern} class
* represents a regular expression that is specified in string form in
* a syntax similar to that used by Perl.
*
* <p> Instances of the {@link java.util.regex.Matcher} class are used
* to match character sequences against a given pattern. Input is
* provided to matchers via the {@link java.lang.CharSequence}
* interface in order to support matching against characters from a
* wide variety of input sources. </p>
*
* <p> Unless otherwise noted, passing a <code>null</code> argument to a
* method in any class or interface in this package will cause a
* {@link java.lang.NullPointerException NullPointerException} to be
* thrown.
*
* <h2>Related Documentation</h2>
*
* <p> An excellent tutorial and overview of regular expressions is <a
* href="http://www.oreilly.com/catalog/regex/"><i>Mastering Regular
* Expressions</i>, Jeffrey E. F. Friedl, O'Reilly and Associates,
* 1997.</a> </p>
*
* @since 1.4
* @author Mike McCloskey
* @author Mark Reinhold
*/
package java.util.regex;