8187443: Forest Consolidation: Move files to unified layout

Reviewed-by: darcy, ihse
2025-08-28 15:24:43 +02:00 · 2017-09-12 19:03:39 +02:00 · 2017-09-12 19:03:39 +02:00 · 3789983e89
commit 3789983e89
parent 270fe13182
56923 changed files with 3 additions and 15727 deletions
--- a/src/java.base/share/classes/java/util/regex/ASCII.java
+++ b/src/java.base/share/classes/java/util/regex/ASCII.java
@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+
+/**
+ * Utility class that implements the standard C ctype functionality.
+ *
+ * @author Hong Zhang
+ */
+
+final class ASCII {
+
+    static final int UPPER   = 0x00000100;
+
+    static final int LOWER   = 0x00000200;
+
+    static final int DIGIT   = 0x00000400;
+
+    static final int SPACE   = 0x00000800;
+
+    static final int PUNCT   = 0x00001000;
+
+    static final int CNTRL   = 0x00002000;
+
+    static final int BLANK   = 0x00004000;
+
+    static final int HEX     = 0x00008000;
+
+    static final int UNDER   = 0x00010000;
+
+    static final int ASCII   = 0x0000FF00;
+
+    static final int ALPHA   = (UPPER|LOWER);
+
+    static final int ALNUM   = (UPPER|LOWER|DIGIT);
+
+    static final int GRAPH   = (PUNCT|UPPER|LOWER|DIGIT);
+
+    static final int WORD    = (UPPER|LOWER|UNDER|DIGIT);
+
+    static final int XDIGIT  = (HEX);
+
+    private static final int[] ctype = new int[] {
+        CNTRL,                  /* 00 (NUL) */
+        CNTRL,                  /* 01 (SOH) */
+        CNTRL,                  /* 02 (STX) */
+        CNTRL,                  /* 03 (ETX) */
+        CNTRL,                  /* 04 (EOT) */
+        CNTRL,                  /* 05 (ENQ) */
+        CNTRL,                  /* 06 (ACK) */
+        CNTRL,                  /* 07 (BEL) */
+        CNTRL,                  /* 08 (BS)  */
+        SPACE+CNTRL+BLANK,      /* 09 (HT)  */
+        SPACE+CNTRL,            /* 0A (LF)  */
+        SPACE+CNTRL,            /* 0B (VT)  */
+        SPACE+CNTRL,            /* 0C (FF)  */
+        SPACE+CNTRL,            /* 0D (CR)  */
+        CNTRL,                  /* 0E (SI)  */
+        CNTRL,                  /* 0F (SO)  */
+        CNTRL,                  /* 10 (DLE) */
+        CNTRL,                  /* 11 (DC1) */
+        CNTRL,                  /* 12 (DC2) */
+        CNTRL,                  /* 13 (DC3) */
+        CNTRL,                  /* 14 (DC4) */
+        CNTRL,                  /* 15 (NAK) */
+        CNTRL,                  /* 16 (SYN) */
+        CNTRL,                  /* 17 (ETB) */
+        CNTRL,                  /* 18 (CAN) */
+        CNTRL,                  /* 19 (EM)  */
+        CNTRL,                  /* 1A (SUB) */
+        CNTRL,                  /* 1B (ESC) */
+        CNTRL,                  /* 1C (FS)  */
+        CNTRL,                  /* 1D (GS)  */
+        CNTRL,                  /* 1E (RS)  */
+        CNTRL,                  /* 1F (US)  */
+        SPACE+BLANK,            /* 20 SPACE */
+        PUNCT,                  /* 21 !     */
+        PUNCT,                  /* 22 "     */
+        PUNCT,                  /* 23 #     */
+        PUNCT,                  /* 24 $     */
+        PUNCT,                  /* 25 %     */
+        PUNCT,                  /* 26 &     */
+        PUNCT,                  /* 27 '     */
+        PUNCT,                  /* 28 (     */
+        PUNCT,                  /* 29 )     */
+        PUNCT,                  /* 2A *     */
+        PUNCT,                  /* 2B +     */
+        PUNCT,                  /* 2C ,     */
+        PUNCT,                  /* 2D -     */
+        PUNCT,                  /* 2E .     */
+        PUNCT,                  /* 2F /     */
+        DIGIT+HEX+0,            /* 30 0     */
+        DIGIT+HEX+1,            /* 31 1     */
+        DIGIT+HEX+2,            /* 32 2     */
+        DIGIT+HEX+3,            /* 33 3     */
+        DIGIT+HEX+4,            /* 34 4     */
+        DIGIT+HEX+5,            /* 35 5     */
+        DIGIT+HEX+6,            /* 36 6     */
+        DIGIT+HEX+7,            /* 37 7     */
+        DIGIT+HEX+8,            /* 38 8     */
+        DIGIT+HEX+9,            /* 39 9     */
+        PUNCT,                  /* 3A :     */
+        PUNCT,                  /* 3B ;     */
+        PUNCT,                  /* 3C <     */
+        PUNCT,                  /* 3D =     */
+        PUNCT,                  /* 3E >     */
+        PUNCT,                  /* 3F ?     */
+        PUNCT,                  /* 40 @     */
+        UPPER+HEX+10,           /* 41 A     */
+        UPPER+HEX+11,           /* 42 B     */
+        UPPER+HEX+12,           /* 43 C     */
+        UPPER+HEX+13,           /* 44 D     */
+        UPPER+HEX+14,           /* 45 E     */
+        UPPER+HEX+15,           /* 46 F     */
+        UPPER+16,               /* 47 G     */
+        UPPER+17,               /* 48 H     */
+        UPPER+18,               /* 49 I     */
+        UPPER+19,               /* 4A J     */
+        UPPER+20,               /* 4B K     */
+        UPPER+21,               /* 4C L     */
+        UPPER+22,               /* 4D M     */
+        UPPER+23,               /* 4E N     */
+        UPPER+24,               /* 4F O     */
+        UPPER+25,               /* 50 P     */
+        UPPER+26,               /* 51 Q     */
+        UPPER+27,               /* 52 R     */
+        UPPER+28,               /* 53 S     */
+        UPPER+29,               /* 54 T     */
+        UPPER+30,               /* 55 U     */
+        UPPER+31,               /* 56 V     */
+        UPPER+32,               /* 57 W     */
+        UPPER+33,               /* 58 X     */
+        UPPER+34,               /* 59 Y     */
+        UPPER+35,               /* 5A Z     */
+        PUNCT,                  /* 5B [     */
+        PUNCT,                  /* 5C \     */
+        PUNCT,                  /* 5D ]     */
+        PUNCT,                  /* 5E ^     */
+        PUNCT|UNDER,            /* 5F _     */
+        PUNCT,                  /* 60 `     */
+        LOWER+HEX+10,           /* 61 a     */
+        LOWER+HEX+11,           /* 62 b     */
+        LOWER+HEX+12,           /* 63 c     */
+        LOWER+HEX+13,           /* 64 d     */
+        LOWER+HEX+14,           /* 65 e     */
+        LOWER+HEX+15,           /* 66 f     */
+        LOWER+16,               /* 67 g     */
+        LOWER+17,               /* 68 h     */
+        LOWER+18,               /* 69 i     */
+        LOWER+19,               /* 6A j     */
+        LOWER+20,               /* 6B k     */
+        LOWER+21,               /* 6C l     */
+        LOWER+22,               /* 6D m     */
+        LOWER+23,               /* 6E n     */
+        LOWER+24,               /* 6F o     */
+        LOWER+25,               /* 70 p     */
+        LOWER+26,               /* 71 q     */
+        LOWER+27,               /* 72 r     */
+        LOWER+28,               /* 73 s     */
+        LOWER+29,               /* 74 t     */
+        LOWER+30,               /* 75 u     */
+        LOWER+31,               /* 76 v     */
+        LOWER+32,               /* 77 w     */
+        LOWER+33,               /* 78 x     */
+        LOWER+34,               /* 79 y     */
+        LOWER+35,               /* 7A z     */
+        PUNCT,                  /* 7B {     */
+        PUNCT,                  /* 7C |     */
+        PUNCT,                  /* 7D }     */
+        PUNCT,                  /* 7E ~     */
+        CNTRL,                  /* 7F (DEL) */
+    };
+
+    static int getType(int ch) {
+        return ((ch & 0xFFFFFF80) == 0 ? ctype[ch] : 0);
+    }
+
+    static boolean isType(int ch, int type) {
+        return (getType(ch) & type) != 0;
+    }
+
+    static boolean isAscii(int ch) {
+        return ((ch & 0xFFFFFF80) == 0);
+    }
+
+    static boolean isAlpha(int ch) {
+        return isType(ch, ALPHA);
+    }
+
+    static boolean isDigit(int ch) {
+        return ((ch-'0')|('9'-ch)) >= 0;
+    }
+
+    static boolean isAlnum(int ch) {
+        return isType(ch, ALNUM);
+    }
+
+    static boolean isGraph(int ch) {
+        return isType(ch, GRAPH);
+    }
+
+    static boolean isPrint(int ch) {
+        return ((ch-0x20)|(0x7E-ch)) >= 0;
+    }
+
+    static boolean isPunct(int ch) {
+        return isType(ch, PUNCT);
+    }
+
+    static boolean isSpace(int ch) {
+        return isType(ch, SPACE);
+    }
+
+    static boolean isHexDigit(int ch) {
+        return isType(ch, HEX);
+    }
+
+    static boolean isOctDigit(int ch) {
+        return ((ch-'0')|('7'-ch)) >= 0;
+    }
+
+    static boolean isCntrl(int ch) {
+        return isType(ch, CNTRL);
+    }
+
+    static boolean isLower(int ch) {
+        return ((ch-'a')|('z'-ch)) >= 0;
+    }
+
+    static boolean isUpper(int ch) {
+        return ((ch-'A')|('Z'-ch)) >= 0;
+    }
+
+    static boolean isWord(int ch) {
+        return isType(ch, WORD);
+    }
+
+    static int toDigit(int ch) {
+        return (ctype[ch & 0x7F] & 0x3F);
+    }
+
+    static int toLower(int ch) {
+        return isUpper(ch) ? (ch + 0x20) : ch;
+    }
+
+    static int toUpper(int ch) {
+        return isLower(ch) ? (ch - 0x20) : ch;
+    }
+
+}
--- a/src/java.base/share/classes/java/util/regex/CharPredicates.java
+++ b/src/java.base/share/classes/java/util/regex/CharPredicates.java
@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.regex.Pattern.CharPredicate;
+import java.util.regex.Pattern.BmpCharPredicate;
+
+class CharPredicates {
+
+    static final CharPredicate ALPHABETIC() {
+        return Character::isAlphabetic;
+    }
+
+    // \p{gc=Decimal_Number}
+    static final CharPredicate DIGIT() {
+        return Character::isDigit;
+    }
+
+    static final CharPredicate LETTER() {
+        return Character::isLetter;
+    }
+
+    static final CharPredicate IDEOGRAPHIC() {
+        return Character::isIdeographic;
+    }
+
+    static final CharPredicate LOWERCASE() {
+        return Character::isLowerCase;
+    }
+
+    static final CharPredicate UPPERCASE() {
+        return Character::isUpperCase;
+    }
+
+    static final CharPredicate TITLECASE() {
+        return Character::isTitleCase;
+    }
+
+    // \p{Whitespace}
+    static final CharPredicate WHITE_SPACE() {
+        return ch ->
+            ((((1 << Character.SPACE_SEPARATOR) |
+               (1 << Character.LINE_SEPARATOR) |
+               (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
+            != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
+    }
+
+    // \p{gc=Control}
+    static final CharPredicate CONTROL() {
+        return ch -> Character.getType(ch) == Character.CONTROL;
+    }
+
+    // \p{gc=Punctuation}
+    static final CharPredicate PUNCTUATION() {
+        return ch ->
+            ((((1 << Character.CONNECTOR_PUNCTUATION) |
+               (1 << Character.DASH_PUNCTUATION) |
+               (1 << Character.START_PUNCTUATION) |
+               (1 << Character.END_PUNCTUATION) |
+               (1 << Character.OTHER_PUNCTUATION) |
+               (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
+               (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
+            != 0;
+    }
+
+    // \p{gc=Decimal_Number}
+    // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
+    static final CharPredicate HEX_DIGIT() {
+        return DIGIT().union(ch -> (ch >= 0x0030 && ch <= 0x0039) ||
+                (ch >= 0x0041 && ch <= 0x0046) ||
+                (ch >= 0x0061 && ch <= 0x0066) ||
+                (ch >= 0xFF10 && ch <= 0xFF19) ||
+                (ch >= 0xFF21 && ch <= 0xFF26) ||
+                (ch >= 0xFF41 && ch <= 0xFF46));
+    }
+
+    static final CharPredicate ASSIGNED() {
+        return ch -> Character.getType(ch) != Character.UNASSIGNED;
+    }
+
+    // PropList.txt:Noncharacter_Code_Point
+    static final CharPredicate NONCHARACTER_CODE_POINT() {
+        return ch -> (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
+    }
+
+    // \p{alpha}
+    // \p{digit}
+    static final CharPredicate ALNUM() {
+        return ALPHABETIC().union(DIGIT());
+    }
+
+    // \p{Whitespace} --
+    // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
+    //  \p{gc=Line_Separator}
+    //  \p{gc=Paragraph_Separator}]
+    static final CharPredicate BLANK() {
+        return ch ->
+            Character.getType(ch) == Character.SPACE_SEPARATOR ||
+            ch == 0x9; // \N{HT}
+    }
+
+    // [^
+    //  \p{space}
+    //  \p{gc=Control}
+    //  \p{gc=Surrogate}
+    //  \p{gc=Unassigned}]
+    static final CharPredicate GRAPH() {
+        return ch ->
+            ((((1 << Character.SPACE_SEPARATOR) |
+               (1 << Character.LINE_SEPARATOR) |
+               (1 << Character.PARAGRAPH_SEPARATOR) |
+               (1 << Character.CONTROL) |
+               (1 << Character.SURROGATE) |
+               (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
+            == 0;
+    }
+
+    // \p{graph}
+    // \p{blank}
+    // -- \p{cntrl}
+    static final CharPredicate PRINT() {
+        return GRAPH().union(BLANK()).and(CONTROL().negate());
+    }
+
+    //  200C..200D    PropList.txt:Join_Control
+    static final CharPredicate JOIN_CONTROL() {
+        return ch -> ch == 0x200C || ch == 0x200D;
+    }
+
+    //  \p{alpha}
+    //  \p{gc=Mark}
+    //  \p{digit}
+    //  \p{gc=Connector_Punctuation}
+    //  \p{Join_Control}    200C..200D
+    static final CharPredicate WORD() {
+        return ALPHABETIC().union(ch -> ((((1 << Character.NON_SPACING_MARK) |
+                                  (1 << Character.ENCLOSING_MARK) |
+                                  (1 << Character.COMBINING_SPACING_MARK) |
+                                  (1 << Character.DECIMAL_DIGIT_NUMBER) |
+                                  (1 << Character.CONNECTOR_PUNCTUATION))
+                                 >> Character.getType(ch)) & 1) != 0,
+                         JOIN_CONTROL());
+    }
+
+    /////////////////////////////////////////////////////////////////////////////
+
+    private static CharPredicate getPosixPredicate(String name) {
+        switch (name) {
+            case "ALPHA": return ALPHABETIC();
+            case "LOWER": return LOWERCASE();
+            case "UPPER": return UPPERCASE();
+            case "SPACE": return WHITE_SPACE();
+            case "PUNCT": return PUNCTUATION();
+            case "XDIGIT": return HEX_DIGIT();
+            case "ALNUM": return ALNUM();
+            case "CNTRL": return CONTROL();
+            case "DIGIT": return DIGIT();
+            case "BLANK": return BLANK();
+            case "GRAPH": return GRAPH();
+            case "PRINT": return PRINT();
+            default: return null;
+        }
+    }
+
+    private static CharPredicate getUnicodePredicate(String name) {
+        switch (name) {
+            case "ALPHABETIC": return ALPHABETIC();
+            case "ASSIGNED": return ASSIGNED();
+            case "CONTROL": return CONTROL();
+            case "HEXDIGIT": return HEX_DIGIT();
+            case "IDEOGRAPHIC": return IDEOGRAPHIC();
+            case "JOINCONTROL": return JOIN_CONTROL();
+            case "LETTER": return LETTER();
+            case "LOWERCASE": return LOWERCASE();
+            case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT();
+            case "TITLECASE": return TITLECASE();
+            case "PUNCTUATION": return PUNCTUATION();
+            case "UPPERCASE": return UPPERCASE();
+            case "WHITESPACE": return WHITE_SPACE();
+            case "WORD": return WORD();
+            case "WHITE_SPACE": return WHITE_SPACE();
+            case "HEX_DIGIT": return HEX_DIGIT();
+            case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
+            case "JOIN_CONTROL": return JOIN_CONTROL();
+            default: return null;
+        }
+    }
+
+    public static CharPredicate forUnicodeProperty(String propName) {
+        propName = propName.toUpperCase(Locale.ROOT);
+        CharPredicate p = getUnicodePredicate(propName);
+        if (p != null)
+            return p;
+        return getPosixPredicate(propName);
+    }
+
+    public static CharPredicate forPOSIXName(String propName) {
+        return getPosixPredicate(propName.toUpperCase(Locale.ROOT)); // locale-neutral case mapping
+    }
+
+    /////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Returns a predicate matching all characters belonging to a named
+     * UnicodeScript, or {@code null} if no script has that name.
+     */
+    static CharPredicate forUnicodeScript(String name) {
+        try {
+            final Character.UnicodeScript script = Character.UnicodeScript.forName(name);
+            return ch -> script == Character.UnicodeScript.of(ch);
+        } catch (IllegalArgumentException ignored) {
+            return null; // forName rejected the name: not a known script
+        }
+    }
+
+    /**
+     * Returns a predicate for the characters of a named UnicodeBlock, or {@code null} if unknown.
+     */
+    static CharPredicate forUnicodeBlock(String name) {
+        try {
+            final Character.UnicodeBlock block = Character.UnicodeBlock.forName(name);
+            return ch -> block == Character.UnicodeBlock.of(ch);
+        } catch (IllegalArgumentException ignored) {
+            return null; // forName rejected the name: not a known block
+        }
+    }
+
+    /////////////////////////////////////////////////////////////////////////////
+
+    // unicode categories, aliases, properties, java methods ...
+
+    static CharPredicate forProperty(String name) {
+        // Unicode character property aliases, defined in
+        // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
+        switch (name) {
+            case "Cn": return category(1<<Character.UNASSIGNED);
+            case "Lu": return category(1<<Character.UPPERCASE_LETTER);
+            case "Ll": return category(1<<Character.LOWERCASE_LETTER);
+            case "Lt": return category(1<<Character.TITLECASE_LETTER);
+            case "Lm": return category(1<<Character.MODIFIER_LETTER);
+            case "Lo": return category(1<<Character.OTHER_LETTER);
+            case "Mn": return category(1<<Character.NON_SPACING_MARK);
+            case "Me": return category(1<<Character.ENCLOSING_MARK);
+            case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
+            case "Nd": return category(1<<Character.DECIMAL_DIGIT_NUMBER);
+            case "Nl": return category(1<<Character.LETTER_NUMBER);
+            case "No": return category(1<<Character.OTHER_NUMBER);
+            case "Zs": return category(1<<Character.SPACE_SEPARATOR);
+            case "Zl": return category(1<<Character.LINE_SEPARATOR);
+            case "Zp": return category(1<<Character.PARAGRAPH_SEPARATOR);
+            case "Cc": return category(1<<Character.CONTROL);
+            case "Cf": return category(1<<Character.FORMAT);
+            case "Co": return category(1<<Character.PRIVATE_USE);
+            case "Cs": return category(1<<Character.SURROGATE);
+            case "Pd": return category(1<<Character.DASH_PUNCTUATION);
+            case "Ps": return category(1<<Character.START_PUNCTUATION);
+            case "Pe": return category(1<<Character.END_PUNCTUATION);
+            case "Pc": return category(1<<Character.CONNECTOR_PUNCTUATION);
+            case "Po": return category(1<<Character.OTHER_PUNCTUATION);
+            case "Sm": return category(1<<Character.MATH_SYMBOL);
+            case "Sc": return category(1<<Character.CURRENCY_SYMBOL);
+            case "Sk": return category(1<<Character.MODIFIER_SYMBOL);
+            case "So": return category(1<<Character.OTHER_SYMBOL);
+            case "Pi": return category(1<<Character.INITIAL_QUOTE_PUNCTUATION);
+            case "Pf": return category(1<<Character.FINAL_QUOTE_PUNCTUATION);
+            case "L": return category(((1<<Character.UPPERCASE_LETTER) |
+                              (1<<Character.LOWERCASE_LETTER) |
+                              (1<<Character.TITLECASE_LETTER) |
+                              (1<<Character.MODIFIER_LETTER)  |
+                              (1<<Character.OTHER_LETTER)));
+            case "M": return category(((1<<Character.NON_SPACING_MARK) |
+                              (1<<Character.ENCLOSING_MARK)   |
+                              (1<<Character.COMBINING_SPACING_MARK)));
+            case "N": return category(((1<<Character.DECIMAL_DIGIT_NUMBER) |
+                              (1<<Character.LETTER_NUMBER)        |
+                              (1<<Character.OTHER_NUMBER)));
+            case "Z": return category(((1<<Character.SPACE_SEPARATOR) |
+                              (1<<Character.LINE_SEPARATOR)  |
+                              (1<<Character.PARAGRAPH_SEPARATOR)));
+            case "C": return category(((1<<Character.CONTROL)     |
+                              (1<<Character.FORMAT)      |
+                              (1<<Character.PRIVATE_USE) |
+                              (1<<Character.SURROGATE)   |
+                              (1<<Character.UNASSIGNED))); // Other
+            case "P": return category(((1<<Character.DASH_PUNCTUATION)      |
+                              (1<<Character.START_PUNCTUATION)     |
+                              (1<<Character.END_PUNCTUATION)       |
+                              (1<<Character.CONNECTOR_PUNCTUATION) |
+                              (1<<Character.OTHER_PUNCTUATION)     |
+                              (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
+                              (1<<Character.FINAL_QUOTE_PUNCTUATION)));
+            case "S": return category(((1<<Character.MATH_SYMBOL)     |
+                              (1<<Character.CURRENCY_SYMBOL) |
+                              (1<<Character.MODIFIER_SYMBOL) |
+                              (1<<Character.OTHER_SYMBOL)));
+            case "LC": return category(((1<<Character.UPPERCASE_LETTER) |
+                               (1<<Character.LOWERCASE_LETTER) |
+                               (1<<Character.TITLECASE_LETTER)));
+            case "LD": return category(((1<<Character.UPPERCASE_LETTER) |
+                               (1<<Character.LOWERCASE_LETTER) |
+                               (1<<Character.TITLECASE_LETTER) |
+                               (1<<Character.MODIFIER_LETTER)  |
+                               (1<<Character.OTHER_LETTER)     |
+                               (1<<Character.DECIMAL_DIGIT_NUMBER)));
+            case "L1": return range(0x00, 0xFF); // Latin-1
+            case "all": return Pattern.ALL();
+            // Posix regular expression character classes, defined in
+            // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
+            case "ASCII": return range(0x00, 0x7F);   // ASCII
+            case "Alnum": return ctype(ASCII.ALNUM);  // Alphanumeric characters
+            case "Alpha": return ctype(ASCII.ALPHA);  // Alphabetic characters
+            case "Blank": return ctype(ASCII.BLANK);  // Space and tab characters
+            case "Cntrl": return ctype(ASCII.CNTRL);  // Control characters
+            case "Digit": return range('0', '9');     // Numeric characters
+            case "Graph": return ctype(ASCII.GRAPH);  // printable and visible
+            case "Lower": return range('a', 'z');     // Lower-case alphabetic
+            case "Print": return range(0x20, 0x7E);   // Printable characters
+            case "Punct": return ctype(ASCII.PUNCT);  // Punctuation characters
+            case "Space": return ctype(ASCII.SPACE);  // Space characters
+            case "Upper": return range('A', 'Z');     // Upper-case alphabetic
+            case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
+
+            // Java character properties, defined by methods in Character.java
+            case "javaLowerCase": return java.lang.Character::isLowerCase;
+            case "javaUpperCase": return  Character::isUpperCase;
+            case "javaAlphabetic": return java.lang.Character::isAlphabetic;
+            case "javaIdeographic": return java.lang.Character::isIdeographic;
+            case "javaTitleCase": return java.lang.Character::isTitleCase;
+            case "javaDigit": return java.lang.Character::isDigit;
+            case "javaDefined": return java.lang.Character::isDefined;
+            case "javaLetter": return java.lang.Character::isLetter;
+            case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit;
+            case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart;
+            case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart;
+            case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart;
+            case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart;
+            case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable;
+            case "javaSpaceChar": return java.lang.Character::isSpaceChar;
+            case "javaWhitespace": return java.lang.Character::isWhitespace;
+            case "javaISOControl": return java.lang.Character::isISOControl;
+            case "javaMirrored": return java.lang.Character::isMirrored;
+            default: return null;
+        }
+    }
+
+    private static CharPredicate category(final int typeMask) {
+        return ch -> (typeMask & (1 << Character.getType(ch))) != 0;
+    }
+
+    private static CharPredicate range(final int lower, final int upper) {
+        return (BmpCharPredicate)ch -> lower <= ch && ch <= upper;
+    }
+
+    private static CharPredicate ctype(final int ctype) {
+        return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype);
+    }
+
+    /////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * Posix ASCII variants, not in the lookup map
+     */
+    static final BmpCharPredicate ASCII_DIGIT() {
+        return ch -> ch < 128 && ASCII.isDigit(ch);
+    }
+    static final BmpCharPredicate ASCII_WORD() {
+        return ch -> ch < 128 && ASCII.isWord(ch);
+    }
+    static final BmpCharPredicate ASCII_SPACE() {
+        return ch -> ch < 128 && ASCII.isSpace(ch);
+    }
+
+}
--- a/src/java.base/share/classes/java/util/regex/Grapheme.java
+++ b/src/java.base/share/classes/java/util/regex/Grapheme.java
@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+final class Grapheme {
+
+    /**
+     * Determines if there is an extended grapheme cluster boundary between two
+     * continuing characters {@code cp1} and {@code cp2}.
+     * <p>
+     * See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
+     * for the extended grapheme cluster boundary rules.
+     *
+     * @param cp1 the code point on the left of the candidate boundary
+     * @param cp2 the code point on the right of the candidate boundary
+     * @return {@code true} if the rule table permits a break between the two
+     */
+    static boolean isBoundary(int cp1, int cp2) {
+        return rules[getType(cp1)][getType(cp2)];
+    }
+
+    // types (used as indices into the rules table)
+    private static final int OTHER = 0;
+    private static final int CR = 1;
+    private static final int LF = 2;
+    private static final int CONTROL = 3;
+    private static final int EXTEND = 4;
+    private static final int RI = 5;
+    private static final int PREPEND = 6;
+    private static final int SPACINGMARK = 7;
+    private static final int L = 8;
+    private static final int V = 9;
+    private static final int T = 10;
+    private static final int LV = 11;
+    private static final int LVT = 12;
+
+    private static final int FIRST_TYPE = 0;
+    private static final int LAST_TYPE = 12;
+
+    // rules[left][right] is true when a break is allowed between a character
+    // of type "left" and a following character of type "right".
+    // The order of the assignments below matters: later rules overwrite
+    // earlier ones, so the higher-priority rules (GB 3-5) are applied last.
+    private static final boolean[][] rules;
+    static {
+        rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
+        // default, any + any
+        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
+            for (int j = FIRST_TYPE; j <= LAST_TYPE; j++)
+                rules[i][j] = true;
+        // GB 6 L x (L | V | LV | LVT)
+        rules[L][L] = false;
+        rules[L][V] = false;
+        rules[L][LV] = false;
+        rules[L][LVT] = false;
+        // GB 7 (LV | V) x (V | T)
+        rules[LV][V] = false;
+        rules[LV][T] = false;
+        rules[V][V] = false;
+        rules[V][T] = false;
+        // GB 8 (LVT | T) x T
+        rules[LVT][T] = false;
+        rules[T][T] = false;
+        // GB 8a RI x RI
+        rules[RI][RI] = false;
+        // GB 9 x Extend
+        // GB 9a x Spacing Mark
+        // GB 9b Prepend x
+        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++) {
+            rules[i][EXTEND] = false;
+            rules[i][SPACINGMARK] = false;
+            rules[PREPEND][i] = false;
+        }
+        // GB 4  (Control | CR | LF) +
+        // GB 5  + (Control | CR | LF)
+        // CR, LF and CONTROL are the consecutive type values 1..3
+        for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
+            for (int j = CR; j <= CONTROL; j++) {
+                rules[i][j] = true;
+                rules[j][i] = true;
+            }
+        // GB 3 CR x LF
+        rules[CR][LF] = false;
+        // GB 10 Any + Any  -> default
+    }
+
+    // Hangul syllables
+    private static final int SYLLABLE_BASE = 0xAC00;
+    private static final int LCOUNT = 19;
+    private static final int VCOUNT = 21;
+    private static final int TCOUNT = 28;
+    private static final int NCOUNT = VCOUNT * TCOUNT; // 588
+    private static final int SCOUNT = LCOUNT * NCOUNT; // 11172
+
+    // #tr29: SpacingMark exceptions: The following (which have
+    // General_Category = Spacing_Mark and would otherwise be included)
+    // are specifically excluded
+    //
+    // The two Myanmar ranges are U+1062..U+1064 and U+1067..U+106D;
+    // U+1065 and U+1066 lie between them and are not part of the list.
+    private static boolean isExcludedSpacingMark(int cp) {
+        return cp == 0x102B || cp == 0x102C || cp == 0x1038 ||
+               cp >= 0x1062 && cp <= 0x1064 ||
+               cp >= 0x1067 && cp <= 0x106D ||
+               cp == 0x1083 ||
+               cp >= 0x1087 && cp <= 0x108C ||
+               cp == 0x108F ||
+               cp >= 0x109A && cp <= 0x109C ||
+               cp == 0x1A61 || cp == 0x1A63 || cp == 0x1A64 ||
+               cp == 0xAA7B || cp == 0xAA7D;
+    }
+
+    /**
+     * Maps a code point to one of the break types declared above, starting
+     * from its {@link Character#getType(int) general category}. Categories
+     * that are not listed in the switch map to {@code OTHER}.
+     */
+    @SuppressWarnings("fallthrough")
+    private static int getType(int cp) {
+        int type = Character.getType(cp);
+        switch(type) {
+        case Character.CONTROL:
+            if (cp == 0x000D)
+                return CR;
+            if (cp == 0x000A)
+                return LF;
+            return CONTROL;
+        case Character.UNASSIGNED:
+            // NOTE: #tr29 lists "Unassigned and Default_Ignorable_Code_Point" as Control
+            // but GraphemeBreakTest.txt lists u+0378/reserved-0378 as "Other"
+            // so type it as "Other" to make the test happy
+            if (cp == 0x0378)
+                return OTHER;
+            // fall through: every other unassigned code point is Control
+
+        case Character.LINE_SEPARATOR:
+        case Character.PARAGRAPH_SEPARATOR:
+        case Character.SURROGATE:
+            return CONTROL;
+        case Character.FORMAT:
+            // ZWNJ and ZWJ extend the cluster instead of breaking it
+            if (cp == 0x200C || cp == 0x200D)
+                return EXTEND;
+            return CONTROL;
+        case Character.NON_SPACING_MARK:
+        case Character.ENCLOSING_MARK:
+            // NOTE:
+            // #tr29 "plus a few General_Category = Spacing_Mark needed for
+            // canonical equivalence."
+            // but for "extended grapheme clusters" support, there is no
+            // need actually to diff "extend" and "spacingmark" given GB9, GB9a
+            return EXTEND;
+        case Character.COMBINING_SPACING_MARK:
+            if (isExcludedSpacingMark(cp))
+                return OTHER;
+            // NOTE:
+            // 0x11720 and 0x11721 are mentioned in #tr29 as
+            // OTHER_LETTER but it appears their category has been updated to
+            // COMBINING_SPACING_MARK already (verified in ver.8)
+            return SPACINGMARK;
+        case Character.OTHER_SYMBOL:
+            // regional indicator symbols
+            if (cp >= 0x1F1E6 && cp <= 0x1F1FF)
+                return RI;
+            return OTHER;
+        case Character.MODIFIER_LETTER:
+            // WARNING:
+            // not mentioned in #tr29 but listed in GraphemeBreakProperty.txt
+            if (cp == 0xFF9E || cp == 0xFF9F)
+                return EXTEND;
+            return OTHER;
+        case Character.OTHER_LETTER:
+            if (cp == 0x0E33 || cp == 0x0EB3)
+                return SPACINGMARK;
+            // hangul jamo
+            if (cp >= 0x1100 && cp <= 0x11FF) {
+                if (cp <= 0x115F)
+                    return L;
+                if (cp <= 0x11A7)
+                    return V;
+                return T;
+            }
+            // hangul syllables
+            int sindex = cp - SYLLABLE_BASE;
+            if (sindex >= 0 && sindex < SCOUNT) {
+                // every TCOUNT-th syllable has no trailing consonant
+                if (sindex % TCOUNT == 0)
+                    return LV;
+                return LVT;
+            }
+            //  hangul jamo_extended A
+            if (cp >= 0xA960 && cp <= 0xA97C)
+                return L;
+            //  hangul jamo_extended B
+            if (cp >= 0xD7B0 && cp <= 0xD7C6)
+                return V;
+            if (cp >= 0xD7CB && cp <= 0xD7FB)
+                return T;
+        }
+        return OTHER;
+    }
+}
--- a/src/java.base/share/classes/java/util/regex/IntHashSet.java
+++ b/src/java.base/share/classes/java/util/regex/IntHashSet.java
@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+import java.util.Arrays;
+
+/**
+ * A lightweight hashset implementation for 'int'. Not safe for
+ * concurrent access.
+ * <p>
+ * Elements are stored in {@code entries} as consecutive (value, next) pairs,
+ * where {@code next} is the index of the next pair in the same bucket or
+ * {@code -1} at the end of the chain. {@code hashes} holds, per bucket, the
+ * index of the first pair of its chain, or {@code -1} for an empty bucket.
+ * <p>
+ * Any {@code int} value may be stored: the bucket is computed from the value
+ * with its sign bit cleared, so negative values never produce a negative
+ * array index.
+ */
+class IntHashSet {
+    private int[] entries;
+    private int[] hashes;
+    private int pos = 0;    // index of the next free slot in entries
+
+    public IntHashSet() {
+        this.entries = new int[16 << 1];      // initCapacity = 16;
+        this.hashes = new int[(16 / 2) | 1];  // odd -> fewer collisions
+        Arrays.fill(this.entries, -1);
+        Arrays.fill(this.hashes, -1);
+    }
+
+    /**
+     * Returns the bucket index of {@code i} in a table of {@code buckets}
+     * slots. Clearing the sign bit keeps the remainder non-negative; for
+     * non-negative {@code i} this is exactly {@code i % buckets}.
+     */
+    private static int bucket(int i, int buckets) {
+        return (i & Integer.MAX_VALUE) % buckets;
+    }
+
+    /**
+     * Returns {@code true} if {@code i} has been added since construction
+     * or the last {@link #clear()}.
+     */
+    public boolean contains(int i) {
+        int h = hashes[bucket(i, hashes.length)];
+        while (h != -1) {
+            if (entries[h] == i)
+                return true;
+            h = entries[h + 1];
+        }
+        return false;
+    }
+
+    /**
+     * Adds {@code i} to the set; does nothing if it is already present.
+     */
+    public void add(int i) {
+        int h0 = bucket(i, hashes.length);
+        int next = hashes[h0];
+        //  if invoker guarantees contains(i) checked before add(i)
+        //  the following check is not needed.
+        int next0 = next;
+        while (next0 != -1) {
+            if (entries[next0] == i)
+                return;
+            next0 = entries[next0 + 1];
+        }
+        // link the new pair in front of the bucket's existing chain
+        hashes[h0] = pos;
+        entries[pos++] = i;
+        entries[pos++] = next;
+        if (pos == entries.length)
+            expand();
+    }
+
+    /**
+     * Removes all elements. The current array sizes are kept.
+     */
+    public void clear() {
+        Arrays.fill(this.entries, -1);
+        Arrays.fill(this.hashes, -1);
+        pos = 0;
+    }
+
+    /**
+     * Doubles the entry storage, resizes the bucket table and re-links
+     * every stored value into its new bucket. Values keep their position
+     * in {@code entries}.
+     */
+    private void expand() {
+        int[] old = entries;
+        int[] es = new int[old.length << 1];
+        int hlen = (old.length / 2) | 1;
+        int[] hs = new int[hlen];
+        Arrays.fill(es, -1);
+        Arrays.fill(hs, -1);
+        for (int n = 0; n < pos;) {  // re-hashing
+            int i = old[n];
+            int hsh = bucket(i, hlen);
+            int next = hs[hsh];
+            hs[hsh] = n;
+            es[n++] = i;
+            es[n++] = next;
+        }
+        this.entries = es;
+        this.hashes = hs;
+    }
+}
--- a/src/java.base/share/classes/java/util/regex/MatchResult.java
+++ b/src/java.base/share/classes/java/util/regex/MatchResult.java
@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+/**
+ * The result of a match operation.
+ *
+ * <p>This interface contains query methods used to determine the
+ * results of a match against a regular expression. The match boundaries,
+ * groups and group boundaries can be seen but not modified through
+ * a {@code MatchResult}.
+ *
+ * @author  Michael McCloskey
+ * @see Matcher
+ * @since 1.5
+ */
+public interface MatchResult {
+
+    /**
+     * Reports where the match begins.
+     *
+     * @return  The index of the first character matched
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     */
+    int start();
+
+    /**
+     * Reports where the subsequence captured by the given group begins
+     * in this match.
+     *
+     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
+     * one, left to right.  Group zero stands for the entire pattern, which
+     * makes <i>m.</i>{@code start(0)} equivalent to
+     * <i>m.</i>{@code start()}.  </p>
+     *
+     * @param  group
+     *         The index of a capturing group in this matcher's pattern
+     *
+     * @return  The index of the first character captured by the group,
+     *          or {@code -1} if the match was successful but the group
+     *          itself did not match anything
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     *
+     * @throws  IndexOutOfBoundsException
+     *          If there is no capturing group in the pattern
+     *          with the given index
+     */
+    int start(int group);
+
+    /**
+     * Reports the offset just past the last character matched.
+     *
+     * @return  The offset after the last character matched
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     */
+    int end();
+
+    /**
+     * Reports the offset just past the last character of the subsequence
+     * captured by the given group in this match.
+     *
+     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
+     * one, left to right.  Group zero stands for the entire pattern, which
+     * makes <i>m.</i>{@code end(0)} equivalent to
+     * <i>m.</i>{@code end()}.  </p>
+     *
+     * @param  group
+     *         The index of a capturing group in this matcher's pattern
+     *
+     * @return  The offset after the last character captured by the group,
+     *          or {@code -1} if the match was successful
+     *          but the group itself did not match anything
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     *
+     * @throws  IndexOutOfBoundsException
+     *          If there is no capturing group in the pattern
+     *          with the given index
+     */
+    int end(int group);
+
+    /**
+     * Returns the part of the input that the previous match covered.
+     *
+     * <p> Given a matcher <i>m</i> over the input sequence <i>s</i>,
+     * <i>m.</i>{@code group()} is equivalent to
+     * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>{@code end())}.
+     * </p>
+     *
+     * <p> Some patterns, {@code a*} for instance, can match the empty
+     * string.  When the pattern successfully matches the empty string in
+     * the input, this method returns the empty string.  </p>
+     *
+     * @return The (possibly empty) subsequence matched by the previous match,
+     *         in string form
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     */
+    String group();
+
+    /**
+     * Returns the part of the input that the given group captured during
+     * the previous match operation.
+     *
+     * <p> Given a matcher <i>m</i>, an input sequence <i>s</i> and a group
+     * index <i>g</i>, <i>m.</i>{@code group(}<i>g</i>{@code )} is equivalent
+     * to <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
+     * ),}&nbsp;<i>m.</i>{@code end(}<i>g</i>{@code ))}.  </p>
+     *
+     * <p> <a href="Pattern.html#cg">Capturing groups</a> are numbered from
+     * one, left to right.  Group zero stands for the entire pattern, which
+     * makes {@code m.group(0)} equivalent to {@code m.group()}.
+     * </p>
+     *
+     * <p> When the match succeeded but the requested group did not take
+     * part in it, the result is {@code null}.  Some groups, {@code (a*)}
+     * for instance, can match the empty string; when such a group
+     * successfully matches the empty string in the input, this method
+     * returns the empty string.  </p>
+     *
+     * @param  group
+     *         The index of a capturing group in this matcher's pattern
+     *
+     * @return  The (possibly empty) subsequence captured by the group
+     *          during the previous match, or {@code null} if the group
+     *          failed to match part of the input
+     *
+     * @throws  IllegalStateException
+     *          If no match has yet been attempted,
+     *          or if the previous match operation failed
+     *
+     * @throws  IndexOutOfBoundsException
+     *          If there is no capturing group in the pattern
+     *          with the given index
+     */
+    String group(int group);
+
+    /**
+     * Reports how many capturing groups this match result's pattern has.
+     *
+     * <p> By convention group zero denotes the entire pattern; it is not
+     * part of this count.  </p>
+     *
+     * <p> Every non-negative integer up to and including the value
+     * returned by this method is guaranteed to be a valid group index for
+     * this matcher.  </p>
+     *
+     * @return The number of capturing groups in this matcher's pattern
+     */
+    int groupCount();
+
+}
--- a/src/java.base/share/classes/java/util/regex/Matcher.java
+++ b/src/java.base/share/classes/java/util/regex/Matcher.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
--- a/src/java.base/share/classes/java/util/regex/PatternSyntaxException.java
+++ b/src/java.base/share/classes/java/util/regex/PatternSyntaxException.java
@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+import sun.security.action.GetPropertyAction;
+
+
+/**
+ * Unchecked exception thrown to indicate a syntax error in a
+ * regular-expression pattern.
+ *
+ * @author  unascribed
+ * @since 1.4
+ * @spec JSR-51
+ */
+
+public class PatternSyntaxException
+    extends IllegalArgumentException
+{
+    private static final long serialVersionUID = -3864639126226059218L;
+
+    private final String desc;
+    private final String pattern;
+    private final int index;
+
+    /**
+     * Constructs a new instance of this class.
+     *
+     * @param  desc
+     *         A description of the error
+     *
+     * @param  regex
+     *         The erroneous pattern
+     *
+     * @param  index
+     *         The approximate index in the pattern of the error,
+     *         or {@code -1} if the index is not known
+     */
+    public PatternSyntaxException(String desc, String regex, int index) {
+        this.desc = desc;
+        this.pattern = regex;
+        this.index = index;
+    }
+
+    /**
+     * Retrieves the error index.
+     *
+     * @return  The approximate index in the pattern of the error,
+     *         or {@code -1} if the index is not known
+     */
+    public int getIndex() {
+        return index;
+    }
+
+    /**
+     * Retrieves the description of the error.
+     *
+     * @return  The description of the error
+     */
+    public String getDescription() {
+        return desc;
+    }
+
+    /**
+     * Retrieves the erroneous regular-expression pattern.
+     *
+     * @return  The erroneous pattern
+     */
+    public String getPattern() {
+        return pattern;
+    }
+
+    /**
+     * Returns a multi-line string containing the description of the syntax
+     * error and its index, the erroneous regular-expression pattern, and a
+     * visual indication of the error index within the pattern.
+     *
+     * <p> The visual indication (a line of spaces ending in {@code ^}) is
+     * only produced when the index is non-negative, the pattern is not
+     * {@code null}, and the index is not greater than the pattern length.
+     * An index beyond the pattern cannot point at anything, and padding
+     * for it would cost memory proportional to the index.  </p>
+     *
+     * @return  The full detail message
+     */
+    @Override
+    public String getMessage() {
+        String nl = System.lineSeparator();
+        StringBuilder sb = new StringBuilder();
+        sb.append(desc);
+        if (index >= 0) {
+            sb.append(" near index ");
+            sb.append(index);
+        }
+        sb.append(nl);
+        sb.append(pattern);
+        // index == length is kept: errors detected at the end of the
+        // pattern point just past its last character
+        if (index >= 0 && pattern != null && index <= pattern.length()) {
+            sb.append(nl);
+            for (int i = 0; i < index; i++) sb.append(' ');
+            sb.append('^');
+        }
+        return sb.toString();
+    }
+
+}
--- a/src/java.base/share/classes/java/util/regex/PrintPattern.java
+++ b/src/java.base/share/classes/java/util/regex/PrintPattern.java
@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package java.util.regex;
+
+import java.util.HashMap;
+import java.util.regex.Pattern.CharPredicate;
+import static java.util.regex.ASCII.*;
+
+/**
+ * A utility class to print out the pattern node tree.
+ */
+
+class PrintPattern {
+
+    // Ids handed out to nodes in the order they are first printed; shared by
+    // all calls because the map is static and is never cleared here.
+    private static HashMap<Pattern.Node, Integer> ids = new HashMap<>();
+
+    /**
+     * Prints one line for {@code node}: its id (assigned on first use),
+     * an indentation of {@code depth << 1} columns, and {@code text} in angle
+     * brackets. If {@code node.next} already has an id, that id is appended
+     * as {@code (=>id)}.
+     */
+    private static void print(Pattern.Node node, String text, int depth) {
+        if (!ids.containsKey(node))
+            ids.put(node, ids.size());
+        // "%<width>s" with an empty argument produces the indentation; the
+        // width is left out for depth 0 because a width of 0 is not a valid
+        // format width
+        print("%6d:%" + (depth==0? "": depth<<1) + "s<%s>", ids.get(node), "", text);
+        if (ids.containsKey(node.next))
+            print(" (=>%d)", ids.get(node.next));
+        print("%n");
+    }
+
+    /**
+     * Prints a line that is not tied to a node id: seven blanks in place of
+     * the id column, the indentation for {@code depth}, then {@code s} in
+     * angle brackets.
+     */
+    private static void print(String s, int depth) {
+        print("       %" + (depth==0?"":depth<<1) + "s<%s>%n", "", s);
+    }
+
+    /** Formats to {@code System.err}; all output of this class goes through here. */
+    private static void print(String fmt, Object ... args) {
+        System.err.printf(fmt, args);
+    }
+
+    /** Renders each code point with {@link #toStringCP(int)} and concatenates the results. */
+    private static String toStringCPS(int[] cps) {
+        StringBuilder sb = new StringBuilder(cps.length);
+        for (int cp : cps)
+            sb.append(toStringCP(cp));
+        return sb.toString();
+    }
+
+    /**
+     * Renders a code point as itself when the statically imported
+     * {@code isPrint} accepts it, otherwise as {@code \}{@code u} followed by
+     * its hexadecimal value (not zero-padded).
+     */
+    private static String toStringCP(int cp) {
+        return (isPrint(cp) ? "" + (char)cp
+                            : "\\u" + Integer.toString(cp, 16));
+    }
+
+    /**
+     * Renders a repetition range. An upper bound of {@code Pattern.MAX_REPS}
+     * is shown as {@code *}, {@code +} or {@code {min, max}}; any other
+     * range is shown as {@code {min, max}} with both numbers.
+     */
+    private static String toStringRange(int min, int max) {
+       if (max == Pattern.MAX_REPS) {
+           if (min == 0)
+               return " * ";
+           else if (min == 1)
+               return " + ";
+           return "{" + min + ", max}";
+       }
+       return "{" + min + ", " +  max + "}";
+    }
+
+    /**
+     * Maps one of the statically imported {@code ASCII} type constants to a
+     * readable name; unknown values yield {@code "ASCII ?"}.
+     */
+    private static String toStringCtype(int type) {
+        switch(type) {
+        case UPPER:  return "ASCII.UPPER";
+        case LOWER:  return "ASCII.LOWER";
+        case DIGIT:  return "ASCII.DIGIT";
+        case SPACE:  return "ASCII.SPACE";
+        case PUNCT:  return "ASCII.PUNCT";
+        case CNTRL:  return "ASCII.CNTRL";
+        case BLANK:  return "ASCII.BLANK";
+        case UNDER:  return "ASCII.UNDER";
+        case ASCII:  return "ASCII.ASCII";
+        case ALPHA:  return "ASCII.ALPHA";
+        case ALNUM:  return "ASCII.ALNUM";
+        case GRAPH:  return "ASCII.GRAPH";
+        case WORD:   return "ASCII.WORD";
+        case XDIGIT: return "ASCII.XDIGIT";
+        default: return "ASCII ?";
+        }
+    }
+
+    /** Returns the node's class name with everything up to the last {@code $} removed. */
+    private static String toString(Pattern.Node node) {
+        String name = node.getClass().getName();
+        return name.substring(name.lastIndexOf('$') + 1);
+    }
+
+    // Display names for well-known predicates, looked up by predicate object
+    // in walk().
+    // NOTE(review): the lookup can only hit if these factory methods return
+    // the same (or an equal) object on every call - confirm against Pattern
+    // and CharPredicates.
+    static HashMap<CharPredicate, String> pmap;
+    static {
+        pmap = new HashMap<>();
+        pmap.put(Pattern.ALL(), "All");
+        pmap.put(Pattern.DOT(), "Dot");
+        pmap.put(Pattern.UNIXDOT(), "UnixDot");
+        pmap.put(Pattern.VertWS(), "VertWS");
+        pmap.put(Pattern.HorizWS(), "HorizWS");
+
+        pmap.put(CharPredicates.ASCII_DIGIT(), "ASCII.DIGIT");
+        pmap.put(CharPredicates.ASCII_WORD(),  "ASCII.WORD");
+        pmap.put(CharPredicates.ASCII_SPACE(), "ASCII.SPACE");
+    }
+
+    /**
+     * Prints the node chain starting at {@code node}, following {@code next}
+     * links and recursing into nested structures (loop bodies, atoms, group
+     * contents, branch alternatives) one indentation level deeper.
+     *
+     * @param node  first node of the chain; {@code null} ends the walk
+     * @param depth indentation level of the caller; it is incremented before
+     *              anything is printed
+     */
+    static void walk(Pattern.Node node, int depth) {
+        depth++;
+        while(node != null) {
+            String name = toString(node);
+            String str;
+            if (node instanceof Pattern.Prolog) {
+                print(node, name, depth);
+                // print the loop here
+                Pattern.Loop loop = ((Pattern.Prolog)node).loop;
+                name = toString(loop);
+                str = name + " " + toStringRange(loop.cmin, loop.cmax);
+                print(loop, str, depth);
+                walk(loop.body, depth);
+                print("/" + name, depth);
+                // continue after the loop node, not after the prolog
+                node = loop;
+            } else if (node instanceof Pattern.Loop) {
+                return;  // stop here, body.next -> loop
+            } else if (node instanceof Pattern.Curly) {
+                Pattern.Curly c = (Pattern.Curly)node;
+                str = "Curly " + c.type + " " + toStringRange(c.cmin, c.cmax);
+                print(node, str, depth);
+                walk(c.atom, depth);
+                print("/Curly", depth);
+            } else if (node instanceof Pattern.GroupCurly) {
+                Pattern.GroupCurly gc = (Pattern.GroupCurly)node;
+                // groupIndex is halved for display here and below
+                str = "GroupCurly " + gc.groupIndex / 2 +
+                      ", " + gc.type + " " + toStringRange(gc.cmin, gc.cmax);
+                print(node, str, depth);
+                walk(gc.atom, depth);
+                print("/GroupCurly", depth);
+            } else if (node instanceof Pattern.GroupHead) {
+                Pattern.GroupHead head = (Pattern.GroupHead)node;
+                Pattern.GroupTail tail = head.tail;
+                print(head, "Group.head " + (tail.groupIndex / 2), depth);
+                walk(head.next, depth);
+                print(tail, "/Group.tail " + (tail.groupIndex / 2), depth);
+                // the nested walk returned at the tail; resume after it
+                node = tail;
+            } else if (node instanceof Pattern.GroupTail) {
+                return;  // stopper
+            } else if (node instanceof Pattern.Ques) {
+                print(node, "Ques " + ((Pattern.Ques)node).type, depth);
+                walk(((Pattern.Ques)node).atom, depth);
+                print("/Ques", depth);
+            } else if (node instanceof Pattern.Branch) {
+                Pattern.Branch b = (Pattern.Branch)node;
+                print(b, name, depth);
+                // print the first b.size alternatives, separated by a marker;
+                // a null alternative is shown as "(accepted)"
+                int i = 0;
+                while (true) {
+                    if (b.atoms[i] != null) {
+                        walk(b.atoms[i], depth);
+                    } else {
+                        print("  (accepted)", depth);
+                    }
+                    if (++i == b.size)
+                        break;
+                    print("-branch.separator-", depth);
+                }
+                // continue after the branch's connector node
+                node = b.conn;
+                print(node, "/Branch", depth);
+            } else if (node instanceof Pattern.BranchConn) {
+                return;
+            } else if (node instanceof Pattern.CharProperty) {
+                // known predicates get their display name, others the node's
+                // class name
+                str = pmap.get(((Pattern.CharProperty)node).predicate);
+                if (str == null)
+                    str = toString(node);
+                else
+                    str = "Single \"" + str + "\"";
+                print(node, str, depth);
+            } else if (node instanceof Pattern.SliceNode) {
+                str = name + "  \"" +
+                      toStringCPS(((Pattern.SliceNode)node).buffer) + "\"";
+                print(node, str, depth);
+            } else if (node instanceof Pattern.CharPropertyGreedy) {
+                Pattern.CharPropertyGreedy gcp = (Pattern.CharPropertyGreedy)node;
+                String pstr = pmap.get(gcp.predicate);
+                if (pstr == null)
+                    pstr = gcp.predicate.toString();
+                else
+                    pstr = "Single \"" + pstr + "\"";
+                // a minimum count of 0 is shown as "*", anything else as "+"
+                str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+");
+                print(node, str, depth);
+            } else if (node instanceof Pattern.BackRef) {
+                str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;
+                print(node, str, depth);
+            } else if (node instanceof Pattern.LastNode) {
+                print(node, "END", depth);
+            } else if (node == Pattern.accept) {
+                return;
+            } else {
+                print(node, name, depth);
+            }
+            node = node.next;
+        }
+    }
+
+    /**
+     * Compiles {@code args[0]} as a pattern, prints the pattern to standard
+     * output and its node tree to standard error. No argument check is done,
+     * so {@code args[0]} must be present.
+     */
+    public static void main(String[] args) {
+        Pattern p = Pattern.compile(args[0]);
+        System.out.println("   Pattern: " + p);
+        walk(p.root, 0);
+    }
+}
--- a/src/java.base/share/classes/java/util/regex/package-info.java
+++ b/src/java.base/share/classes/java/util/regex/package-info.java
@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2000, 2006, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Classes for matching character sequences against patterns specified
+ * by regular expressions.
+ *
+ * <p> An instance of the {@link java.util.regex.Pattern} class
+ * represents a regular expression that is specified in string form in
+ * a syntax similar to that used by Perl.
+ *
+ * <p> Instances of the {@link java.util.regex.Matcher} class are used
+ * to match character sequences against a given pattern.  Input is
+ * provided to matchers via the {@link java.lang.CharSequence}
+ * interface in order to support matching against characters from a
+ * wide variety of input sources. </p>
+ *
+ * <p> Unless otherwise noted, passing a {@code null} argument to a
+ * method in any class or interface in this package will cause a
+ * {@link java.lang.NullPointerException NullPointerException} to be
+ * thrown.
+ *
+ * <h2>Related Documentation</h2>
+ *
+ * <p> An excellent tutorial and overview of regular expressions is <a
+ * href="http://www.oreilly.com/catalog/regex/"><i>Mastering Regular
+ * Expressions</i>, Jeffrey E. F. Friedl, O'Reilly and Associates,
+ * 1997.</a> </p>
+ *
+ * @since 1.4
+ * @author Mike McCloskey
+ * @author Mark Reinhold
+ */
+package java.util.regex;