8221431: Support for Unicode 12.1

Reviewed-by: erikj, rriggs
2025-08-28 15:24:43 +02:00 · 2019-05-23 12:21:21 -07:00 · 2019-05-23 12:21:21 -07:00 · 93fabcdc5a
commit 93fabcdc5a
parent e4f31b1cd7
66 changed files with 60279 additions and 38178 deletions
--- a/src/java.base/share/classes/sun/text/normalizer/CodePointMap.java
+++ b/src/java.base/share/classes/sun/text/normalizer/CodePointMap.java
@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+// (c) 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// created: 2018may10 Markus W. Scherer
+
+package sun.text.normalizer;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
+ * This does not implement java.util.Map.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
+    /**
+     * Selectors for how getRange() should report value ranges overlapping with surrogates.
+     * Most users should use NORMAL.
+     *
+     * @see #getRange
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public enum RangeOption {
+        /**
+         * getRange() enumerates all same-value ranges as stored in the map.
+         * Most users should use this option.
+         *
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        NORMAL,
+        /**
+         * getRange() enumerates all same-value ranges as stored in the map,
+         * except that lead surrogates (U+D800..U+DBFF) are treated as having the
+         * surrogateValue, which is passed to getRange() as a separate parameter.
+         * The surrogateValue is not transformed via filter().
+         * See {@link Character#isHighSurrogate}.
+         *
+         * <p>Most users should use NORMAL instead.
+         *
+         * <p>This option is useful for maps that map surrogate code *units* to
+         * special values optimized for UTF-16 string processing
+         * or for special error behavior for unpaired surrogates,
+         * but those values are not to be associated with the lead surrogate code *points*.
+         *
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        FIXED_LEAD_SURROGATES,
+        /**
+         * getRange() enumerates all same-value ranges as stored in the map,
+         * except that all surrogates (U+D800..U+DFFF) are treated as having the
+         * surrogateValue, which is passed to getRange() as a separate parameter.
+         * The surrogateValue is not transformed via filter().
+         * See {@link Character#isSurrogate}.
+         *
+         * <p>Most users should use NORMAL instead.
+         *
+         * <p>This option is useful for maps that map surrogate code *units* to
+         * special values optimized for UTF-16 string processing
+         * or for special error behavior for unpaired surrogates,
+         * but those values are not to be associated with the surrogate code *points*.
+         *
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        FIXED_ALL_SURROGATES
+    }
+
+    /**
+     * Callback function interface: Modifies a map value.
+     * Optionally called by getRange().
+     * The modified value will be returned by the getRange() function.
+     *
+     * <p>Can be used to ignore some of the value bits,
+     * make a filter for one of several values,
+     * return a value index computed from the map value, etc.
+     *
+     * @see #getRange
+     * @see #iterator
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public interface ValueFilter {
+        /**
+         * Modifies the map value.
+         *
+         * @param value map value as stored in the map, before any modification
+         * @return modified value, which getRange() reports instead of the stored one
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int apply(int value);
+    }
+
+    /**
+     * Range iteration result data.
+     * Code points from start to end map to the same value.
+     * The value may have been modified by {@link ValueFilter#apply(int)},
+     * or it may be the surrogateValue if a RangeOption other than "normal" was used.
+     *
+     * @see #getRange
+     * @see #iterator
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final class Range {
+        private int start;  // first code point of the range
+        private int end;    // last code point of the range (inclusive)
+        private int value;  // value shared by all code points start..end
+
+        /**
+         * Constructor. Sets start and end to -1 and value to 0.
+         *
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Range() {
+            start = end = -1;  // -1 = nothing fetched yet; RangeIterator then begins at end + 1 == 0
+            value = 0;
+        }
+
+        /**
+         * @return the start code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getStart() { return start; }
+        /**
+         * @return the (inclusive) end code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getEnd() { return end; }
+        /**
+         * @return the range value
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getValue() { return value; }
+        /**
+         * Sets the range. When using {@link #iterator()},
+         * iteration will resume after the newly set end.
+         *
+         * @param start new start code point
+         * @param end new end code point
+         * @param value new value
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public void set(int start, int end, int value) {
+            this.start = start;  // stored as given: no range or ordering validation happens here
+            this.end = end;
+            this.value = value;
+        }
+    }
+
+    private final class RangeIterator implements Iterator<Range> {  // inner (non-static): next() calls the enclosing map's getRange()
+        private Range range = new Range();  // single instance, overwritten and returned by every next()
+
+        @Override
+        public boolean hasNext() {
+            return -1 <= range.end && range.end < 0x10ffff;  // end starts at -1; done once a range has ended at U+10FFFF
+        }
+
+        @Override
+        public Range next() {
+            if (getRange(range.end + 1, null, range)) {  // resume right after the previous range; null = no value filter
+                return range;
+            } else {
+                throw new NoSuchElementException();  // getRange() returned false (documented for start outside 0..U+10FFFF)
+            }
+        }
+
+        @Override
+        public final void remove() {
+            throw new UnsupportedOperationException();  // removal is not supported by this iterator
+        }
+    }
+
+    /**
+     * Iterates over code points of a string and fetches map values.
+     * This does not implement java.util.Iterator.
+     *
+     * <pre>
+     * void onString(CodePointMap map, CharSequence s, int start) {
+     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
+     *     while (iter.next()) {
+     *         int end = iter.getIndex();  // code point from between start and end
+     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
+     *         start = end;
+     *     }
+     * }
+     * </pre>
+     *
+     * <p>This class is not intended for public subclassing.
+     *
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public class StringIterator {
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected CharSequence s;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int sIndex;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int c;
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected int value;
+
+        /**
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        protected StringIterator(CharSequence s, int sIndex) {  // same initialization as reset(s, sIndex)
+            this.s = s;
+            this.sIndex = sIndex;  // stored as given; not checked against s.length() here
+            c = -1;  // no code point read yet
+            value = 0;
+        }
+
+        /**
+         * Resets the iterator to a new string and/or a new string index.
+         *
+         * @param s string to iterate over
+         * @param sIndex string index where the iteration will start
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public void reset(CharSequence s, int sIndex) {
+            this.s = s;
+            this.sIndex = sIndex;  // stored as given; not checked against s.length() here
+            c = -1;  // forget the previously read code point
+            value = 0;  // and its map value
+        }
+
+        /**
+         * Reads the next code point, post-increments the string index,
+         * and gets a value from the map.
+         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
+         *
+         * @return true if the string index was not yet at the end of the string;
+         *         otherwise the iterator did not advance
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean next() {
+            if (sIndex >= s.length()) {  // at or past the end: leave c and value untouched
+                return false;
+            }
+            c = Character.codePointAt(s, sIndex);  // joins a valid surrogate pair; an unpaired surrogate is returned as-is
+            sIndex += Character.charCount(c);  // advance by 1 code unit (BMP) or 2 (supplementary)
+            value = get(c);  // lookup in the enclosing CodePointMap
+            return true;
+        }
+
+        /**
+         * Reads the previous code point, pre-decrements the string index,
+         * and gets a value from the map.
+         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
+         *
+         * @return true if the string index was not yet at the start of the string;
+         *         otherwise the iterator did not advance
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean previous() {
+            if (sIndex <= 0) {  // at the start: leave c and value untouched (sIndex > s.length() is not checked here)
+                return false;
+            }
+            c = Character.codePointBefore(s, sIndex);  // joins a valid surrogate pair ending before sIndex; else the single unit
+            sIndex -= Character.charCount(c);  // step back by 1 code unit (BMP) or 2 (supplementary)
+            value = get(c);  // lookup in the enclosing CodePointMap
+            return true;
+        }
+        /**
+         * @return the string index
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getIndex() { return sIndex; }
+        /**
+         * @return the code point
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getCodePoint() { return c; }
+        /**
+         * @return the map value,
+         *         or an implementation-defined error value if
+         *         the code point is an unpaired surrogate
+         * @draft ICU 63
+         * @provisional This API might change or be removed in a future release.
+         */
+        public final int getValue() { return value; }
+    }
+
+    /**
+     * Protected no-args constructor.
+     *
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    protected CodePointMap() {  // this abstract base declares no instance fields, so there is nothing to initialize
+    }
+
+    /**
+     * Returns the value for a code point as stored in the map, with range checking.
+     * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
+     *
+     * @param c the code point
+     * @return the map value,
+     *         or an implementation-defined error value if
+     *         the code point is not in the range 0..U+10FFFF
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract int get(int c);  // also invoked by StringIterator.next()/previous() for each code point read
+
+    /**
+     * Sets the range object to a range of code points beginning with the start parameter.
+     * The range start is the same as the start input parameter
+     * (even if there are preceding code points that have the same value).
+     * The range end is the last code point such that
+     * all those from start to there have the same value.
+     * Returns false if start is not 0..U+10FFFF.
+     * Can be used to efficiently iterate over all same-value ranges in a map.
+     * (This is normally faster than iterating over code points and get()ting each value,
+     * but may be much slower than a data structure that stores ranges directly.)
+     *
+     * <p>If the {@link ValueFilter} parameter is not null, then
+     * the value to be delivered is passed through that filter, and the return value is the end
+     * of the range where all values are modified to the same actual value.
+     * The value is unchanged if that parameter is null.
+     *
+     * <p>Example:
+     * <pre>
+     * int start = 0;
+     * CodePointMap.Range range = new CodePointMap.Range();
+     * while (map.getRange(start, null, range)) {
+     *     int end = range.getEnd();
+     *     int value = range.getValue();
+     *     // Work with the range start..end and its value.
+     *     start = end + 1;
+     * }
+     * </pre>
+     *
+     * @param start range start
+     * @param filter an object that may modify the map data value,
+     *     or null if the values from the map are to be used unmodified
+     * @param range the range object that will be set to the code point range and value
+     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public abstract boolean getRange(int start, ValueFilter filter, Range range);  // used by RangeIterator.next() and the RangeOption overload
+
+    /**
+     * Sets the range object to a range of code points beginning with the start parameter.
+     * The range start is the same as the start input parameter
+     * (even if there are preceding code points that have the same value).
+     * The range end is the last code point such that
+     * all those from start to there have the same value.
+     * Returns false if start is not 0..U+10FFFF.
+     *
+     * <p>Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally
+     * modifies the range if it overlaps with surrogate code points.
+     *
+     * @param start range start
+     * @param option defines whether surrogates are treated normally,
+     *               or as having the surrogateValue; usually {@link RangeOption#NORMAL}
+     * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL}
+     * @param filter an object that may modify the map data value,
+     *     or null if the values from the map are to be used unmodified
+     * @param range the range object that will be set to the code point range and value
+     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public boolean getRange(int start, RangeOption option, int surrogateValue,
+            ValueFilter filter, Range range) {
+        assert option != null;  // only enforced when assertions are enabled
+        if (!getRange(start, filter, range)) {  // fetch the range as stored; false means start was rejected
+            return false;
+        }
+        if (option == RangeOption.NORMAL) {  // no surrogate special-casing requested
+            return true;
+        }
+        int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff;  // last code point forced to surrogateValue
+        int end = range.end;
+        if (end < 0xd7ff || start > surrEnd) {  // 0xd7ff is the code point just before the first surrogate U+D800
+            return true;
+        }
+        // The range overlaps with surrogates, or ends just before the first one.
+        if (range.value == surrogateValue) {
+            if (end >= surrEnd) {
+                // Surrogates followed by a non-surrValue range,
+                // or surrogates are part of a larger surrValue range.
+                return true;
+            }
+        } else {
+            if (start <= 0xd7ff) {
+                range.end = 0xd7ff;  // Non-surrValue range ends before surrValue surrogates.
+                return true;
+            }
+            // Start is a surrogate with a non-surrValue code *unit* value.
+            // Return a surrValue code *point* range.
+            range.value = surrogateValue;
+            if (end > surrEnd) {
+                range.end = surrEnd;  // Surrogate range ends before non-surrValue rest of range.
+                return true;
+            }
+        }
+        // See if the surrValue surrogate range can be merged with
+        // an immediately following range.
+        if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) {  // this call overwrites range
+            range.start = start;  // keep the following range's end and value, but restore the caller's start
+            return true;
+        }
+        range.start = start;  // no merge: the probe above clobbered range, so rebuild start..surrEnd explicitly
+        range.end = surrEnd;
+        range.value = surrogateValue;
+        return true;
+    }
+
+    /**
+     * Convenience iterator over same-map-value code point ranges.
+     * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)}
+     * without filtering.
+     * Adjacent ranges have different map values.
+     *
+     * <p>The iterator always returns the same Range object.
+     *
+     * @return a Range iterator
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    @Override
+    public Iterator<Range> iterator() {
+        return new RangeIterator();  // fresh iterator whose Range has end == -1, so the first next() starts at code point 0
+    }
+
+    /**
+     * Returns an iterator (not a java.util.Iterator) over code points of a string
+     * for fetching map values.
+     *
+     * @param s string to iterate over
+     * @param sIndex string index where the iteration will start
+     * @return the iterator
+     * @draft ICU 63
+     * @provisional This API might change or be removed in a future release.
+     */
+    public StringIterator stringIterator(CharSequence s, int sIndex) {
+        return new StringIterator(s, sIndex);  // inner-class instance bound to this map; its next()/previous() call this map's get()
+    }
+}
--- a/src/java.base/share/classes/sun/text/normalizer/CodePointTrie.java
+++ b/src/java.base/share/classes/sun/text/normalizer/CodePointTrie.java
--- a/src/java.base/share/classes/sun/text/normalizer/ICUBinary.java
+++ b/src/java.base/share/classes/sun/text/normalizer/ICUBinary.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -119,10 +119,7 @@ public final class ICUBinary {
                    } else if (capacity < 0x4000) {
                        capacity *= 2;  // Grow faster until we reach 16kB.
                    }
-                    // TODO Java 6 replace new byte[] and arraycopy(): bytes = Arrays.copyOf(bytes, capacity);
-                    byte[] newBytes = new byte[capacity];
-                    System.arraycopy(bytes, 0, newBytes, 0, length);
-                    bytes = newBytes;
+                    bytes = Arrays.copyOf(bytes, capacity);
                    bytes[length++] = (byte) nextByte;
                }
           }
@ -264,6 +261,36 @@ public final class ICUBinary {
        }
    }

+    public static byte[] getBytes(ByteBuffer bytes, int length, int additionalSkipLength) {  // copies out length bytes, then skips extra bytes
+        byte[] dest = new byte[length];
+        bytes.get(dest);  // relative bulk get: fills dest and advances the position of bytes by length
+        if (additionalSkipLength > 0) {  // unlike getString/getChars/getInts, skipBytes is only called when there is something to skip
+            skipBytes(bytes, additionalSkipLength);  // skipBytes is defined elsewhere in this class (not shown in this hunk)
+        }
+        return dest;
+    }
+
+    public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) {  // length counts chars, not bytes
+        CharSequence cs = bytes.asCharBuffer();  // view starting at the current position; reading it does not move bytes
+        String s = cs.subSequence(0, length).toString();  // the first length chars of the view
+        skipBytes(bytes, length * 2 + additionalSkipLength);  // 2 bytes per char consumed; presumably advances bytes, confirm in skipBytes
+        return s;
+    }
+
+    public static char[] getChars(ByteBuffer bytes, int length, int additionalSkipLength) {  // length counts chars, not bytes
+        char[] dest = new char[length];
+        bytes.asCharBuffer().get(dest);  // advances only the temporary view, not bytes itself
+        skipBytes(bytes, length * 2 + additionalSkipLength);  // 2 bytes per char consumed; presumably advances bytes, confirm in skipBytes
+        return dest;
+    }
+
+    public static int[] getInts(ByteBuffer bytes, int length, int additionalSkipLength) {  // length counts ints, not bytes
+        int[] dest = new int[length];
+        bytes.asIntBuffer().get(dest);  // advances only the temporary view, not bytes itself
+        skipBytes(bytes, length * 4 + additionalSkipLength);  // 4 bytes per int consumed; presumably advances bytes, confirm in skipBytes
+        return dest;
+    }
+
    /**
     * Returns a VersionInfo for the bytes in the compact version integer.
     */
--- a/src/java.base/share/classes/sun/text/normalizer/Norm2AllModes.java
+++ b/src/java.base/share/classes/sun/text/normalizer/Norm2AllModes.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -265,7 +265,7 @@ final class Norm2AllModes {
    private static final class Norm2AllModesSingleton {
        private Norm2AllModesSingleton(String name) {
            try {
-                String DATA_FILE_NAME = "/sun/text/resources/" + name + ".icu";
+                String DATA_FILE_NAME = "/sun/text/resources/" + name + ".nrm";
                NormalizerImpl impl=new NormalizerImpl().load(DATA_FILE_NAME);
                allModes=new Norm2AllModes(impl);
            } catch (RuntimeException e) {
--- a/src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java
+++ b/src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -61,7 +61,7 @@ public final class NormalizerImpl {
            return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
        }

-                /**
+        /**
         * Decomposes c, which must be a Hangul syllable, into buffer
         * and returns the length of the decomposition (2 or 3).
         */
@ -145,8 +145,7 @@ public final class NormalizerImpl {
                insert(c, cc);
            }
        }
-        // s must be in NFD, otherwise change the implementation.
-        public void append(CharSequence s, int start, int limit,
+        public void append(CharSequence s, int start, int limit, boolean isNFD,
                           int leadCC, int trailCC) {
            if(start==limit) {
                return;
@ -167,8 +166,11 @@ public final class NormalizerImpl {
                    c=Character.codePointAt(s, start);
                    start+=Character.charCount(c);
                    if(start<limit) {
-                        // s must be in NFD, otherwise we need to use getCC().
-                        leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
+                        if (isNFD) {
+                            leadCC = getCCFromYesOrMaybe(impl.getNorm16(c));
+                        } else {
+                            leadCC = impl.getCC(impl.getNorm16(c));
+                        }
                    } else {
                        leadCC=trailCC;
                    }
@ -310,6 +312,12 @@ public final class NormalizerImpl {
    // TODO: Propose widening UTF16 methods that take char to take int.
    // TODO: Propose widening UTF16 methods that take String to take CharSequence.
    public static final class UTF16Plus {
+        /**
+         * Is this code point a lead surrogate (U+d800..U+dbff)?
+         * @param c code unit or code point
+         * @return true or false
+         */
+        public static boolean isLeadSurrogate(int c) { return (c & 0xfffffc00) == 0xd800; }  // clears the low 10 bits: true only for 0xd800..0xdbff
        /**
         * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
         * is it a lead surrogate?
@ -350,7 +358,7 @@ public final class NormalizerImpl {

    private static final class IsAcceptable implements ICUBinary.Authenticate {
        public boolean isDataVersionAcceptable(byte version[]) {
-            return version[0]==3;
+            return version[0]==4;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
@ -387,8 +395,9 @@ public final class NormalizerImpl {
            // Read the normTrie.
            int offset=inIndexes[IX_NORM_TRIE_OFFSET];
            int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
-            normTrie=Trie2_16.createFromSerialized(bytes);
-            int trieLength=normTrie.getSerializedLength();
+            int triePosition = bytes.position();
+            normTrie = CodePointTrie.Fast16.fromBinary(bytes);
+            int trieLength = bytes.position() - triePosition;
            if(trieLength>(nextOffset-offset)) {
                throw new InternalError("Normalizer2 data: not enough bytes for normTrie");
            }
@ -398,13 +407,8 @@ public final class NormalizerImpl {
            offset=nextOffset;
            nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
            int numChars=(nextOffset-offset)/2;
-            char[] chars;
            if(numChars!=0) {
-                chars=new char[numChars];
-                for(int i=0; i<numChars; ++i) {
-                    chars[i]=bytes.getChar();
-                }
-                maybeYesCompositions=new String(chars);
+                maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
                extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
            }

@ -422,8 +426,12 @@ public final class NormalizerImpl {
        return load(ICUBinary.getRequiredData(name));
    }

-
-    public int getNorm16(int c) { return normTrie.get(c); }
+    // The trie stores values for lead surrogate code *units*.
+    // Surrogate code *points* are inert.
+    public int getNorm16(int c) {
+        return UTF16Plus.isLeadSurrogate(c) ? INERT : normTrie.get(c);  // lead surrogate code points bypass the trie lookup
+    }
+    public int getRawNorm16(int c) { return normTrie.get(c); }  // direct trie lookup, without the lead-surrogate check in getNorm16()
    public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
    public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
    public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
@ -486,7 +494,7 @@ public final class NormalizerImpl {
                }
                // Maps to an isCompYesAndZeroCC.
                c=mapAlgorithmic(c, norm16);
-                norm16=getNorm16(c);
+                norm16=getRawNorm16(c);
            }
        }
        if(norm16<=minYesNo || isHangulLVT(norm16)) {
@ -519,7 +527,7 @@ public final class NormalizerImpl {
            // Maps to an isCompYesAndZeroCC.
            decomp=c=mapAlgorithmic(c, norm16);
            // The mapping might decompose further.
-            norm16 = getNorm16(c);
+            norm16 = getRawNorm16(c);
        }
        if (norm16 < minYesNo) {
            if(decomp<0) {
@ -641,27 +649,23 @@ public final class NormalizerImpl {
            // count code units below the minimum or with irrelevant data for the quick check
            for(prevSrc=src; src!=limit;) {
                if( (c=s.charAt(src))<minNoCP ||
-                    isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
+                    isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                ) {
                    ++src;
-                } else if(!UTF16.isSurrogate((char)c)) {
+                } else if(!UTF16Plus.isLeadSurrogate(c)) {
                    break;
                } else {
                    char c2;
-                    if(UTF16Plus.isSurrogateLead(c)) {
-                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
-                            c=Character.toCodePoint((char)c, c2);
+                    if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
+                        c = Character.toCodePoint((char)c, c2);
+                        norm16 = normTrie.suppGet(c);
+                        if (isMostDecompYesAndZeroCC(norm16)) {
+                            src += 2;
+                        } else {
+                            break;
                        }
-                    } else /* trail surrogate */ {
-                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
-                            --src;
-                            c=Character.toCodePoint(c2, (char)c);
-                        }
-                    }
-                    if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
-                        src+=Character.charCount(c);
                    } else {
-                        break;
+                        ++src;  // unpaired lead surrogate: inert
                    }
                }
            }
@ -721,7 +725,7 @@ public final class NormalizerImpl {
            c=Character.codePointAt(s, src);
            cc=getCC(getNorm16(c));
        };
-        buffer.append(s, 0, src, firstCC, prevCC);
+        buffer.append(s, 0, src, false, firstCC, prevCC);
        buffer.append(s, src, limit);
    }

@ -749,28 +753,22 @@ public final class NormalizerImpl {
                    return true;
                }
                if( (c=s.charAt(src))<minNoMaybeCP ||
-                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
+                    isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                ) {
                    ++src;
                } else {
                    prevSrc = src++;
-                    if(!UTF16.isSurrogate((char)c)) {
+                    if (!UTF16Plus.isLeadSurrogate(c)) {
                        break;
                    } else {
                        char c2;
-                        if(UTF16Plus.isSurrogateLead(c)) {
-                            if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
-                                ++src;
-                                c=Character.toCodePoint((char)c, c2);
+                        if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
+                            ++src;
+                            c = Character.toCodePoint((char)c, c2);
+                            norm16 = normTrie.suppGet(c);
+                            if (!isCompYesAndZeroCC(norm16)) {
+                                break;
                            }
-                        } else /* trail surrogate */ {
-                            if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
-                                --prevSrc;
-                                c=Character.toCodePoint(c2, (char)c);
-                            }
-                        }
-                        if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
-                            break;
                        }
                    }
                }
@ -991,28 +989,22 @@ public final class NormalizerImpl {
                    return (src<<1)|qcResult;  // "yes" or "maybe"
                }
                if( (c=s.charAt(src))<minNoMaybeCP ||
-                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
+                    isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
                ) {
                    ++src;
                } else {
                    prevSrc = src++;
-                    if(!UTF16.isSurrogate((char)c)) {
+                    if (!UTF16Plus.isLeadSurrogate(c)) {
                        break;
                    } else {
                        char c2;
-                        if(UTF16Plus.isSurrogateLead(c)) {
-                            if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
-                                ++src;
-                                c=Character.toCodePoint((char)c, c2);
+                        if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
+                            ++src;
+                            c = Character.toCodePoint((char)c, c2);
+                            norm16 = normTrie.suppGet(c);
+                            if (!isCompYesAndZeroCC(norm16)) {
+                                break;
                            }
-                        } else /* trail surrogate */ {
-                            if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
-                                --prevSrc;
-                                c=Character.toCodePoint(c2, (char)c);
-                            }
-                        }
-                        if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
-                            break;
                        }
                    }
                }
@ -1134,17 +1126,10 @@ public final class NormalizerImpl {
                    prevFCD16=0;
                    ++src;
                } else {
-                    if(UTF16.isSurrogate((char)c)) {
+                    if (UTF16Plus.isLeadSurrogate(c)) {
                        char c2;
-                        if(UTF16Plus.isSurrogateLead(c)) {
-                            if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
-                                c=Character.toCodePoint((char)c, c2);
-                            }
-                        } else /* trail surrogate */ {
-                            if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
-                                --src;
-                                c=Character.toCodePoint(c2, (char)c);
-                            }
+                        if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
+                            c = Character.toCodePoint((char)c, c2);
                        }
                    }
                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
@ -1430,7 +1415,7 @@ public final class NormalizerImpl {
            }
            // Maps to an isCompYesAndZeroCC.
            c=mapAlgorithmic(c, norm16);
-            norm16=getNorm16(c);
+            norm16=getRawNorm16(c);
        }
        if (norm16 < minYesNo) {
            // c does not decompose
@ -1451,7 +1436,7 @@ public final class NormalizerImpl {
                leadCC=0;
            }
            ++mapping;  // skip over the firstUnit
-            buffer.append(extraData, mapping, mapping+length, leadCC, trailCC);
+            buffer.append(extraData, mapping, mapping+length, true, leadCC, trailCC);
        }
    }

@ -1643,7 +1628,7 @@ public final class NormalizerImpl {
                    // Is the composite a starter that combines forward?
                    if((compositeAndFwd&1)!=0) {
                        compositionsList=
-                            getCompositionsListForComposite(getNorm16(composite));
+                            getCompositionsListForComposite(getRawNorm16(composite));
                    } else {
                        compositionsList=-1;
                    }
@ -2196,9 +2181,8 @@ public final class NormalizerImpl {
    private int centerNoNoDelta;
    private int minMaybeYes;

-    private Trie2_16 normTrie;
+    private CodePointTrie.Fast16 normTrie;
    private String maybeYesCompositions;
    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
    private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
-
-   }
+}
--- a/src/java.base/share/classes/sun/text/resources/nfc.nrm
+++ b/src/java.base/share/classes/sun/text/resources/nfc.nrm
--- a/src/java.base/share/classes/sun/text/resources/nfkc.icu
+++ b/src/java.base/share/classes/sun/text/resources/nfkc.icu
--- a/src/java.base/share/classes/sun/text/resources/nfkc.nrm
+++ b/src/java.base/share/classes/sun/text/resources/nfkc.nrm
--- a/src/java.base/share/classes/sun/text/resources/nfkc_cf.icu
+++ b/src/java.base/share/classes/sun/text/resources/nfkc_cf.icu
--- a/src/java.base/share/classes/sun/text/resources/ubidi.icu
+++ b/src/java.base/share/classes/sun/text/resources/ubidi.icu
--- a/src/java.base/share/classes/sun/text/resources/uprops.icu
+++ b/src/java.base/share/classes/sun/text/resources/uprops.icu