8268081: Upgrade Unicode Data Files to 14.0.0

Reviewed-by: joehw, iris, lancea
2025-08-27 23:04:50 +02:00 · 2022-01-12 19:17:18 +00:00 · 2022-01-12 19:17:18 +00:00 · 0a094d7c28
commit 0a094d7c28
parent ddddec7d74
38 changed files with 3333 additions and 1081 deletions
--- a/src/java.base/share/classes/java/lang/Character.java
+++ b/src/java.base/share/classes/java/lang/Character.java
--- a/src/java.base/share/classes/java/util/regex/Grapheme.java
+++ b/src/java.base/share/classes/java/util/regex/Grapheme.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -35,8 +35,8 @@ final class Grapheme {
     * <p>
     * See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
     * for the extended grapheme cluster boundary rules. The following implementation
-     * is based on version 12.0 of the annex.
-     * (http://www.unicode.org/reports/tr29/tr29-35.html)
+     * is based on the annex for Unicode version 14.0.
+     * (http://www.unicode.org/reports/tr29/tr29-39.html)
     *
     * @param src the {@code CharSequence} to be scanned
     * @param off offset to start looking for the next boundary in the src
@ -97,7 +97,7 @@ final class Grapheme {
    private static final int FIRST_TYPE = 0;
    private static final int LAST_TYPE = 14;

-    private static boolean[][] rules;
+    private static final boolean[][] rules;
    static {
        rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
        // GB 999 Any + Any  -> default
@ -201,8 +201,9 @@ final class Grapheme {
            if (cp == 0x200D)
                return ZWJ;
            if (cp >= 0x0600 && cp <= 0x0605 ||
-                cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
-                cp == 0x110BD || cp == 0x110CD)
+                cp == 0x06DD || cp == 0x070F ||
+                cp == 0x0890 || cp == 0x0891 ||
+                cp == 0x08E2 || cp == 0x110BD || cp == 0x110CD)
                return PREPEND;
            return CONTROL;
        case Character.NON_SPACING_MARK:
--- a/src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -76,9 +76,6 @@ public final class Punycode {
    //  TODO: eliminate the 256 limitation
    private static final int MAX_CP_COUNT   = 256;

-    private static final int UINT_MAGIC     = 0x80000000;
-    private static final long ULONG_MAGIC   = 0x8000000000000000L;
-
    private static int adaptBias(int delta, int length, boolean firstTime){
        if(firstTime){
            delta /=DAMP;
@ -96,34 +93,25 @@ public final class Punycode {
    }

    /**
-     * basicToDigit[] contains the numeric value of a basic code
-     * point (for use in representing integers) in the range 0 to
-     * BASE-1, or -1 if b is does not represent a value.
+     * @return the numeric value of a basic code point (for use in representing integers)
+     *         in the range 0 to BASE-1, or a negative value if cp is invalid.
     */
-    static final int[]    basicToDigit= new int[]{
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
-
-        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
-        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-
-        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
-        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-    };
+    private static int decodeDigit(int cp) {
+        // Letters of either case decode to 0..25 and the decimal digits
+        // follow as 26..35; each accepted range is tested explicitly.
+        if (cp >= 'a' && cp <= 'z') {
+            return cp - 'a';       // a..z -> 0..25
+        }
+        if (cp >= 'A' && cp <= 'Z') {
+            return cp - 'A';       // A..Z -> 0..25
+        }
+        if (cp >= '0' && cp <= '9') {
+            return cp - '0' + 26;  // 0..9 -> 26..35
+        }
+        // Anything else is not a Punycode digit. This includes the ASCII
+        // punctuation lying between the digit and letter ranges, which the
+        // nested form rejected only because the subtraction went negative.
+        return -1;
+    }

    private static char asciiCaseMap(char b, boolean uppercase) {
@ -158,6 +146,12 @@ public final class Punycode {
            return (char)((ZERO-26)+digit);
        }
    }
+
+    // ICU-13727: Limit input length for n^2 algorithm
+    // where well-formed strings are at most 59 characters long.
+    private static final int ENCODE_MAX_CODE_UNITS = 1000;
+    private static final int DECODE_MAX_CHARS = 2000;
+
    /**
     * Converts Unicode to Punycode.
     * The input string must not contain single, unpaired surrogates.
@ -174,6 +168,10 @@ public final class Punycode {
        int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
        char c, c2;
        int srcLength = src.length();
+        if (srcLength > ENCODE_MAX_CODE_UNITS) {
+            throw new RuntimeException(
+                    "input too long: " + srcLength + " UTF-16 code units");
+        }
        int destCapacity = MAX_CP_COUNT;
        char[] dest = new char[destCapacity];
        StringBuffer result = new StringBuffer();
@ -251,7 +249,7 @@ public final class Punycode {
             * Increase delta enough to advance the decoder's
             * <n,i> state to <m,0>, but guard against overflow:
             */
-            if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
+            if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) {
                throw new RuntimeException("Internal program error");
            }
            delta+=(m-n)*(handledCPCount+1);
@ -332,6 +330,9 @@ public final class Punycode {
    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
                               throws ParseException{
        int srcLength = src.length();
+        if (srcLength > DECODE_MAX_CHARS) {
+            throw new RuntimeException("input too long: " + srcLength + " characters");
+        }
        StringBuffer result = new StringBuffer();
        int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
                destCPCount, firstSupplementaryIndex, cpLength;
@ -395,7 +396,7 @@ public final class Punycode {
                    throw new ParseException("Illegal char found", -1);
                }

-                digit=basicToDigit[(byte)src.charAt(in++)];
+                digit=decodeDigit(src.charAt(in++));
                if(digit<0) {
                    throw new ParseException("Invalid char found", -1);
                }
--- a/src/java.base/share/classes/jdk/internal/icu/impl/UnicodeSetStringSpan.java
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/UnicodeSetStringSpan.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -134,9 +134,15 @@ public class UnicodeSetStringSpan {

        int i, spanLength;
        someRelevant = false;
-        for (i = 0; i < stringsLength; ++i) {
+        for (i = 0; i < stringsLength;) {
            String string = strings.get(i);
            int length16 = string.length();
+            if (length16 == 0) {
+                // Remove the empty string.
+                strings.remove(i);
+                --stringsLength;
+                continue;
+            }
            spanLength = spanSet.span(string, SpanCondition.CONTAINED);
            if (spanLength < length16) { // Relevant string.
                someRelevant = true;
@ -144,6 +150,7 @@ public class UnicodeSetStringSpan {
            if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
                maxLength16 = length16;
            }
+            ++i;
        }
        if (!someRelevant && (which & WITH_COUNT) == 0) {
            return;
--- a/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt67b/uprops.icu
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt67b/uprops.icu
--- a/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/nfc.nrm
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/nfc.nrm
--- a/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/nfkc.nrm
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/nfkc.nrm
--- a/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/ubidi.icu
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/ubidi.icu
--- a/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/uprops.icu
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/data/icudt70b/uprops.icu
--- a/src/java.base/share/classes/jdk/internal/icu/lang/UCharacter.java
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacter.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -84,7 +84,7 @@ import jdk.internal.icu.util.VersionInfo;
 * <p>
 * Further detail on differences can be determined using the program
 *        <a href=
- * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
+ * "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
 *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
 * </p>
 * <p>
@ -101,9 +101,9 @@ import jdk.internal.icu.util.VersionInfo;
 * For more information see
- * <a href="http://www.unicode/org/ucd/">"About the Unicode Character Database"</a>
+ * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
 * (http://www.unicode.org/ucd/)
- * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
+ * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
 * User Guide chapter on Properties</a>
- * (http://www.icu-project.org/userguide/properties.html).
+ * (https://unicode-org.github.io/icu/userguide/strings/properties).
 * </p>
 * <p>
 * There are also functions that provide easy migration from C/POSIX functions
--- a/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterDirection.java
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterDirection.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -53,7 +53,7 @@ public final class UCharacterDirection implements UCharacterEnums.ECharacterDire
    // private constructor =========================================
    ///CLOVER:OFF
    /**
-     * Private constructor to prevent initialisation
+     * Private constructor to prevent initialization
     */
    private UCharacterDirection()
    {
--- a/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterEnums.java
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterEnums.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -62,7 +62,7 @@ package jdk.internal.icu.lang;
@Deprecated
 class UCharacterEnums {

-    /** This is just a namespace, it is not instantiatable. */
+    /** This is just a namespace, it is not instantiable. */
    private UCharacterEnums() {};

    /**
--- a/src/java.base/share/classes/jdk/internal/icu/text/BidiBase.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/BidiBase.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -63,7 +63,7 @@ import jdk.internal.icu.impl.UBiDiProps;
 *
 * This is an implementation of the Unicode Bidirectional Algorithm. The
 * algorithm is defined in the
- * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+ * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
 * Unicode Bidirectional Algorithm</a>.
 * <p>
 *
@ -985,7 +985,7 @@ public class BidiBase {
    /**
     * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
     * Used in
-     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+     * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
     * Unicode Bidirectional Algorithm</a>.
     * Returns UCharacter.BidiPairedBracketType values.
     * @stable ICU 52
@ -3365,7 +3365,7 @@ public class BidiBase {

    /**
     * Perform the Unicode Bidi algorithm. It is defined in the
-     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+     * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
     * Unicode Bidirectional Algorithm</a>, version 13,
     * also described in The Unicode Standard, Version 4.0 .<p>
     *
@ -3450,7 +3450,7 @@ public class BidiBase {

    /**
     * Perform the Unicode Bidi algorithm. It is defined in the
-     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+     * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
     * Unicode Bidirectional Algorithm</a>, version 13,
     * also described in The Unicode Standard, Version 4.0 .<p>
     *
@ -3786,7 +3786,7 @@ public class BidiBase {

    /**
     * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
-     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+     * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
     * Unicode Bidirectional Algorithm</a>, version 13,
     * also described in The Unicode Standard, Version 4.0 .<p>
     *
--- a/src/java.base/share/classes/jdk/internal/icu/text/BidiLine.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/BidiLine.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,7 @@ final class BidiLine {
     * text in a single paragraph or in a line of a single paragraph
     * which has already been processed according to
     * the Unicode 3.0 Bidi algorithm as defined in
-     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
+     * <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
     * Unicode Bidirectional Algorithm</a>, version 13,
     * also described in The Unicode Standard, Version 4.0.1 .
     *
--- a/src/java.base/share/classes/jdk/internal/icu/text/Normalizer2.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/Normalizer2.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ import jdk.internal.icu.impl.Norm2AllModes;
 * The primary functions are to produce a normalized string and to detect whether
 * a string is already normalized.
 * The most commonly used normalization forms are those defined in
- * <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
+ * <a href="https://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
 * Unicode Normalization Forms</a>.
 * However, this API supports additional normalization forms for specialized purposes.
 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
--- a/src/java.base/share/classes/jdk/internal/icu/text/NormalizerBase.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/NormalizerBase.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -44,7 +44,7 @@ import java.text.Normalizer;
 * <code>normalize</code> transforms Unicode text into an equivalent composed or
 * decomposed form, allowing for easier sorting and searching of text.
 * <code>normalize</code> supports the standard normalization forms described in
- * <a href="http://www.unicode.org/reports/tr15/" target="unicode">
+ * <a href="https://www.unicode.org/reports/tr15/" target="unicode">
 * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
 *
 * Characters with accents or other adornments can be encoded in
--- a/src/java.base/share/classes/jdk/internal/icu/text/StringPrep.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/StringPrep.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -67,9 +67,9 @@ import jdk.internal.icu.util.VersionInfo;
 *     <li> Unassigned Table: Contains code points that are unassigned
 *          in the Unicode Version supported by StringPrep. Currently
 *          RFC 3454 supports Unicode 3.2. </li>
- *     <li> Prohibited Table: Contains code points that are prohibted from
+ *     <li> Prohibited Table: Contains code points that are prohibited from
 *          the output of the StringPrep processing function. </li>
- *     <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
+ *     <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
 * </ul>
 *
 * The procedure for preparing Unicode strings:
@ -226,8 +226,8 @@ public final class StringPrep {
        sprepUniVer   = getVersionInfo(reader.getUnicodeVersion());
        normCorrVer   = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
        VersionInfo normUniVer = UCharacter.getUnicodeVersion();
-        if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
-           normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
+        if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
+           normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
           ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
           ){
            throw new IOException("Normalization Correction version not supported");
@ -325,7 +325,7 @@ public final class StringPrep {
                    ch -= val.value;
                }
            }else if(val.type == DELETE){
-                // just consume the codepoint and contine
+                // just consume the codepoint and continue
                continue;
            }
            //copy the source into destination
--- a/src/java.base/share/classes/jdk/internal/icu/text/UCharacterIterator.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/UCharacterIterator.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -147,9 +147,9 @@ public abstract class UCharacterIterator
     */
    public int nextCodePoint(){
        int ch1 = next();
-        if(UTF16.isLeadSurrogate((char)ch1)){
+        if(UTF16.isLeadSurrogate(ch1)){
            int ch2 = next();
-            if(UTF16.isTrailSurrogate((char)ch2)){
+            if(UTF16.isTrailSurrogate(ch2)){
                return UCharacterProperty.getRawSupplementary((char)ch1,
                                                              (char)ch2);
            }else if (ch2 != DONE) {
@ -175,7 +175,7 @@ public abstract class UCharacterIterator
    /**
     * Retreat to the start of the previous code point in the text,
     * and return it (pre-decrement semantics).  If the index is not
-     * preceeded by a valid surrogate pair, the behavior is the same
+     * preceded by a valid surrogate pair, the behavior is the same
     * as <code>previous()</code>.  Otherwise the iterator is
     * decremented to the start of the surrogate pair, and the code
     * point represented by the pair is returned.
@ -185,9 +185,9 @@ public abstract class UCharacterIterator
     */
    public int previousCodePoint(){
        int ch1 = previous();
-        if(UTF16.isTrailSurrogate((char)ch1)){
+        if(UTF16.isTrailSurrogate(ch1)){
            int ch2 = previous();
-            if(UTF16.isLeadSurrogate((char)ch2)){
+            if(UTF16.isLeadSurrogate(ch2)){
                return UCharacterProperty.getRawSupplementary((char)ch2,
                                                              (char)ch1);
            }else if (ch2 != DONE) {
--- a/src/java.base/share/classes/jdk/internal/icu/text/UTF16.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/UTF16.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -382,36 +382,39 @@ public final class UTF16
    }

    /**
-     * Determines whether the code value is a surrogate.
-     * @param char16 the input character.
-     * @return true if the input character is a surrogate.
-     * @stable ICU 2.1
+     * Determines whether the code point is a surrogate.
+     *
+     * @param codePoint The input character.
+     *        (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
+     * @return true If the input code point is a surrogate.
+     * @stable ICU 70
     */
-    public static boolean isSurrogate(char char16)
-    {
-        return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
+    public static boolean isSurrogate(int codePoint) {
+        return (codePoint & SURROGATE_BITMASK) == SURROGATE_BITS;
    }

    /**
-     * Determines whether the character is a trail surrogate.
-     * @param char16 the input character.
-     * @return true if the input character is a trail surrogate.
-     * @stable ICU 2.1
+     * Determines whether the code point is a trail surrogate.
+     *
+     * @param codePoint The input character.
+     *        (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
+     * @return true If the input code point is a trail surrogate.
+     * @stable ICU 70
     */
-    public static boolean isTrailSurrogate(char char16)
-    {
-        return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
+    public static boolean isTrailSurrogate(int codePoint) {
+        return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
    }

    /**
-     * Determines whether the character is a lead surrogate.
-     * @param char16 the input character.
-     * @return true if the input character is a lead surrogate
-     * @stable ICU 2.1
+     * Determines whether the code point is a lead surrogate.
+     *
+     * @param codePoint The input character.
+     *        (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
+     * @return true If the input code point is a lead surrogate
+     * @stable ICU 70
     */
-    public static boolean isLeadSurrogate(char char16)
-    {
-        return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
+    public static boolean isLeadSurrogate(int codePoint) {
+        return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
    }

    /**
--- a/src/java.base/share/classes/jdk/internal/icu/text/UnicodeSet.java
+++ b/src/java.base/share/classes/jdk/internal/icu/text/UnicodeSet.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -135,8 +135,8 @@ import jdk.internal.icu.util.VersionInfo;
 * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
 * complete list of supported property patterns, see the User's Guide
 * for UnicodeSet at
- * <a href="http://www.icu-project.org/userguide/unicodeSet.html">
- * http://www.icu-project.org/userguide/unicodeSet.html</a>.
+ * <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
+ * https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
 * Actual determination of property data is defined by the underlying
 * Unicode database as implemented by UCharacter.
 *
@ -147,6 +147,13 @@ import jdk.internal.icu.util.VersionInfo;
 * their delimiters; "[:^foo]" and "\P{foo}".  In any other location,
 * '^' has no special meaning.
 *
+ * <p>Since ICU 70, "[^...]", "[:^foo]", "\P{foo}", and "[:binaryProperty=No:]"
+ * perform a "code point complement" (all code points minus the original set),
+ * removing all multicharacter strings,
+ * equivalent to .{@link #complement()}.{@link #removeAllStrings()} .
+ * The {@link #complement()} API function continues to perform a
+ * symmetric difference with all code points and thus retains all multicharacter strings.
+ *
- * <p>Ranges are indicated by placing two a '-' between two
+ * <p>Ranges are indicated by placing a '-' between two
 * characters, as in "a-z".  This specifies the range of all
 * characters from the left to the right, in Unicode order.  If the
@ -189,8 +196,6 @@ import jdk.internal.icu.util.VersionInfo;
 * Unicode property
 * </table>
 *
- * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
- *
 * <p><b>Formal syntax</b></p>
 *
 * <blockquote>
@ -230,9 +235,8 @@ import jdk.internal.icu.util.VersionInfo;
 *     </tr>
 *     <tr align="top">
 *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
- *       <td valign="top"><em>any character for which
- *       </em><code>Character.digit(c, 16)</code><em>
- *       returns a non-negative result</em></td>
+ *       <td style="vertical-align: top;"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
+ *       &nbsp;&nbsp;&nbsp;&nbsp;'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
 *     </tr>
 *     <tr>
 *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
@ -487,7 +491,7 @@ public class UnicodeSet {
        else if (i > 0 && c == list[i-1]) {
            // c is after end of prior range
            list[i-1]++;
-            // no need to chcek for collapse here
+            // no need to check for collapse here
        }

        else {
@ -528,7 +532,6 @@ public class UnicodeSet {
     * present.  If this set already contains the multicharacter,
     * the call leaves this set unchanged.
     * Thus {@code "ch" => {"ch"}}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
     * @param s the source string
     * @return this object, for chaining
     * @stable ICU 2.0
@ -546,22 +549,19 @@ public class UnicodeSet {

    /**
     * Utility for getting code point from single code point CharSequence.
-     * See the public UTF16.getSingleCodePoint()
+     * See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
+     *
     * @return a code point IF the string consists of a single one.
     * otherwise returns -1.
     * @param s to test
     */
    private static int getSingleCP(CharSequence s) {
-        if (s.length() < 1) {
-            throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
-        }
-        if (s.length() > 2) return -1;
        if (s.length() == 1) return s.charAt(0);
-
-        // at this point, len = 2
-        int cp = UTF16.charAt(s, 0);
-        if (cp > 0xFFFF) { // is surrogate pair
-            return cp;
+        if (s.length() == 2) {
+            int cp = Character.codePointAt(s, 0);
+            if (cp > 0xFFFF) { // is surrogate pair
+                return cp;
+            }
        }
        return -1;
    }
@ -569,13 +569,11 @@ public class UnicodeSet {
    /**
     * Complements the specified range in this set.  Any character in
     * the range will be removed if it is in this set, or will be
-     * added if it is not in this set.  If {@code end > start}
+     * added if it is not in this set.  If <code>start &gt; end</code>
     * then an empty range is complemented, leaving the set unchanged.
     *
-     * @param start first character, inclusive, of range to be removed
-     * from this set.
-     * @param end last character, inclusive, of range to be removed
-     * from this set.
+     * @param start first character, inclusive, of range
+     * @param end last character, inclusive, of range
     * @stable ICU 2.0
     */
    public UnicodeSet complement(int start, int end) {
--- a/src/java.base/share/classes/jdk/internal/icu/util/CodePointTrie.java
+++ b/src/java.base/share/classes/jdk/internal/icu/util/CodePointTrie.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;
 /**
 * Immutable Unicode code point trie.
 * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
- * For details see http://site.icu-project.org/design/struct/utrie
+ * For details see https://icu.unicode.org/design/struct/utrie
 *
 * <p>This class is not intended for public subclassing.
 *
--- a/src/java.base/share/classes/jdk/internal/icu/util/VersionInfo.java
+++ b/src/java.base/share/classes/jdk/internal/icu/util/VersionInfo.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -54,7 +54,7 @@ public final class VersionInfo
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
-    public static final String ICU_DATA_VERSION_PATH = "67b";
+    public static final String ICU_DATA_VERSION_PATH = "70b";

    // public methods ------------------------------------------------------

@ -148,7 +148,15 @@ public final class VersionInfo
     */
    public int compareTo(VersionInfo other)
    {
-        return m_version_ - other.m_version_;
+        // m_version_ is an int, a signed 32-bit integer.
+        // When the major version is >=128, then the version int is negative,
+        // so subtracting (or comparing as signed) could sort such a version
+        // below versions with a smaller major number.
+        // Integer.compareUnsigned orders both ints as unsigned 32-bit values,
+        // which is the intended ordering, without any shifting or masking.
+        // The result is negative, zero or positive; as with any compareTo,
+        // callers must rely only on its sign, not on its magnitude.
+        return Integer.compareUnsigned(m_version_, other.m_version_);
    }

    // private data members ----------------------------------------------