mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-17 17:44:40 +02:00
8268081: Upgrade Unicode Data Files to 14.0.0
Reviewed-by: joehw, iris, lancea
This commit is contained in:
parent
ddddec7d74
commit
0a094d7c28
38 changed files with 3333 additions and 1081 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -35,8 +35,8 @@ final class Grapheme {
|
|||
* <p>
|
||||
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
|
||||
* for the extended grapheme cluster boundary rules. The following implementation
|
||||
* is based on version 12.0 of the annex.
|
||||
* (http://www.unicode.org/reports/tr29/tr29-35.html)
|
||||
* is based on the annex for Unicode version 14.0.
|
||||
* (http://www.unicode.org/reports/tr29/tr29-38.html)
|
||||
*
|
||||
* @param src the {@code CharSequence} to be scanned
|
||||
* @param off offset to start looking for the next boundary in the src
|
||||
|
@ -97,7 +97,7 @@ final class Grapheme {
|
|||
private static final int FIRST_TYPE = 0;
|
||||
private static final int LAST_TYPE = 14;
|
||||
|
||||
private static boolean[][] rules;
|
||||
private static final boolean[][] rules;
|
||||
static {
|
||||
rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
|
||||
// GB 999 Any + Any -> default
|
||||
|
@ -201,8 +201,9 @@ final class Grapheme {
|
|||
if (cp == 0x200D)
|
||||
return ZWJ;
|
||||
if (cp >= 0x0600 && cp <= 0x0605 ||
|
||||
cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
|
||||
cp == 0x110BD || cp == 0x110CD)
|
||||
cp == 0x06DD || cp == 0x070F ||
|
||||
cp == 0x0890 || cp == 0x0891 ||
|
||||
cp == 0x08E2 || cp == 0x110BD || cp == 0x110CD)
|
||||
return PREPEND;
|
||||
return CONTROL;
|
||||
case Character.NON_SPACING_MARK:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -76,9 +76,6 @@ public final class Punycode {
|
|||
// TODO: eliminate the 256 limitation
|
||||
private static final int MAX_CP_COUNT = 256;
|
||||
|
||||
private static final int UINT_MAGIC = 0x80000000;
|
||||
private static final long ULONG_MAGIC = 0x8000000000000000L;
|
||||
|
||||
private static int adaptBias(int delta, int length, boolean firstTime){
|
||||
if(firstTime){
|
||||
delta /=DAMP;
|
||||
|
@ -96,34 +93,25 @@ public final class Punycode {
|
|||
}
|
||||
|
||||
/**
|
||||
* basicToDigit[] contains the numeric value of a basic code
|
||||
* point (for use in representing integers) in the range 0 to
|
||||
* BASE-1, or -1 if b is does not represent a value.
|
||||
* @return the numeric value of a basic code point (for use in representing integers)
|
||||
* in the range 0 to BASE-1, or a negative value if cp is invalid.
|
||||
*/
|
||||
static final int[] basicToDigit= new int[]{
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||
private static final int decodeDigit(int cp) {
|
||||
if(cp<='Z') {
|
||||
if(cp<='9') {
|
||||
if(cp<'0') {
|
||||
return -1;
|
||||
} else {
|
||||
return cp-'0'+26; // 0..9 -> 26..35
|
||||
}
|
||||
} else {
|
||||
return cp-'A'; // A-Z -> 0..25
|
||||
}
|
||||
} else if(cp<='z') {
|
||||
return cp-'a'; // a..z -> 0..25
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
private static char asciiCaseMap(char b, boolean uppercase) {
|
||||
|
@ -158,6 +146,12 @@ public final class Punycode {
|
|||
return (char)((ZERO-26)+digit);
|
||||
}
|
||||
}
|
||||
|
||||
// ICU-13727: Limit input length for n^2 algorithm
|
||||
// where well-formed strings are at most 59 characters long.
|
||||
private static final int ENCODE_MAX_CODE_UNITS = 1000;
|
||||
private static final int DECODE_MAX_CHARS = 2000;
|
||||
|
||||
/**
|
||||
* Converts Unicode to Punycode.
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
|
@ -174,6 +168,10 @@ public final class Punycode {
|
|||
int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
|
||||
char c, c2;
|
||||
int srcLength = src.length();
|
||||
if (srcLength > ENCODE_MAX_CODE_UNITS) {
|
||||
throw new RuntimeException(
|
||||
"input too long: " + srcLength + " UTF-16 code units");
|
||||
}
|
||||
int destCapacity = MAX_CP_COUNT;
|
||||
char[] dest = new char[destCapacity];
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
@ -251,7 +249,7 @@ public final class Punycode {
|
|||
* Increase delta enough to advance the decoder's
|
||||
* <n,i> state to <m,0>, but guard against overflow:
|
||||
*/
|
||||
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
|
||||
if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) {
|
||||
throw new RuntimeException("Internal program error");
|
||||
}
|
||||
delta+=(m-n)*(handledCPCount+1);
|
||||
|
@ -332,6 +330,9 @@ public final class Punycode {
|
|||
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
|
||||
throws ParseException{
|
||||
int srcLength = src.length();
|
||||
if (srcLength > DECODE_MAX_CHARS) {
|
||||
throw new RuntimeException("input too long: " + srcLength + " characters");
|
||||
}
|
||||
StringBuffer result = new StringBuffer();
|
||||
int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
|
||||
destCPCount, firstSupplementaryIndex, cpLength;
|
||||
|
@ -395,7 +396,7 @@ public final class Punycode {
|
|||
throw new ParseException("Illegal char found", -1);
|
||||
}
|
||||
|
||||
digit=basicToDigit[(byte)src.charAt(in++)];
|
||||
digit=decodeDigit(src.charAt(in++));
|
||||
if(digit<0) {
|
||||
throw new ParseException("Invalid char found", -1);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -134,9 +134,15 @@ public class UnicodeSetStringSpan {
|
|||
|
||||
int i, spanLength;
|
||||
someRelevant = false;
|
||||
for (i = 0; i < stringsLength; ++i) {
|
||||
for (i = 0; i < stringsLength;) {
|
||||
String string = strings.get(i);
|
||||
int length16 = string.length();
|
||||
if (length16 == 0) {
|
||||
// Remove the empty string.
|
||||
strings.remove(i);
|
||||
--stringsLength;
|
||||
continue;
|
||||
}
|
||||
spanLength = spanSet.span(string, SpanCondition.CONTAINED);
|
||||
if (spanLength < length16) { // Relevant string.
|
||||
someRelevant = true;
|
||||
|
@ -144,6 +150,7 @@ public class UnicodeSetStringSpan {
|
|||
if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
|
||||
maxLength16 = length16;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (!someRelevant && (which & WITH_COUNT) == 0) {
|
||||
return;
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -84,7 +84,7 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* <p>
|
||||
* Further detail on differences can be determined using the program
|
||||
* <a href=
|
||||
* "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
|
||||
* "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
|
||||
* com.ibm.icu.dev.test.lang.UCharacterCompare</a>
|
||||
* </p>
|
||||
* <p>
|
||||
|
@ -101,9 +101,9 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* For more information see
|
||||
* <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
|
||||
* (http://www.unicode.org/ucd/)
|
||||
* and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
|
||||
* and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
|
||||
* User Guide chapter on Properties</a>
|
||||
* (http://www.icu-project.org/userguide/properties.html).
|
||||
* (https://unicode-org.github.io/icu/userguide/strings/properties).
|
||||
* </p>
|
||||
* <p>
|
||||
* There are also functions that provide easy migration from C/POSIX functions
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -53,7 +53,7 @@ public final class UCharacterDirection implements UCharacterEnums.ECharacterDire
|
|||
// private constructor =========================================
|
||||
///CLOVER:OFF
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
* Private constructor to prevent initialization
|
||||
*/
|
||||
private UCharacterDirection()
|
||||
{
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -62,7 +62,7 @@ package jdk.internal.icu.lang;
|
|||
@Deprecated
|
||||
class UCharacterEnums {
|
||||
|
||||
/** This is just a namespace, it is not instantiatable. */
|
||||
/** This is just a namespace, it is not instantiable. */
|
||||
private UCharacterEnums() {};
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -63,7 +63,7 @@ import jdk.internal.icu.impl.UBiDiProps;
|
|||
*
|
||||
* This is an implementation of the Unicode Bidirectional Algorithm. The
|
||||
* algorithm is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>.
|
||||
* <p>
|
||||
*
|
||||
|
@ -985,7 +985,7 @@ public class BidiBase {
|
|||
/**
|
||||
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
|
||||
* Used in
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>.
|
||||
* Returns UCharacter.BidiPairedBracketType values.
|
||||
* @stable ICU 52
|
||||
|
@ -3365,7 +3365,7 @@ public class BidiBase {
|
|||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
|
@ -3450,7 +3450,7 @@ public class BidiBase {
|
|||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
|
@ -3786,7 +3786,7 @@ public class BidiBase {
|
|||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -47,7 +47,7 @@ final class BidiLine {
|
|||
* text in a single paragraph or in a line of a single paragraph
|
||||
* which has already been processed according to
|
||||
* the Unicode 3.0 Bidi algorithm as defined in
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0.1 .
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -43,7 +43,7 @@ import jdk.internal.icu.impl.Norm2AllModes;
|
|||
* The primary functions are to produce a normalized string and to detect whether
|
||||
* a string is already normalized.
|
||||
* The most commonly used normalization forms are those defined in
|
||||
* <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
|
||||
* <a href="https://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
|
||||
* Unicode Normalization Forms</a>.
|
||||
* However, this API supports additional normalization forms for specialized purposes.
|
||||
* For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -44,7 +44,7 @@ import java.text.Normalizer;
|
|||
* <code>normalize</code> transforms Unicode text into an equivalent composed or
|
||||
* decomposed form, allowing for easier sorting and searching of text.
|
||||
* <code>normalize</code> supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/reports/tr15/" target="unicode">
|
||||
* <a href="https://www.unicode.org/reports/tr15/" target="unicode">
|
||||
* Unicode Standard Annex #15 — Unicode Normalization Forms</a>.
|
||||
*
|
||||
* Characters with accents or other adornments can be encoded in
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -67,9 +67,9 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* <li> Unassigned Table: Contains code points that are unassigned
|
||||
* in the Unicode Version supported by StringPrep. Currently
|
||||
* RFC 3454 supports Unicode 3.2. </li>
|
||||
* <li> Prohibited Table: Contains code points that are prohibted from
|
||||
* <li> Prohibited Table: Contains code points that are prohibited from
|
||||
* the output of the StringPrep processing function. </li>
|
||||
* <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
|
||||
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
|
||||
* </ul>
|
||||
*
|
||||
* The procedure for preparing Unicode strings:
|
||||
|
@ -226,8 +226,8 @@ public final class StringPrep {
|
|||
sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
|
||||
normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
|
||||
VersionInfo normUniVer = UCharacter.getUnicodeVersion();
|
||||
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
|
||||
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
|
||||
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
|
||||
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
|
||||
((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
|
||||
){
|
||||
throw new IOException("Normalization Correction version not supported");
|
||||
|
@ -325,7 +325,7 @@ public final class StringPrep {
|
|||
ch -= val.value;
|
||||
}
|
||||
}else if(val.type == DELETE){
|
||||
// just consume the codepoint and contine
|
||||
// just consume the codepoint and continue
|
||||
continue;
|
||||
}
|
||||
//copy the source into destination
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -147,9 +147,9 @@ public abstract class UCharacterIterator
|
|||
*/
|
||||
public int nextCodePoint(){
|
||||
int ch1 = next();
|
||||
if(UTF16.isLeadSurrogate((char)ch1)){
|
||||
if(UTF16.isLeadSurrogate(ch1)){
|
||||
int ch2 = next();
|
||||
if(UTF16.isTrailSurrogate((char)ch2)){
|
||||
if(UTF16.isTrailSurrogate(ch2)){
|
||||
return UCharacterProperty.getRawSupplementary((char)ch1,
|
||||
(char)ch2);
|
||||
}else if (ch2 != DONE) {
|
||||
|
@ -175,7 +175,7 @@ public abstract class UCharacterIterator
|
|||
/**
|
||||
* Retreat to the start of the previous code point in the text,
|
||||
* and return it (pre-decrement semantics). If the index is not
|
||||
* preceeded by a valid surrogate pair, the behavior is the same
|
||||
* preceded by a valid surrogate pair, the behavior is the same
|
||||
* as <code>previous()</code>. Otherwise the iterator is
|
||||
* decremented to the start of the surrogate pair, and the code
|
||||
* point represented by the pair is returned.
|
||||
|
@ -185,9 +185,9 @@ public abstract class UCharacterIterator
|
|||
*/
|
||||
public int previousCodePoint(){
|
||||
int ch1 = previous();
|
||||
if(UTF16.isTrailSurrogate((char)ch1)){
|
||||
if(UTF16.isTrailSurrogate(ch1)){
|
||||
int ch2 = previous();
|
||||
if(UTF16.isLeadSurrogate((char)ch2)){
|
||||
if(UTF16.isLeadSurrogate(ch2)){
|
||||
return UCharacterProperty.getRawSupplementary((char)ch2,
|
||||
(char)ch1);
|
||||
}else if (ch2 != DONE) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -382,36 +382,39 @@ public final class UTF16
|
|||
}
|
||||
|
||||
/**
|
||||
* Determines whether the code value is a surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a surrogate.
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isSurrogate(char char16)
|
||||
{
|
||||
return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
public static boolean isSurrogate(int codePoint) {
|
||||
return (codePoint & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a trail surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a trail surrogate.
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a trail surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a trail surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isTrailSurrogate(char char16)
|
||||
{
|
||||
return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
public static boolean isTrailSurrogate(int codePoint) {
|
||||
return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a lead surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a lead surrogate
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a lead surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a lead surrogate
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isLeadSurrogate(char char16)
|
||||
{
|
||||
return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
public static boolean isLeadSurrogate(int codePoint) {
|
||||
return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -135,8 +135,8 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized. For a
|
||||
* complete list of supported property patterns, see the User's Guide
|
||||
* for UnicodeSet at
|
||||
* <a href="http://www.icu-project.org/userguide/unicodeSet.html">
|
||||
* http://www.icu-project.org/userguide/unicodeSet.html</a>.
|
||||
* <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
|
||||
* https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
|
||||
* Actual determination of property data is defined by the underlying
|
||||
* Unicode database as implemented by UCharacter.
|
||||
*
|
||||
|
@ -147,6 +147,13 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* their delimiters; "[:^foo]" and "\P{foo}". In any other location,
|
||||
* '^' has no special meaning.
|
||||
*
|
||||
* <p>Since ICU 70, "[^...]", "[:^foo]", "\P{foo}", and "[:binaryProperty=No:]"
|
||||
* perform a "code point complement" (all code points minus the original set),
|
||||
* removing all multicharacter strings,
|
||||
* equivalent to .{@link #complement()}.{@link #removeAllStrings()} .
|
||||
* The {@link #complement()} API function continues to perform a
|
||||
* symmetric difference with all code points and thus retains all multicharacter strings.
|
||||
*
|
||||
* <p>Ranges are indicated by placing a '-' between two
|
||||
* characters, as in "a-z". This specifies the range of all
|
||||
* characters from the left to the right, in Unicode order. If the
|
||||
|
@ -189,8 +196,6 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* <blockquote>
|
||||
|
@ -230,9 +235,8 @@ import jdk.internal.icu.util.VersionInfo;
|
|||
* </tr>
|
||||
* <tr align="top">
|
||||
* <td nowrap valign="top" align="right"><code>hex := </code></td>
|
||||
* <td valign="top"><em>any character for which
|
||||
* </em><code>Character.digit(c, 16)</code><em>
|
||||
* returns a non-negative result</em></td>
|
||||
* <td style="vertical-align: top;"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
|
||||
* 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td nowrap valign="top" align="right"><code>property := </code></td>
|
||||
|
@ -487,7 +491,7 @@ public class UnicodeSet {
|
|||
else if (i > 0 && c == list[i-1]) {
|
||||
// c is after end of prior range
|
||||
list[i-1]++;
|
||||
// no need to chcek for collapse here
|
||||
// no need to check for collapse here
|
||||
}
|
||||
|
||||
else {
|
||||
|
@ -528,7 +532,6 @@ public class UnicodeSet {
|
|||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus {@code "ch" => {"ch"}}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
|
@ -546,22 +549,19 @@ public class UnicodeSet {
|
|||
|
||||
/**
|
||||
* Utility for getting code point from single code point CharSequence.
|
||||
* See the public UTF16.getSingleCodePoint()
|
||||
* See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
|
||||
*
|
||||
* @return a code point IF the string consists of a single one.
|
||||
* otherwise returns -1.
|
||||
* @param s to test
|
||||
*/
|
||||
private static int getSingleCP(CharSequence s) {
|
||||
if (s.length() < 1) {
|
||||
throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
if (s.length() == 2) {
|
||||
int cp = Character.codePointAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -569,13 +569,11 @@ public class UnicodeSet {
|
|||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If {@code end > start}
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param end last character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet complement(int start, int end) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -43,7 +43,7 @@ import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;
|
|||
/**
|
||||
* Immutable Unicode code point trie.
|
||||
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
* For details see http://site.icu-project.org/design/struct/utrie
|
||||
* For details see https://icu.unicode.org/design/struct/utrie
|
||||
*
|
||||
* <p>This class is not intended for public subclassing.
|
||||
*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -54,7 +54,7 @@ public final class VersionInfo
|
|||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String ICU_DATA_VERSION_PATH = "67b";
|
||||
public static final String ICU_DATA_VERSION_PATH = "70b";
|
||||
|
||||
// public methods ------------------------------------------------------
|
||||
|
||||
|
@ -148,7 +148,15 @@ public final class VersionInfo
|
|||
*/
|
||||
public int compareTo(VersionInfo other)
|
||||
{
|
||||
return m_version_ - other.m_version_;
|
||||
// m_version_ is an int, a signed 32-bit integer.
|
||||
// When the major version is >=128, then the version int is negative.
|
||||
// Compare it in two steps to simulate an unsigned-int comparison.
|
||||
// (Alternatively we could turn each int into a long and reset the upper 32 bits.)
|
||||
// Compare the upper bits first, using logical shift right (unsigned).
|
||||
int diff = (m_version_ >>> 1) - (other.m_version_ >>> 1);
|
||||
if (diff != 0) { return diff; }
|
||||
// Compare the remaining bits.
|
||||
return (m_version_ & 1) - (other.m_version_ & 1);
|
||||
}
|
||||
|
||||
// private data members ----------------------------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
## International Components for Unicode (ICU4J) v67.1
|
||||
## International Components for Unicode (ICU4J) v70.1
|
||||
|
||||
### ICU4J License
|
||||
```
|
||||
|
@ -80,61 +80,439 @@ of the copyright holder.
|
|||
All trademarks and registered trademarks mentioned herein are the
|
||||
property of their respective owners.
|
||||
|
||||
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
|
||||
——————————————————————————————————————————————————————————————————————
|
||||
# The Google Chrome software developed by Google is licensed under
|
||||
# the BSD license. Other software included in this distribution is
|
||||
# provided under other licenses, as set forth below.
|
||||
#
|
||||
# The BSD License
|
||||
# http://opensource.org/licenses/bsd-license.php
|
||||
# Copyright (C) 2006-2008, Google Inc.
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided with
|
||||
# the distribution.
|
||||
# Neither the name of Google Inc. nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
# The word list in cjdict.txt are generated by combining three word lists
|
||||
# listed below with further processing for compound word breaking. The
|
||||
# frequency is generated with an iterative training against Google web
|
||||
# corpora.
|
||||
#
|
||||
# * Libtabe (Chinese)
|
||||
# - https://sourceforge.net/project/?group_id=1519
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# * IPADIC (Japanese)
|
||||
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# ---------COPYING.libtabe ---- BEGIN--------------------
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 TaBE Project.
|
||||
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
|
||||
# * All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the TaBE Project nor the names of its
|
||||
# * contributors may be used to endorse or promote products derived
|
||||
# * from this software without specific prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 Computer Systems and Communication Lab,
|
||||
# * Institute of Information Science, Academia
|
||||
# * Sinica. All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the Computer Systems and Communication Lab
|
||||
# * nor the names of its contributors may be used to endorse or
|
||||
# * promote products derived from this software without specific
|
||||
# * prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
|
||||
# University of Illinois
|
||||
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
|
||||
#
|
||||
# ---------------COPYING.libtabe-----END--------------------------------
|
||||
#
|
||||
#
|
||||
# ---------------COPYING.ipadic-----BEGIN-------------------------------
|
||||
#
|
||||
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
|
||||
# and Technology. All Rights Reserved.
|
||||
#
|
||||
# Use, reproduction, and distribution of this software is permitted.
|
||||
# Any copy of this software, whether in its original form or modified,
|
||||
# must include both the above copyright notice and the following
|
||||
# paragraphs.
|
||||
#
|
||||
# Nara Institute of Science and Technology (NAIST),
|
||||
# the copyright holders, disclaims all warranties with regard to this
|
||||
# software, including all implied warranties of merchantability and
|
||||
# fitness, in no event shall NAIST be liable for
|
||||
# any special, indirect or consequential damages or any damages
|
||||
# whatsoever resulting from loss of use, data or profits, whether in an
|
||||
# action of contract, negligence or other tortuous action, arising out
|
||||
# of or in connection with the use or performance of this software.
|
||||
#
|
||||
# A large portion of the dictionary entries
|
||||
# originate from ICOT Free Software. The following conditions for ICOT
|
||||
# Free Software applies to the current dictionary as well.
|
||||
#
|
||||
# Each User may also freely distribute the Program, whether in its
|
||||
# original form or modified, to any third party or parties, PROVIDED
|
||||
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
# on, or be attached to, the Program, which is distributed substantially
|
||||
# in the same form as set out herein and that such intended
|
||||
# distribution, if actually made, will neither violate or otherwise
|
||||
# contravene any of the laws and regulations of the countries having
|
||||
# jurisdiction over the User or the intended distribution itself.
|
||||
#
|
||||
# NO WARRANTY
|
||||
#
|
||||
# The program was produced on an experimental basis in the course of the
|
||||
# research and development conducted during the project and is provided
|
||||
# to users as so produced on an experimental basis. Accordingly, the
|
||||
# program is provided without any warranty whatsoever, whether express,
|
||||
# implied, statutory or otherwise. The term "warranty" used herein
|
||||
# includes, but is not limited to, any warranty of the quality,
|
||||
# performance, merchantability and fitness for a particular purpose of
|
||||
# the program and the nonexistence of any infringement or violation of
|
||||
# any right of any third party.
|
||||
#
|
||||
# Each user of the program will agree and understand, and be deemed to
|
||||
# have agreed and understood, that there is no warranty whatsoever for
|
||||
# the program and, accordingly, the entire risk arising from or
|
||||
# otherwise connected with the program is assumed by the user.
|
||||
#
|
||||
# Therefore, neither ICOT, the copyright holder, or any other
|
||||
# organization that participated in or was otherwise related to the
|
||||
# development of the program and their respective officials, directors,
|
||||
# officers and other employees shall be held liable for any and all
|
||||
# damages, including, without limitation, general, special, incidental
|
||||
# and consequential damages, arising out of or otherwise in connection
|
||||
# with the use or inability to use the program or any product, material
|
||||
# or result produced or otherwise obtained by using the program,
|
||||
# regardless of whether they have been advised of, or otherwise had
|
||||
# knowledge of, the possibility of such damages at any time during the
|
||||
# project or thereafter. Each user will be deemed to have agreed to the
|
||||
# foregoing by his or her commencement of use of the program. The term
|
||||
# "use" as used herein includes, but is not limited to, the use,
|
||||
# modification, copying and distribution of the program and the
|
||||
# production of secondary products from the program.
|
||||
#
|
||||
# In the case where the program, whether in its original form or
|
||||
# modified, was distributed or delivered to or received by a user from
|
||||
# any person, organization or entity other than ICOT, unless it makes or
|
||||
# grants independently of ICOT any specific warranty to the user in
|
||||
# writing, such person, organization or entity, will also be exempted
|
||||
# from and not be held liable to the user for any such damages as noted
|
||||
# above as far as the program is concerned.
|
||||
#
|
||||
# ---------------COPYING.ipadic-----END----------------------------------
|
||||
|
||||
3. Lao Word Break Dictionary Data (laodict.txt)
|
||||
|
||||
From: https://www.unicode.org/copyright.html:
|
||||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Copyright (c) 2015 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# Project: https://github.com/rober42539/lao-dictionary
|
||||
# Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt
|
||||
# License: https://github.com/rober42539/lao-dictionary/LICENSE.txt
|
||||
# (copied below)
|
||||
#
|
||||
# This file is derived from the above dictionary version of Nov 22, 2020
|
||||
# ----------------------------------------------------------------------
|
||||
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer. Redistributions in binary
|
||||
# form must reproduce the above copyright notice, this list of conditions and
|
||||
# the following disclaimer in the documentation and/or other materials
|
||||
# provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
Unicode® Copyright and Terms of Use
|
||||
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
# Copyright (c) 2014 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# This list is part of a project hosted at:
|
||||
# github.com/kanyawtech/myanmar-karen-word-lists
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# Copyright (c) 2013, LeRoy Benjamin Sharon
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met: Redistributions of source code must retain the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer. Redistributions in binary form must reproduce the
|
||||
# above copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
#
|
||||
# Neither the name Myanmar Karen Word Lists, nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
Unicode Copyright
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
5. Time Zone Database
|
||||
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
ICU uses the public domain data and code derived from Time Zone
|
||||
Database for its time zone support. The ownership of the TZ database
|
||||
is explained in BCP 175: Procedure for Maintaining the Time Zone
|
||||
Database section 7.
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
# 7. Database Ownership
|
||||
#
|
||||
# The TZ database itself is not an IETF Contribution or an IETF
|
||||
# document. Rather it is a pre-existing and regularly updated work
|
||||
# that is in the public domain, and is intended to remain in the
|
||||
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
|
||||
# not apply to the TZ Database or contributions that individuals make
|
||||
# to it. Should any claims be made and substantiated against the TZ
|
||||
# Database, the organization that is providing the IANA
|
||||
# Considerations defined in this RFC, under the memorandum of
|
||||
# understanding with the IETF, currently ICANN, may act in accordance
|
||||
# with all competent court orders. No ownership claims will be made
|
||||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
6. Google double-conversion
|
||||
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
====================================================
|
||||
|
||||
Unicode® Copyright and Terms of Use
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
|
||||
Unicode Copyright
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
|
||||
=======================================================
|
||||
|
||||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
||||
|
||||
See Terms of Use
|
||||
for definitions of Unicode Inc.’s Data Files and Software.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
## The Unicode Standard, Unicode Character Database, Version 13.0.0
|
||||
|
||||
## The Unicode Standard, Unicode Character Database, Version 14.0.0
|
||||
|
||||
### Unicode Character Database
|
||||
```
|
||||
|
||||
|
@ -18,7 +18,7 @@ THE DATA FILES OR SOFTWARE.
|
|||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -50,5 +50,54 @@ shall not be used in advertising or otherwise to promote the sale,
|
|||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
=== http://www.unicode.org/copyright.html content ===
|
||||
Unicode (R) Copyright and Terms of Use
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
|
||||
Unicode Copyright
|
||||
Copyright (C) 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue