mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 14:24:46 +02:00
8159337: Introduce a method in Locale class to return the language tags as per RFC 5646 convention
Reviewed-by: naoto, rriggs
This commit is contained in:
parent
3aff5eacbd
commit
82bcee76ea
3 changed files with 276 additions and 5 deletions
|
@ -1689,6 +1689,58 @@ public final class Locale implements Cloneable, Serializable {
|
||||||
return langTag;
|
return langTag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@return a case folded IETF BCP 47 language tag}
|
||||||
|
*
|
||||||
|
* <p>This method formats a language tag into one with case convention
|
||||||
|
* that adheres to section 2.1.1. Formatting of Language Tags of RFC5646.
|
||||||
|
* This format is defined as: <i>All subtags, including extension and private
|
||||||
|
* use subtags, use lowercase letters with two exceptions: two-letter
|
||||||
|
* and four-letter subtags that neither appear at the start of the tag
|
||||||
|
* nor occur after singletons. Such two-letter subtags are all
|
||||||
|
* uppercase (as in the tags "en-CA-x-ca" or "sgn-BE-FR") and four-
|
||||||
|
* letter subtags are titlecase (as in the tag "az-Latn-x-latn").</i> As
|
||||||
|
* legacy tags, (defined as "grandfathered" in RFC5646) are not always well-formed, this method
|
||||||
|
* will simply case fold a legacy tag to match the exact case convention
|
||||||
|
* for the particular tag specified in the respective
|
||||||
|
* {@link ##legacy_tags Legacy tags} table.
|
||||||
|
*
|
||||||
|
* <p><b>Special Exceptions</b>
|
||||||
|
* <p>To maintain consistency with {@link ##def_variant variant}
|
||||||
|
* which is case-sensitive, this method will neither case fold variant
|
||||||
|
* subtags nor case fold private use subtags prefixed by {@code lvariant}.
|
||||||
|
*
|
||||||
|
* <p>For example,
|
||||||
|
* {@snippet lang=java :
|
||||||
|
* String tag = "ja-kana-jp-x-lvariant-Oracle-JDK-Standard-Edition";
|
||||||
|
* Locale.caseFoldLanguageTag(tag); // returns "ja-Kana-JP-x-lvariant-Oracle-JDK-Standard-Edition"
|
||||||
|
* String tag2 = "ja-kana-jp-x-Oracle-JDK-Standard-Edition";
|
||||||
|
* Locale.caseFoldLanguageTag(tag2); // returns "ja-Kana-JP-x-oracle-jdk-standard-edition"
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* <p>Excluding case folding, this method makes no modifications to the tag itself.
|
||||||
|
* Case convention of language tags does not carry meaning, and is simply
|
||||||
|
* recommended as it corresponds with various ISO standards, including:
|
||||||
|
* ISO639-1, ISO15924, and ISO3166-1.
|
||||||
|
*
|
||||||
|
* <p>As the formatting of the case convention is dependent on the
|
||||||
|
* positioning of certain subtags, callers of this method should ensure
|
||||||
|
* that the language tag is well-formed, (conforming to section 2.1. Syntax
|
||||||
|
* of RFC5646).
|
||||||
|
*
|
||||||
|
* @param languageTag the IETF BCP 47 language tag.
|
||||||
|
* @throws IllformedLocaleException if {@code languageTag} is not well-formed
|
||||||
|
* @throws NullPointerException if {@code languageTag} is {@code null}
|
||||||
|
* @spec https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
|
||||||
|
* RFC5646 2.1. Syntax
|
||||||
|
* @spec https://www.rfc-editor.org/rfc/rfc5646#section-2.1.1
|
||||||
|
* RFC5646 2.1.1. Formatting of Language Tags
|
||||||
|
* @since 21
|
||||||
|
*/
|
||||||
|
public static String caseFoldLanguageTag(String languageTag) {
|
||||||
|
return LanguageTag.caseFoldTag(languageTag);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a locale for the specified IETF BCP 47 language tag string.
|
* Returns a locale for the specified IETF BCP 47 language tag string.
|
||||||
*
|
*
|
||||||
|
@ -1748,7 +1800,7 @@ public final class Locale implements Cloneable, Serializable {
|
||||||
* // returns "th-TH-u-nu-thai-x-lvariant-TH"
|
* // returns "th-TH-u-nu-thai-x-lvariant-TH"
|
||||||
* </pre></ul>
|
* </pre></ul>
|
||||||
*
|
*
|
||||||
* <p>This implements the 'Language-Tag' production of BCP47, and
|
* <p id="legacy_tags">This implements the 'Language-Tag' production of BCP47, and
|
||||||
* so supports legacy (regular and irregular, referred to as
|
* so supports legacy (regular and irregular, referred to as
|
||||||
* "Type: grandfathered" in BCP47) as well as
|
* "Type: grandfathered" in BCP47) as well as
|
||||||
* private use language tags. Stand alone private use tags are
|
* private use language tags. Stand alone private use tags are
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2010, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -34,7 +34,9 @@ package sun.util.locale;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.IllformedLocaleException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.StringJoiner;
|
import java.util.StringJoiner;
|
||||||
|
@ -59,7 +61,6 @@ public class LanguageTag {
|
||||||
private List<String> extlangs = Collections.emptyList(); // extlang subtags
|
private List<String> extlangs = Collections.emptyList(); // extlang subtags
|
||||||
private List<String> variants = Collections.emptyList(); // variant subtags
|
private List<String> variants = Collections.emptyList(); // variant subtags
|
||||||
private List<String> extensions = Collections.emptyList(); // extensions
|
private List<String> extensions = Collections.emptyList(); // extensions
|
||||||
|
|
||||||
// Map contains legacy language tags and its preferred mappings from
|
// Map contains legacy language tags and its preferred mappings from
|
||||||
// http://www.ietf.org/rfc/rfc5646.txt
|
// http://www.ietf.org/rfc/rfc5646.txt
|
||||||
// Keys are lower-case strings.
|
// Keys are lower-case strings.
|
||||||
|
@ -208,7 +209,6 @@ public class LanguageTag {
|
||||||
tag.parseExtensions(itr, sts);
|
tag.parseExtensions(itr, sts);
|
||||||
}
|
}
|
||||||
tag.parsePrivateuse(itr, sts);
|
tag.parsePrivateuse(itr, sts);
|
||||||
|
|
||||||
if (!itr.isDone() && !sts.isError()) {
|
if (!itr.isDone() && !sts.isError()) {
|
||||||
String s = itr.current();
|
String s = itr.current();
|
||||||
sts.errorIndex = itr.currentStart();
|
sts.errorIndex = itr.currentStart();
|
||||||
|
@ -218,7 +218,6 @@ public class LanguageTag {
|
||||||
sts.errorMsg = "Invalid subtag: " + s;
|
sts.errorMsg = "Invalid subtag: " + s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return tag;
|
return tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -414,6 +413,54 @@ public class LanguageTag {
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String caseFoldTag(String tag) {
|
||||||
|
ParseStatus sts = new ParseStatus();
|
||||||
|
parse(tag, sts);
|
||||||
|
// Illegal tags
|
||||||
|
if (sts.errorMsg != null) {
|
||||||
|
throw new IllformedLocaleException(String.format("Ill formed tag:" +
|
||||||
|
" %s", sts.errorMsg));
|
||||||
|
}
|
||||||
|
// Legacy tags
|
||||||
|
String potentialLegacy = tag.toLowerCase(Locale.ROOT);
|
||||||
|
if (LEGACY.containsKey(potentialLegacy)) {
|
||||||
|
return LEGACY.get(potentialLegacy)[0];
|
||||||
|
}
|
||||||
|
// Non-legacy tags
|
||||||
|
StringBuilder bldr = new StringBuilder(tag.length());
|
||||||
|
String[] subtags = tag.split("-");
|
||||||
|
boolean privateFound = false;
|
||||||
|
boolean singletonFound = false;
|
||||||
|
boolean privUseVarFound = false;
|
||||||
|
for (int i = 0; i < subtags.length; i++) {
|
||||||
|
String subtag = subtags[i];
|
||||||
|
if (privUseVarFound) {
|
||||||
|
bldr.append(subtag);
|
||||||
|
} else if (i > 0 && isVariant(subtag) && !singletonFound && !privateFound) {
|
||||||
|
bldr.append(subtag);
|
||||||
|
} else if (i > 0 && isRegion(subtag) && !singletonFound && !privateFound) {
|
||||||
|
bldr.append(canonicalizeRegion(subtag));
|
||||||
|
} else if (i > 0 && isScript(subtag) && !singletonFound && !privateFound) {
|
||||||
|
bldr.append(canonicalizeScript(subtag));
|
||||||
|
// If subtag is not 2 letter, 4 letter, or variant
|
||||||
|
// under the right conditions, then it should be lower-case
|
||||||
|
} else {
|
||||||
|
if (isPrivateusePrefix(subtag)) {
|
||||||
|
privateFound = true;
|
||||||
|
} else if (isExtensionSingleton(subtag)) {
|
||||||
|
singletonFound = true;
|
||||||
|
} else if (subtag.equals(PRIVUSE_VARIANT_PREFIX)) {
|
||||||
|
privUseVarFound = true;
|
||||||
|
}
|
||||||
|
bldr.append(subtag.toLowerCase(Locale.ROOT));
|
||||||
|
}
|
||||||
|
if (i != subtags.length-1) {
|
||||||
|
bldr.append("-");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bldr.substring(0);
|
||||||
|
}
|
||||||
|
|
||||||
public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
|
public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
|
||||||
LanguageTag tag = new LanguageTag();
|
LanguageTag tag = new LanguageTag();
|
||||||
|
|
||||||
|
|
172
test/jdk/java/util/Locale/CaseFoldLanguageTagTest.java
Normal file
172
test/jdk/java/util/Locale/CaseFoldLanguageTagTest.java
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8159337
|
||||||
|
* @summary Test Locale.caseFoldLanguageTag(String languageTag)
|
||||||
|
* @run junit CaseFoldLanguageTagTest
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.AfterAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||||
|
|
||||||
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
|
import org.junit.jupiter.params.provider.MethodSource;
|
||||||
|
|
||||||
|
import java.util.IllformedLocaleException;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the implementation of Locale.caseFoldLanguageTag(String languageTag).
|
||||||
|
* A variety of well-formed tags are tested, composed of the following subtags:
|
||||||
|
* language, extlang, script, region, variant, extension, singleton, privateuse,
|
||||||
|
* grandfathered, and irregular. For more info, see the following,
|
||||||
|
* <a href="https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1">Tag Syntax</a>).
|
||||||
|
* In addition, the method is tested to ensure that IllformedLocaleException and
|
||||||
|
* NullPointerException are thrown given the right circumstances.
|
||||||
|
*/
|
||||||
|
public class CaseFoldLanguageTagTest {
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@MethodSource("wellFormedTags")
|
||||||
|
public void wellFormedTags(String tag, String foldedTag) {
|
||||||
|
assertEquals(foldedTag, Locale.caseFoldLanguageTag(tag), String.format("Folded %s", tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@MethodSource("illFormedTags")
|
||||||
|
public void illFormedTags(String tag) {
|
||||||
|
assertThrows(IllformedLocaleException.class, () ->
|
||||||
|
Locale.caseFoldLanguageTag(tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void throwNPE() {
|
||||||
|
assertThrows(NullPointerException.class, () ->
|
||||||
|
Locale.caseFoldLanguageTag(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<Arguments> wellFormedTags() {
|
||||||
|
return Stream.of(
|
||||||
|
// langtag tests
|
||||||
|
// language
|
||||||
|
Arguments.of("AB", "ab"),
|
||||||
|
// language - ext
|
||||||
|
Arguments.of("AB-ABC", "ab-abc"),
|
||||||
|
// language - ext - script
|
||||||
|
Arguments.of("AB-ABC-ABCD", "ab-abc-Abcd"),
|
||||||
|
// language - ext - script - region
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab", "ab-abc-Abcd-AB"),
|
||||||
|
// language - region
|
||||||
|
Arguments.of("AB-ab", "ab-AB"),
|
||||||
|
// language - script
|
||||||
|
Arguments.of("AB-aBCD", "ab-Abcd"),
|
||||||
|
// language - private use
|
||||||
|
Arguments.of("AB-X-AB-ABCD", "ab-x-ab-abcd"),
|
||||||
|
// language - ext - script - region - variant
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE", "ab-abc-Abcd-AB-ABCDE"),
|
||||||
|
// language - ext - script - region - variant x 2
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-fghij",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-fghij"),
|
||||||
|
// language - ext - script - region - variant - extension
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-A-ABCD",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-a-abcd"),
|
||||||
|
// language - ext - script - region - variant - private
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-X-ABCD",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-x-abcd"),
|
||||||
|
// language - ext - script - region - variant - extension x2
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-A-ABCD-B-EFGHI",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-a-abcd-b-efghi"),
|
||||||
|
// language - ext - script - region - variant - extension - private
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-A-ABCD-X-ABCD",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-a-abcd-x-abcd"),
|
||||||
|
// language - ext - script - region - variant x2 - extension x2 - private (x2 ext)
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-A-ABCD-X-ABCD-EFGHI",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-a-abcd-x-abcd-efghi"),
|
||||||
|
// language - variant x2 - extension x3 - private
|
||||||
|
Arguments.of("AB-aBcDeF-GhIjKl-a-ABC-DEFGH-B-ABC-C-ABC-X-A-ABC-DEF",
|
||||||
|
"ab-aBcDeF-GhIjKl-a-abc-defgh-b-abc-c-abc-x-a-abc-def"),
|
||||||
|
// language - ext- script - region - variant - extension x2 - private (x2 ext)
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-abCDe12-A-AB-B-ABCD-X-AB-ABCD",
|
||||||
|
"ab-abc-Abcd-AB-abCDe12-a-ab-b-abcd-x-ab-abcd"),
|
||||||
|
|
||||||
|
// Multiple singleton extensions
|
||||||
|
Arguments.of("AB-ABC-ABCD-ab-ABCDE-A-ABCD-GGG-ZZZ-B-EFGHI",
|
||||||
|
"ab-abc-Abcd-AB-ABCDE-a-abcd-ggg-zzz-b-efghi"),
|
||||||
|
|
||||||
|
// private use tests
|
||||||
|
Arguments.of("X-Abc", "x-abc"), // regular private
|
||||||
|
Arguments.of("X-A-ABC", "x-a-abc"), // private w/ extended (incl. 1)
|
||||||
|
Arguments.of("X-A-AB-Abcd", "x-a-ab-abcd"), // private w/ extended (incl. 1, 2, 4)
|
||||||
|
|
||||||
|
// Legacy tests
|
||||||
|
// irregular
|
||||||
|
Arguments.of("I-AMI", "i-ami"),
|
||||||
|
Arguments.of("EN-gb-OED", "en-GB-oed"),
|
||||||
|
Arguments.of("SGN-be-fr", "sgn-BE-FR"),
|
||||||
|
// regular
|
||||||
|
Arguments.of("NO-BOK", "no-bok"),
|
||||||
|
Arguments.of("CEL-GAULISH", "cel-gaulish"),
|
||||||
|
Arguments.of("ZH-MIN-NAN", "zh-min-nan"),
|
||||||
|
|
||||||
|
// Special JDK Cases (Variant and x-lvariant)
|
||||||
|
Arguments.of("de-POSIX-x-URP-lvariant-Abc-Def", "de-POSIX-x-urp-lvariant-Abc-Def"),
|
||||||
|
Arguments.of("JA-JPAN-JP-U-CA-JAPANESE-x-RANDOM-lvariant-JP",
|
||||||
|
"ja-Jpan-JP-u-ca-japanese-x-random-lvariant-JP"),
|
||||||
|
Arguments.of("ja-JP-u-ca-japanese-x-lvariant-JP", "ja-JP-u-ca-japanese-x-lvariant-JP"),
|
||||||
|
Arguments.of("XX-ABCD-yy-VARIANT-x-TEST-lvariant-JDK",
|
||||||
|
"xx-Abcd-YY-VARIANT-x-test-lvariant-JDK"),
|
||||||
|
Arguments.of("ja-kana-jp-x-lvariant-Oracle-JDK-Standard-Edition",
|
||||||
|
"ja-Kana-JP-x-lvariant-Oracle-JDK-Standard-Edition"),
|
||||||
|
Arguments.of("ja-kana-jp-x-Oracle-JDK-Standard-Edition",
|
||||||
|
"ja-Kana-JP-x-oracle-jdk-standard-edition"),
|
||||||
|
Arguments.of("ja-kana-jp-a-ABC-EFG-ZZZ-b-aaa-x-Oracle-JDK-Standard-Edition",
|
||||||
|
"ja-Kana-JP-a-abc-efg-zzz-b-aaa-x-oracle-jdk-standard-edition")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stream<Arguments> illFormedTags() {
|
||||||
|
return Stream.of(
|
||||||
|
// Starts with non-language
|
||||||
|
Arguments.of("xabadadoo-me"),
|
||||||
|
// Starts with singleton
|
||||||
|
Arguments.of("a-abc"),
|
||||||
|
Arguments.of("a-singleton-en-us"),
|
||||||
|
// Hanging dash
|
||||||
|
Arguments.of("en-"),
|
||||||
|
// Double dash
|
||||||
|
Arguments.of("en--US"),
|
||||||
|
// Script before ext lang
|
||||||
|
Arguments.of("ab-Script-ext"),
|
||||||
|
// Region before ext lang
|
||||||
|
Arguments.of("ab-AB-ext"),
|
||||||
|
// Variants at start
|
||||||
|
Arguments.of("variant-first-ab")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue