diff --git a/src/java.base/share/classes/java/util/Locale.java b/src/java.base/share/classes/java/util/Locale.java index c055c160367..d0d42e7199e 100644 --- a/src/java.base/share/classes/java/util/Locale.java +++ b/src/java.base/share/classes/java/util/Locale.java @@ -45,7 +45,7 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.ObjectStreamField; import java.io.Serializable; -import java.text.DateFormat; +import java.text.NumberFormat; import java.text.MessageFormat; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; @@ -70,131 +70,151 @@ import sun.util.locale.provider.LocaleServiceProviderPool; import sun.util.locale.provider.TimeZoneNameUtility; /** - * A {@code Locale} object represents a specific geographical, political, - * or cultural region. An operation that requires a {@code Locale} to perform - * its task is called locale-sensitive and uses the {@code Locale} - * to tailor information for the user. For example, displaying a number - * is a locale-sensitive operation— the number should be formatted - * according to the customs and conventions of the user's native country, - * region, or culture. + * A {@code Locale} represents a specific geographical, political, + * or cultural region. An API that requires a {@code Locale} to perform + * its task is {@index "locale-sensitive"} and uses the {@code Locale} + * to tailor information for the user. These locale-sensitive APIs + * are principally in the java.text and java.util packages. + * For example, displaying a number is a locale-sensitive operation— + * the number should be formatted according to the customs and conventions of the + * user's native country, region, or culture. * - *
The {@code Locale} class implements IETF BCP 47 which is composed of + *
The {@code Locale} class implements IETF BCP 47 which is composed of * RFC 4647 "Matching of Language * Tags" and RFC 5646 "Tags * for Identifying Languages" with support for the LDML (UTS#35, "Unicode * Locale Data Markup Language") BCP 47-compatible extensions for locale data - * exchange. + * exchange. Each {@code Locale} is associated with locale data which is provided + * by the Java runtime environment or any deployed {@link + * java.util.spi.LocaleServiceProvider LocaleServiceProvider} implementations. + * The locale data provided by the Java runtime environment may vary by release. * - *
A {@code Locale} object logically consists of the fields - * described below. + *
A {@code Locale} is composed of the bolded fields described below; note that a + * {@code Locale} need not have all such fields. For example, {@link + * Locale#ENGLISH Locale.ENGLISH} is only comprised of the language field. + * In contrast, a {@code Locale} such as the one returned by {@code + * Locale.forLanguageTag("en-Latn-US-POSIX-u-nu-latn")} would be comprised of all + * the fields below. This particular {@code Locale} would represent English in + * the United States using the Latin script and numerics for use in POSIX + * environments. + *
+ * {@code Locale} implements IETF BCP 47; any deviations are noted in + * comments prefixed by "BCP 47 deviation:". + * RFC 5646 + * combines subtags from various ISO (639, 3166, 15924) standards which are also + * included in the composition of {@code Locale}. + * Additionally, the full list of valid codes for each field can be found in the + * + * IANA Language Subtag Registry (e.g. search for "Type: region"). * *
[a-zA-Z]{2,8}
. Note that this is not the full
- * BCP47 language production, since it excludes extlang. They are
- * not needed since modern three-letter language codes replace
- * them.[a-zA-Z]{4}
[a-zA-Z]{2} | [0-9]{3}
However, the variant field in {@code Locale} has + * region subtags. However, the variant field in {@code Locale} has * historically been used for any kind of variation, not just * language variations. For example, some supported variants * available in Java SE Runtime Environments indicate alternative * cultural behaviors such as calendar type or number script. In - * BCP 47 this kind of information, which does not identify the + * BCP 47, this kind of information, which does not identify the * language, is supported by extension subtags or private use * subtags.
SUBTAG
- * (('_'|'-') SUBTAG)*
where SUBTAG =
- * [0-9][0-9a-zA-Z]{3} | [0-9a-zA-Z]{5,8}
. (Note: BCP 47 only
- * uses hyphen ('-') as a delimiter, this is more lenient).SUBTAG = [0-9a-zA-Z]{1,8}
and for other keys
- * SUBTAG = [0-9a-zA-Z]{2,8}
(that is, 'x' allows
+ * {@code SUBTAG = [0-9a-zA-Z]{1,8}} and for other keys
+ * {@code SUBTAG = [0-9a-zA-Z]{2,8}} (that is, 'x' allows
* single-character subtags).UTS#35, "Unicode Locale Data Markup Language" defines optional * attributes and keywords to override or refine the default behavior @@ -213,7 +233,7 @@ import sun.util.locale.provider.TimeZoneNameUtility; * String representing this information, for example, "nu-thai". The * {@code Locale} class also provides {@link * #getUnicodeLocaleAttributes}, {@link #getUnicodeLocaleKeys}, and - * {@link #getUnicodeLocaleType} which allow you to access Unicode + * {@link #getUnicodeLocaleType(String)} which provide access to the Unicode * locale attributes and key/type pairs directly. When represented as * a string, the Unicode Locale Extension lists attributes * alphabetically, followed by key/type sequences with keys listed @@ -221,11 +241,11 @@ import sun.util.locale.provider.TimeZoneNameUtility; * fixed when the type is defined) * *
A well-formed locale key has the form
- * [0-9a-zA-Z]{2}
. A well-formed locale type has the
- * form "" | [0-9a-zA-Z]{3,8} ('-' [0-9a-zA-Z]{3,8})*
(it
+ * {@code [0-9a-zA-Z]{2}}. A well-formed locale type has the
+ * form {@code "" | [0-9a-zA-Z]{3,8} ('-' [0-9a-zA-Z]{3,8})*} (it
* can be empty, or a series of subtags 3-8 alphanums in length). A
* well-formed locale attribute has the form
- * [0-9a-zA-Z]{3,8}
(it is a single subtag with the same
+ * {@code [0-9a-zA-Z]{3,8}} (it is a single subtag with the same
* form as a locale type subtag).
*
*
The Unicode locale extension specifies optional behavior in @@ -234,36 +254,11 @@ import sun.util.locale.provider.TimeZoneNameUtility; * implementations in a Java Runtime Environment might not support any * particular Unicode locale attributes or key/type pairs. * - *
There are several ways to obtain a {@code Locale} - * object. - * - *
Using {@link Builder} you can construct a {@code Locale} object - * that conforms to BCP 47 syntax. - * - *
The method {@link #forLanguageTag} obtains a {@code Locale} - * object for a well-formed BCP 47 language tag. The method - * {@link #of(String, String, String)} and its overloads obtain a - * {@code Locale} object from given {@code language}, {@code country}, - * and/or {@code variant} defined above. - * - *
The {@code Locale} class provides a number of convenient constants - * that you can use to obtain {@code Locale} objects for commonly used - * locales. For example, {@code Locale.US} is the {@code Locale} object - * for the United States. - * - *
The default Locale is provided for any locale-sensitive methods if no + *
The default Locale is provided for any locale-sensitive methods if no * {@code Locale} is explicitly specified as an argument, such as - * {@link DateFormat#getInstance()}. The default Locale is determined at startup + * {@link NumberFormat#getInstance()}. The default Locale is determined at startup * of the Java runtime and established in the following three phases: *
There are finer-grained default Locales specific for each {@link Locale.Category}. * These category specific default Locales can be queried by {@link #getDefault(Category)}, * and set by {@link #setDefault(Category, Locale)}. Construction of these category @@ -327,19 +323,87 @@ import sun.util.locale.provider.TimeZoneNameUtility; * category. In the absence of category specific system properties, the "category-less" * system properties are used, such as {@code user.language} in the previous example. * - *
If an application or a system is internationalized and provides localized + *
There are several ways to obtain a {@code Locale} object. + * Using the deprecated {@code Locale} constructors is discouraged. + * + *
The following invocations produce Locale objects that are all equivalent: + * {@snippet lang=java : + * Locale.US; + * Locale.of("en", "US"); + * Locale.forLanguageTag("en-US"); + * new Locale.Builder().setLanguage("en").setRegion("US").build(); + * } + * + *
Once a {@code Locale} is {@linkplain ##ObtainingLocale obtained}, + * it can be queried for information about itself. For example, use {@link + * #getCountry} to get the country (or region) code and {@link #getLanguage} to + * get the language. {@link #getDisplayCountry} can be used to get the + * name of the country suitable for displaying to the user. Similarly, + * use {@link #getDisplayLanguage()} to get the name of + * the language suitable for displaying to the user. The {@code getDisplayXXX} + * methods are themselves locale-sensitive and have two variants: one with an explicit + * locale parameter, and one without. The latter uses the default {@link + * Locale.Category#DISPLAY DISPLAY} locale, so the following are equivalent: + * {@snippet lang=java : + * Locale.getDefault().getDisplayCountry(); + * Locale.getDefault().getDisplayCountry(Locale.getDefault(Locale.Category.DISPLAY)); + * } + * + *
The Java Platform provides a number of classes that perform locale-sensitive + * operations. For example, the {@code NumberFormat} class formats + * numbers, currency, and percentages in a locale-sensitive manner. Classes such + * as {@code NumberFormat} have several factory methods for creating a default object + * of that type. These methods generally have two variants: one with an explicit + * locale parameter, and one without. The latter uses the default {@link + * Locale.Category#FORMAT FORMAT} locale, so the following are equivalent: + * {@snippet lang=java : + * NumberFormat.getCurrencyInstance(); + * NumberFormat.getCurrencyInstance(Locale.getDefault(Locale.Category.FORMAT)); + * } + * + *
+ * The following example demonstrates locale-sensitive currency and + * date-related operations under different locales: + * {@snippet lang = java: + * var number = 1000; + * NumberFormat.getCurrencyInstance(Locale.US).format(number); // returns "$1,000.00" + * NumberFormat.getCurrencyInstance(Locale.JAPAN).format(number); // returns "\u00A51,000" + * var date = LocalDate.of(2024, 1, 1); + * DateTimeFormatter.ofLocalizedDate(FormatStyle.LONG).localizedBy(Locale.US).format(date); // returns "January 1, 2024" + * DateTimeFormatter.ofLocalizedDate(FormatStyle.LONG).localizedBy(Locale.JAPAN).format(date); // returns "2024\u5e741\u67081\u65e5" + * } + * + *
If an application is internationalized and provides localized * resources for multiple locales, it sometimes needs to find one or more * locales (or language tags) which meet each user's specific preferences. Note - * that a term "language tag" is used interchangeably with "locale" in this - * locale matching documentation. + * that the term "{@index "language tag"}" is used interchangeably + * with "locale" in the following locale matching documentation. * - *
In order to do matching a user's preferred locales to a set of language + *
In order to match a user's preferred locales to a set of language * tags, RFC 4647 Matching of * Language Tags defines two mechanisms: filtering and lookup. * Filtering is used to get all matching locales, whereas - * lookup is to choose the best matching locale. + * lookup is to select the best matching locale. * Matching is done case-insensitively. These matching mechanisms are described * in the following sections. * @@ -348,7 +412,8 @@ import sun.util.locale.provider.TimeZoneNameUtility; * language ranges: basic and extended. See * {@link Locale.LanguageRange Locale.LanguageRange} for details. * - *
The filtering operation returns all matching language tags. It is defined * in RFC 4647 as follows: @@ -366,7 +431,7 @@ import sun.util.locale.provider.TimeZoneNameUtility; * {@link Locale.FilteringMode} is a parameter to specify how filtering should * be done. * - *
The lookup operation returns the best matching language tags. It is * defined in RFC 4647 as follows: @@ -398,76 +463,50 @@ import sun.util.locale.provider.TimeZoneNameUtility; * an {@link Iterator} over a {@link Collection} of language tags is treated as * the best matching one. * - *
Once you've obtained a {@code Locale} you can query it for information - * about itself. Use {@code getCountry} to get the country (or region) - * code and {@code getLanguage} to get the language code. - * You can use {@code getDisplayCountry} to get the - * name of the country suitable for displaying to the user. Similarly, - * you can use {@code getDisplayLanguage} to get the name of - * the language suitable for displaying to the user. Interestingly, - * the {@code getDisplayXXX} methods are themselves locale-sensitive - * and have two versions: one that uses the default - * {@link Locale.Category#DISPLAY DISPLAY} locale and one - * that uses the locale specified as an argument. + *
During serialization, writeObject writes all fields to the output + * stream, including extensions. * - *
The Java Platform provides a number of classes that perform locale-sensitive - * operations. For example, the {@code NumberFormat} class formats - * numbers, currency, and percentages in a locale-sensitive manner. Classes - * such as {@code NumberFormat} have several convenience methods - * for creating a default object of that type. For example, the - * {@code NumberFormat} class provides these three convenience methods - * for creating a default {@code NumberFormat} object: - * {@snippet lang=java : - * NumberFormat.getInstance(); - * NumberFormat.getCurrencyInstance(); - * NumberFormat.getPercentInstance(); - * } - * Each of these methods has two variants; one with an explicit locale - * and one without; the latter uses the default - * {@link Locale.Category#FORMAT FORMAT} locale: - * {@snippet lang=java : - * NumberFormat.getInstance(myLocale); - * NumberFormat.getCurrencyInstance(myLocale); - * NumberFormat.getPercentInstance(myLocale); - * } - * A {@code Locale} is the mechanism for identifying the kind of object - * ({@code NumberFormat}) that you would like to get. The locale is - * just a mechanism for identifying objects, - * not a container for the objects themselves. + *
During deserialization, readResolve adds extensions as described + * in {@linkplain ##special_cases_constructor Special Cases}, only + * for the two cases th_TH_TH and ja_JP_JP. * - *
In order to maintain compatibility, Locale's - * constructors retain their behavior prior to the Java Runtime - * Environment version 1.7. The same is largely true for the - * {@code toString} method. Thus Locale objects can continue to - * be used as they were. In particular, clients who parse the output - * of toString into language, country, and variant fields can continue - * to do so (although this is strongly discouraged), although the - * variant field will have additional information in it if script or - * extensions are present. + * @implNote + *
The following commentary is provided for apps that want to ensure + * interoperability with older releases of {@code Locale} provided by the + * reference implementation. + *
In addition, BCP 47 imposes syntax restrictions that are not * imposed by Locale's constructors. This means that conversions * between some Locales and BCP 47 language tags cannot be made without - * losing information. Thus {@code toLanguageTag} cannot + * losing information. Thus {@link #toLanguageTag} cannot * represent the state of locales whose language, country, or variant * do not conform to BCP 47. * - *
Because of these issues, it is recommended that clients migrate + *
Because of these issues, it is recommended that apps migrate * away from constructing non-conforming locales and use the - * {@code forLanguageTag} and {@code Locale.Builder} APIs instead. - * Clients desiring a string representation of the complete locale can - * then always rely on {@code toLanguageTag} for this purpose. + * {@link #forLanguageTag(String)} and {@link Locale.Builder} APIs instead. + * Apps desiring a string representation of the complete locale can + * then always rely on {@link #toLanguageTag} for this purpose. * - *
For compatibility reasons, two * non-conforming locales are treated as special cases. These are * {@code ja_JP_JP} and {@code th_TH_TH}. These are ill-formed - * in BCP 47 since the variants are too short. To ease migration to BCP 47, + * in BCP 47 since the {@linkplain ##def_variant variants} are too short. To ease migration to BCP 47, * these are treated specially during construction. These two cases (and only * these) cause a constructor to generate an extension, all other values behave * exactly as they did prior to Java 7. @@ -487,25 +526,16 @@ import sun.util.locale.provider.TimeZoneNameUtility; * constructor is called with the arguments "th", "TH", "TH", the * extension "u-nu-thai" is automatically added. * - *
During serialization, writeObject writes all fields to the output - * stream, including extensions. - * - *
During deserialization, readResolve adds extensions as described - * in {@linkplain ##special_cases_constructor Special Cases}, only - * for the two cases th_TH_TH and ja_JP_JP. - * - *
Locale's constructor has always converted three language codes to + *
Locale's constructors historically converted three language codes to * their earlier, obsoleted forms: {@code he} maps to {@code iw}, * {@code yi} maps to {@code ji}, and {@code id} maps to * {@code in}. Since Java SE 17, this is no longer the case. Each * language maps to its new form; {@code iw} maps to {@code he}, {@code ji} * maps to {@code yi}, and {@code in} maps to {@code id}. * - *
For the backward compatible behavior, the system property + *
For backwards compatible behavior, the system property * {@systemProperty java.locale.useOldISOCodes} reverts the behavior * back to that of before Java SE 17. If the system property is set to * {@code true}, those three current language codes are mapped to their @@ -524,22 +554,12 @@ import sun.util.locale.provider.TimeZoneNameUtility; * lookup mechanism also implements this mapping, so that resources * can be named using either convention, see {@link ResourceBundle.Control}. * - *
The Locale constructors have always specified that the language - * and the country param be two characters in length, although in - * practice they have accepted any length. The specification has now - * been relaxed to allow language codes of two to eight characters and - * country (region) codes of two to three characters, and in - * particular, three-letter language codes and three-digit region - * codes as specified in the IANA Language Subtag Registry. For - * compatibility, the implementation still does not impose a length - * constraint. - * * @spec https://www.rfc-editor.org/info/rfc4647 * RFC 4647: Matching of Language Tags * @spec https://www.rfc-editor.org/info/rfc5646 * RFC 5646: Tags for Identifying Languages + * @spec https://unicode.org/reports/tr35/ + * Unicode Locale Data Markup Language * @see Builder * @see ResourceBundle * @see java.text.Format @@ -1229,28 +1249,28 @@ public final class Locale implements Cloneable, Serializable { } /** - * {@return an array of installed locales} + * {@return an array of available locales} * * The returned array represents the union of locales supported - * by the Java runtime environment and by installed + * by the Java runtime environment and by deployed * {@link java.util.spi.LocaleServiceProvider LocaleServiceProvider} * implementations. At a minimum, the returned array must contain a - * {@code Locale} instance equal to {@link Locale#ROOT Locale.ROOT} and - * a {@code Locale} instance equal to {@link Locale#US Locale.US}. + * {@code Locale} instance equal to {@link #ROOT Locale.ROOT} and + * a {@code Locale} instance equal to {@link #US Locale.US}. 
*/ public static Locale[] getAvailableLocales() { return LocaleServiceProviderPool.getAllAvailableLocales(); } /** - * {@return a stream of installed locales} + * {@return a stream of available locales} * * The returned stream represents the union of locales supported - * by the Java runtime environment and by installed + * by the Java runtime environment and by deployed * {@link java.util.spi.LocaleServiceProvider LocaleServiceProvider} * implementations. At a minimum, the returned stream must contain a - * {@code Locale} instance equal to {@link Locale#ROOT Locale.ROOT} and - * a {@code Locale} instance equal to {@link Locale#US Locale.US}. + * {@code Locale} instance equal to {@link #ROOT Locale.ROOT} and + * a {@code Locale} instance equal to {@link #US Locale.US}. * * @implNote Unlike {@code getAvailableLocales()}, this method does * not create a defensive copy of the Locale array. @@ -1532,8 +1552,8 @@ public final class Locale implements Cloneable, Serializable { * Java 6 and prior. * *
If both the language and country fields are missing, this function will return - * the empty string, even if the variant, script, or extensions field is present (you - * can't have a locale with just a variant, the variant must accompany a well-formed + * the empty string, even if the variant, script, or extensions field is present + * (a locale with just a variant is not allowed; the variant must accompany a well-formed * language or country code). * *
If script or extensions are present and variant is missing, no underscore is @@ -1613,7 +1633,7 @@ public final class Locale implements Cloneable, Serializable { * (delimited by '-' or '_') is emitted as a subtag. Otherwise: *
[0-9a-zA-Z]{1,8}
+ * [0-9a-zA-Z]{1,8}
, the variant will be truncated
+ * {@code [0-9a-zA-Z]{1,8}}, the variant will be truncated
* and the problematic sub-segment and all following sub-segments
* will be omitted. If the remainder is non-empty, it will be
* emitted as a private use subtag as above (even if the remainder
@@ -1774,7 +1794,7 @@ public final class Locale implements Cloneable, Serializable {
*
* If the specified language tag contains any ill-formed subtags, * the first such subtag and all following subtags are ignored. Compare - * to {@link Locale.Builder#setLanguageTag} which throws an exception + * to {@link Locale.Builder#setLanguageTag(String)} which throws an exception * in this case. * *
The following conversions are performed:
Builders can be reused; {@code clear()} resets all * fields to their default values. * - * @see Locale#forLanguageTag + * @see Locale#forLanguageTag(String) * @see Locale#of(String, String, String) * @since 1.7 */ @@ -2760,7 +2776,7 @@ public final class Locale implements Cloneable, Serializable { * language tag. Discards the existing state. Null and the * empty string cause the builder to be reset, like {@link * #clear}. Legacy tags (see {@link - * Locale#forLanguageTag}) are converted to their canonical + * Locale#forLanguageTag(String)}) are converted to their canonical * form before being processed. Otherwise, the language tag * must be well-formed (see {@link Locale}) or an exception is * thrown (unlike {@code Locale.forLanguageTag}, which @@ -2862,7 +2878,7 @@ public final class Locale implements Cloneable, Serializable { * the {@code Locale} class does not impose any syntactic * restriction on variant, and the variant value in * {@code Locale} is case sensitive. To set such a variant, - * use {@link Locale#of(String, String, String)}. + * use {@link #of(String, String, String)}. * * @param variant the variant * @return This builder. @@ -2884,12 +2900,12 @@ public final class Locale implements Cloneable, Serializable { * must be {@linkplain Locale##def_extensions well-formed} or an exception * is thrown. * - *
Note: The key {@link Locale#UNICODE_LOCALE_EXTENSION + *
Note: The key {@link #UNICODE_LOCALE_EXTENSION * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension. * Setting a value for this key replaces any existing Unicode locale key/type * pairs with those defined in the extension. * - *
Note: The key {@link Locale#PRIVATE_USE_EXTENSION + *
Note: The key {@link #PRIVATE_USE_EXTENSION * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be * well-formed, the value for this key needs only to have subtags of one to * eight alphanumeric characters, not two to eight as in the general case. @@ -2918,7 +2934,7 @@ public final class Locale implements Cloneable, Serializable { * *
Keys and types are converted to lower case. * - *
Note:Setting the 'u' extension via {@link #setExtension} + *
Note: Setting the 'u' extension via {@link #setExtension(char, String)} * replaces all Unicode locale keywords with those defined in the * extension. * @@ -3010,9 +3026,9 @@ public final class Locale implements Cloneable, Serializable { * Returns an instance of {@code Locale} obtained from the fields set * on this builder. * - *
This applies the conversions listed in {@link Locale#forLanguageTag} + *
This applies the conversions listed in {@link #forLanguageTag(String)}
* when constructing a Locale. (Legacy tags are handled in
- * {@link #setLanguageTag}.)
+ * {@link #setLanguageTag(String)}.)
*
* @return A Locale.
*/
@@ -3193,10 +3209,10 @@ public final class Locale implements Cloneable, Serializable {
*
* @spec https://www.rfc-editor.org/info/rfc4234 RFC 4234: Augmented BNF for Syntax Specifications: ABNF
* @spec https://www.rfc-editor.org/info/rfc4647 RFC 4647: Matching of Language Tags
- * @see #filter
- * @see #filterTags
- * @see #lookup
- * @see #lookupTag
+ * @see #filter(List, Collection, FilteringMode)
+ * @see #filterTags(List, Collection, FilteringMode)
+ * @see #lookup(List, Collection)
+ * @see #lookupTag(List, Collection)
*
* @since 1.8
*/
@@ -3413,7 +3429,7 @@ public final class Locale implements Cloneable, Serializable {
* found in the given {@code ranges} is ill-formed
* @spec https://www.rfc-editor.org/info/rfc2616 RFC 2616: Hypertext Transfer Protocol -- HTTP/1.1
* @see #parse(String)
- * @see #mapEquivalents
+ * @see #mapEquivalents(List, Map)
*/
public static List