diff --git a/src/java.base/share/classes/java/nio/charset/Charset.java b/src/java.base/share/classes/java/nio/charset/Charset.java index b87dfc16436..83dc81ba259 100644 --- a/src/java.base/share/classes/java/nio/charset/Charset.java +++ b/src/java.base/share/classes/java/nio/charset/Charset.java @@ -168,37 +168,54 @@ import java.util.TreeMap; *
The {@code UTF-8} charset is specified by RFC 2279; the * transformation format upon which it is based is specified in - * Amendment 2 of ISO 10646-1 and is also described in the Unicode * Standard. * *
The {@code UTF-16} charsets are specified by RFC 2781; the * transformation formats upon which they are based are specified in - * Amendment 1 of ISO 10646-1 and are also described in the Unicode * Standard. * - *
The {@code UTF-16} charsets use sixteen-bit quantities and are + *
The {@code UTF-32} charsets are based upon transformation formats + * which are specified in + * ISO 10646-1 and are also described in the Unicode + * Standard. + * + *
The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
+ * quantities respectively, and are
* therefore sensitive to byte order. In these encodings the byte order of a
* stream may be indicated by an initial byte-order mark represented by
- * the Unicode character '\uFEFF'. Byte-order marks are handled
+ * the Unicode character {@code U+FEFF}. Byte-order marks are handled
* as follows:
*
*
When decoding, the {@code UTF-16BE} and {@code UTF-16LE} + *
When decoding, the {@code UTF-16BE}, {@code UTF-16LE}, + * {@code UTF-32BE}, and {@code UTF-32LE} * charsets interpret the initial byte-order marks as a ZERO-WIDTH * NON-BREAKING SPACE; when encoding, they do not write * byte-order marks.
When decoding, the {@code UTF-16} charset interprets the + *
When decoding, the {@code UTF-16} and {@code UTF-32} charsets interpret the
* byte-order mark at the beginning of the input stream to indicate the
- * byte-order of the stream but defaults to big-endian if there is no
+ * byte-order of the stream but default to big-endian if there is no
* byte-order mark; when encoding, it uses big-endian byte order and writes
diff --git a/src/java.base/share/classes/java/nio/charset/StandardCharsets.java b/src/java.base/share/classes/java/nio/charset/StandardCharsets.java
index 31bc454dbe6..c85a9f16050 100644
--- a/src/java.base/share/classes/java/nio/charset/StandardCharsets.java
+++ b/src/java.base/share/classes/java/nio/charset/StandardCharsets.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,4 +74,23 @@ public final class StandardCharsets {
* optional byte-order mark.
*/
public static final Charset UTF_16 = new sun.nio.cs.UTF_16();
+
+ /**
+ * Thirty-two-bit UCS Transformation Format, big-endian byte order.
+ * @since 22
+ */
+ public static final Charset UTF_32BE = new sun.nio.cs.UTF_32BE();
+
+ /**
+ * Thirty-two-bit UCS Transformation Format, little-endian byte order.
+ * @since 22
+ */
+ public static final Charset UTF_32LE = new sun.nio.cs.UTF_32LE();
+
+ /**
+ * Thirty-two-bit UCS Transformation Format, byte order identified by an
+ * optional byte-order mark.
+ * @since 22
+ */
+ public static final Charset UTF_32 = new sun.nio.cs.UTF_32();
}
diff --git a/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template b/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template
index 339e01de39e..6db98fb4fa4 100644
--- a/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template
+++ b/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template
@@ -93,6 +93,9 @@ public class StandardCharsets extends CharsetProvider {
map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
+ map.put("utf-32", java.nio.charset.StandardCharsets.UTF_32);
+ map.put("utf-32be", java.nio.charset.StandardCharsets.UTF_32BE);
+ map.put("utf-32le", java.nio.charset.StandardCharsets.UTF_32LE);
cache = map;
}
return map;
diff --git a/test/jdk/java/nio/charset/StandardCharsets/Standard.java b/test/jdk/java/nio/charset/StandardCharsets/Standard.java
index 44c4e86a4f6..69783bb5a62 100644
--- a/test/jdk/java/nio/charset/StandardCharsets/Standard.java
+++ b/test/jdk/java/nio/charset/StandardCharsets/Standard.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
/*
* @test
- * @bug 4884238
+ * @bug 4884238 8310047
* @summary Test standard charset name constants.
* @author Mike Duigou
* @run main Standard
@@ -41,7 +41,9 @@ public class Standard {
private final static String standardCharsets[] = {
"US-ASCII", "ISO-8859-1", "UTF-8",
- "UTF-16BE", "UTF-16LE", "UTF-16" };
+ "UTF-16BE", "UTF-16LE", "UTF-16",
+ "UTF-32BE", "UTF-32LE", "UTF-32",
+ };
public static void realMain(String[] args) {
check(StandardCharsets.US_ASCII instanceof Charset);
@@ -50,6 +52,9 @@ public class Standard {
check(StandardCharsets.UTF_16BE instanceof Charset);
check(StandardCharsets.UTF_16LE instanceof Charset);
check(StandardCharsets.UTF_16 instanceof Charset);
+ check(StandardCharsets.UTF_32BE instanceof Charset);
+ check(StandardCharsets.UTF_32LE instanceof Charset);
+ check(StandardCharsets.UTF_32 instanceof Charset);
check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
@@ -57,6 +62,9 @@ public class Standard {
check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
check("UTF-16".equals(StandardCharsets.UTF_16.name()));
+ check("UTF-32BE".equals(StandardCharsets.UTF_32BE.name()));
+ check("UTF-32LE".equals(StandardCharsets.UTF_32LE.name()));
+ check("UTF-32".equals(StandardCharsets.UTF_32.name()));
check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
@@ -64,6 +72,9 @@ public class Standard {
check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
+ check(Charset.forName("UTF-32BE") == StandardCharsets.UTF_32BE);
+ check(Charset.forName("UTF-32LE") == StandardCharsets.UTF_32LE);
+ check(Charset.forName("UTF-32") == StandardCharsets.UTF_32);
Set