mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 06:45:07 +02:00
8310047: Add UTF-32 based Charsets into StandardCharsets
Reviewed-by: alanb, lancea, bpb, jpai, jlu
This commit is contained in:
parent
caadad4fdc
commit
00c7f914c6
4 changed files with 60 additions and 10 deletions
|
@ -168,37 +168,54 @@ import java.util.TreeMap;
|
||||||
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
|
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
|
||||||
* <td>Sixteen-bit UCS Transformation Format,
|
* <td>Sixteen-bit UCS Transformation Format,
|
||||||
* byte order identified by an optional byte-order mark</td></tr>
|
* byte order identified by an optional byte-order mark</td></tr>
|
||||||
|
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32BE}</th>
|
||||||
|
* <td>Thirty-two-bit UCS Transformation Format,
|
||||||
|
* big-endian byte order</td></tr>
|
||||||
|
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32LE}</th>
|
||||||
|
* <td>Thirty-two-bit UCS Transformation Format,
|
||||||
|
* little-endian byte order</td></tr>
|
||||||
|
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32}</th>
|
||||||
|
* <td>Thirty-two-bit UCS Transformation Format,
|
||||||
|
* byte order identified by an optional byte-order mark</td></tr>
|
||||||
* </tbody>
|
* </tbody>
|
||||||
* </table></blockquote>
|
* </table></blockquote>
|
||||||
*
|
*
|
||||||
* <p> The {@code UTF-8} charset is specified by <a
|
* <p> The {@code UTF-8} charset is specified by <a
|
||||||
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279</i></a>; the
|
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279</i></a>; the
|
||||||
* transformation format upon which it is based is specified in
|
* transformation format upon which it is based is specified in
|
||||||
* Amendment 2 of ISO 10646-1 and is also described in the <a
|
* ISO 10646-1 and is also described in the <a
|
||||||
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
|
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
|
||||||
* Standard</i></a>.
|
* Standard</i></a>.
|
||||||
*
|
*
|
||||||
* <p> The {@code UTF-16} charsets are specified by <a
|
* <p> The {@code UTF-16} charsets are specified by <a
|
||||||
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC 2781</i></a>; the
|
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC 2781</i></a>; the
|
||||||
* transformation formats upon which they are based are specified in
|
* transformation formats upon which they are based are specified in
|
||||||
* Amendment 1 of ISO 10646-1 and are also described in the <a
|
* ISO 10646-1 and are also described in the <a
|
||||||
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
|
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
|
||||||
* Standard</i></a>.
|
* Standard</i></a>.
|
||||||
*
|
*
|
||||||
* <p> The {@code UTF-16} charsets use sixteen-bit quantities and are
|
* <p> The {@code UTF-32} charsets are based upon transformation formats
|
||||||
|
* which are specified in
|
||||||
|
* ISO 10646-1 and are also described in the <a
|
||||||
|
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
|
||||||
|
* Standard</i></a>.
|
||||||
|
*
|
||||||
|
* <p> The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
|
||||||
|
* quantities respectively, and are
|
||||||
* therefore sensitive to byte order. In these encodings the byte order of a
|
* therefore sensitive to byte order. In these encodings the byte order of a
|
||||||
* stream may be indicated by an initial <i>byte-order mark</i> represented by
|
* stream may be indicated by an initial <i>byte-order mark</i> represented by
|
||||||
* the Unicode character <code>'\uFEFF'</code>. Byte-order marks are handled
|
* the Unicode character {@code U+FEFF}. Byte-order marks are handled
|
||||||
* as follows:
|
* as follows:
|
||||||
*
|
*
|
||||||
* <ul>
|
* <ul>
|
||||||
*
|
*
|
||||||
* <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE}
|
* <li><p> When decoding, the {@code UTF-16BE}, {@code UTF-16LE},
|
||||||
|
* {@code UTF-32BE}, and {@code UTF-32LE}
|
||||||
* charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH
|
* charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH
|
||||||
* NON-BREAKING SPACE</small>; when encoding, they do not write
|
* NON-BREAKING SPACE</small>; when encoding, they do not write
|
||||||
* byte-order marks. </p></li>
|
* byte-order marks. </p></li>
|
||||||
*
|
*
|
||||||
* <li><p> When decoding, the {@code UTF-16} charset interprets the
|
* <li><p> When decoding, the {@code UTF-16} and {@code UTF-32} charsets interpret the
|
||||||
* byte-order mark at the beginning of the input stream to indicate the
|
* byte-order mark at the beginning of the input stream to indicate the
|
||||||
* byte-order of the stream but defaults to big-endian if there is no
|
* byte-order of the stream but default to big-endian if there is no
|
||||||
* byte-order mark; when encoding, it uses big-endian byte order and writes
|
* byte-order mark; when encoding, they use big-endian byte order and write
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -74,4 +74,23 @@ public final class StandardCharsets {
|
||||||
* optional byte-order mark.
|
* optional byte-order mark.
|
||||||
*/
|
*/
|
||||||
public static final Charset UTF_16 = new sun.nio.cs.UTF_16();
|
public static final Charset UTF_16 = new sun.nio.cs.UTF_16();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thirty-two-bit UCS Transformation Format, big-endian byte order.
|
||||||
|
* @since 22
|
||||||
|
*/
|
||||||
|
public static final Charset UTF_32BE = new sun.nio.cs.UTF_32BE();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thirty-two-bit UCS Transformation Format, little-endian byte order.
|
||||||
|
* @since 22
|
||||||
|
*/
|
||||||
|
public static final Charset UTF_32LE = new sun.nio.cs.UTF_32LE();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thirty-two-bit UCS Transformation Format, byte order identified by an
|
||||||
|
* optional byte-order mark.
|
||||||
|
* @since 22
|
||||||
|
*/
|
||||||
|
public static final Charset UTF_32 = new sun.nio.cs.UTF_32();
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,9 @@ public class StandardCharsets extends CharsetProvider {
|
||||||
map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
|
map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
|
||||||
map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
|
map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
|
||||||
map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
|
map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
|
||||||
|
map.put("utf-32", java.nio.charset.StandardCharsets.UTF_32);
|
||||||
|
map.put("utf-32be", java.nio.charset.StandardCharsets.UTF_32BE);
|
||||||
|
map.put("utf-32le", java.nio.charset.StandardCharsets.UTF_32LE);
|
||||||
cache = map;
|
cache = map;
|
||||||
}
|
}
|
||||||
return map;
|
return map;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -23,7 +23,7 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @test
|
* @test
|
||||||
* @bug 4884238
|
* @bug 4884238 8310047
|
||||||
* @summary Test standard charset name constants.
|
* @summary Test standard charset name constants.
|
||||||
* @author Mike Duigou
|
* @author Mike Duigou
|
||||||
* @run main Standard
|
* @run main Standard
|
||||||
|
@ -41,7 +41,9 @@ public class Standard {
|
||||||
|
|
||||||
private final static String standardCharsets[] = {
|
private final static String standardCharsets[] = {
|
||||||
"US-ASCII", "ISO-8859-1", "UTF-8",
|
"US-ASCII", "ISO-8859-1", "UTF-8",
|
||||||
"UTF-16BE", "UTF-16LE", "UTF-16" };
|
"UTF-16BE", "UTF-16LE", "UTF-16",
|
||||||
|
"UTF-32BE", "UTF-32LE", "UTF-32",
|
||||||
|
};
|
||||||
|
|
||||||
public static void realMain(String[] args) {
|
public static void realMain(String[] args) {
|
||||||
check(StandardCharsets.US_ASCII instanceof Charset);
|
check(StandardCharsets.US_ASCII instanceof Charset);
|
||||||
|
@ -50,6 +52,9 @@ public class Standard {
|
||||||
check(StandardCharsets.UTF_16BE instanceof Charset);
|
check(StandardCharsets.UTF_16BE instanceof Charset);
|
||||||
check(StandardCharsets.UTF_16LE instanceof Charset);
|
check(StandardCharsets.UTF_16LE instanceof Charset);
|
||||||
check(StandardCharsets.UTF_16 instanceof Charset);
|
check(StandardCharsets.UTF_16 instanceof Charset);
|
||||||
|
check(StandardCharsets.UTF_32BE instanceof Charset);
|
||||||
|
check(StandardCharsets.UTF_32LE instanceof Charset);
|
||||||
|
check(StandardCharsets.UTF_32 instanceof Charset);
|
||||||
|
|
||||||
check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
|
check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
|
||||||
check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
|
check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
|
||||||
|
@ -57,6 +62,9 @@ public class Standard {
|
||||||
check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
|
check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
|
||||||
check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
|
check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
|
||||||
check("UTF-16".equals(StandardCharsets.UTF_16.name()));
|
check("UTF-16".equals(StandardCharsets.UTF_16.name()));
|
||||||
|
check("UTF-32BE".equals(StandardCharsets.UTF_32BE.name()));
|
||||||
|
check("UTF-32LE".equals(StandardCharsets.UTF_32LE.name()));
|
||||||
|
check("UTF-32".equals(StandardCharsets.UTF_32.name()));
|
||||||
|
|
||||||
check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
|
check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
|
||||||
check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
|
check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
|
||||||
|
@ -64,6 +72,9 @@ public class Standard {
|
||||||
check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
|
check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
|
||||||
check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
|
check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
|
||||||
check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
|
check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
|
||||||
|
check(Charset.forName("UTF-32BE") == StandardCharsets.UTF_32BE);
|
||||||
|
check(Charset.forName("UTF-32LE") == StandardCharsets.UTF_32LE);
|
||||||
|
check(Charset.forName("UTF-32") == StandardCharsets.UTF_32);
|
||||||
|
|
||||||
Set<String> charsets = new HashSet<>();
|
Set<String> charsets = new HashSet<>();
|
||||||
Field standardCharsetFields[] = StandardCharsets.class.getFields();
|
Field standardCharsetFields[] = StandardCharsets.class.getFields();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue