8260265: UTF-8 by Default

Reviewed-by: alanb, rriggs
This commit is contained in:
Naoto Sato 2021-08-30 21:13:59 +00:00
parent 32048536e9
commit 7fc8540907
22 changed files with 385 additions and 201 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -65,7 +65,7 @@ import java.util.Objects;
* will be replaced by the character(s) whose encoding would result
* in those consecutive bytes.
* The encoding scheme used to decode these characters may be specified,
* or if unspecified, the default encoding of the platform will be used.
* or if unspecified, the default charset will be used.
* </ul>
* <p>
* There are two possible ways in which this decoder could deal with
@ -74,6 +74,8 @@ import java.util.Objects;
* Which approach the decoder takes is left to the
* implementation.
*
* @see Charset#defaultCharset()
*
* @author Mark Chamness
* @author Michael McCloskey
* @since 1.2
@ -86,17 +88,17 @@ public class URLDecoder {
*/
private URLDecoder() {}
// The platform default encoding
// The default charset
static String dfltEncName = URLEncoder.dfltEncName;
/**
* Decodes a {@code x-www-form-urlencoded} string.
* The platform's default encoding is used to determine what characters
* The default charset is used to determine what characters
* are represented by any consecutive sequences of the form
* "<i>{@code %xy}</i>".
* @param s the {@code String} to decode
* @deprecated The resulting string may vary depending on the platform's
* default encoding. Instead, use the decode(String,String) method
* @deprecated The resulting string may vary depending on the
* default charset. Instead, use the decode(String,String) method
* to specify the encoding.
* @return the newly decoded {@code String}
*/
@ -108,7 +110,7 @@ public class URLDecoder {
try {
str = decode(s, dfltEncName);
} catch (UnsupportedEncodingException e) {
// The system should always have the platform default
// The system should always have the default charset
}
return str;
@ -120,7 +122,7 @@ public class URLDecoder {
*
* <p>
* This method behaves the same as {@linkplain #decode(String s, Charset charset)}
* except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
* except that it will {@linkplain Charset#forName look up the charset}
* using the given encoding name.
*
* @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
@ -152,7 +154,7 @@ public class URLDecoder {
/**
* Decodes an {@code application/x-www-form-urlencoded} string using
* a specific {@linkplain java.nio.charset.Charset Charset}.
* a specific {@linkplain Charset Charset}.
* The supplied charset is used to determine
* what characters are represented by any consecutive sequences of the
* form "<i>{@code %xy}</i>".
@ -172,7 +174,7 @@ public class URLDecoder {
* @throws NullPointerException if {@code s} or {@code charset} is {@code null}
* @throws IllegalArgumentException if the implementation encounters illegal
* characters
* @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
* @see URLEncoder#encode(java.lang.String, Charset)
* @since 10
*/
public static String decode(String s, Charset charset) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1995, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,7 +32,8 @@ import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet;
import java.util.Objects;
import sun.security.action.GetPropertyAction;
import jdk.internal.util.StaticProperty;
/**
* Utility class for HTML form encoding. This class contains static methods
@ -60,7 +61,7 @@ import sun.security.action.GetPropertyAction;
* two-digit hexadecimal representation of the byte.
* The recommended encoding scheme to use is UTF-8. However,
* for compatibility reasons, if an encoding is not specified,
* then the default encoding of the platform is used.
* then the default charset is used.
* </ul>
*
* <p>
@ -70,6 +71,8 @@ import sun.security.action.GetPropertyAction;
* &#252; is encoded as two bytes C3 (hex) and BC (hex), and the
* character @ is encoded as one byte 40 (hex).
*
* @see Charset#defaultCharset()
*
* @author Herb Jellinek
* @since 1.0
*/
@ -134,7 +137,7 @@ public class URLEncoder {
dontNeedEncoding.set('.');
dontNeedEncoding.set('*');
dfltEncName = GetPropertyAction.privilegedGetProperty("file.encoding");
dfltEncName = StaticProperty.fileEncoding();
}
/**
@ -144,12 +147,12 @@ public class URLEncoder {
/**
* Translates a string into {@code x-www-form-urlencoded}
* format. This method uses the platform's default encoding
* format. This method uses the default charset
* as the encoding scheme to obtain the bytes for unsafe characters.
*
* @param s {@code String} to be translated.
* @deprecated The resulting string may vary depending on the platform's
* default encoding. Instead, use the encode(String,String)
* @deprecated The resulting string may vary depending on the
* default charset. Instead, use the encode(String,String)
* method to specify the encoding.
* @return the translated {@code String}.
*/
@ -161,7 +164,7 @@ public class URLEncoder {
try {
str = encode(s, dfltEncName);
} catch (UnsupportedEncodingException e) {
// The system should always have the platform default
// The system should always have the default charset
}
return str;
@ -172,7 +175,7 @@ public class URLEncoder {
* format using a specific encoding scheme.
* <p>
* This method behaves the same as {@linkplain #encode(String s, Charset charset)}
* except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
* except that it will {@linkplain Charset#forName look up the charset}
* using the given encoding name.
*
* @param s {@code String} to be translated.
@ -201,7 +204,7 @@ public class URLEncoder {
/**
* Translates a string into {@code application/x-www-form-urlencoded}
* format using a specific {@linkplain java.nio.charset.Charset Charset}.
* format using a specific {@linkplain Charset Charset}.
* This method uses the supplied charset to obtain the bytes for unsafe
* characters.
* <p>
@ -214,7 +217,7 @@ public class URLEncoder {
* @param charset the given charset
* @return the translated {@code String}.
* @throws NullPointerException if {@code s} or {@code charset} is {@code null}.
* @see URLDecoder#decode(java.lang.String, java.nio.charset.Charset)
* @see URLDecoder#decode(java.lang.String, Charset)
* @since 10
*/
public static String encode(String s, Charset charset) {