8325590: Regression in round-tripping UTF-16 strings after JDK-8311906

Reviewed-by: alanb, redestad
This commit is contained in:
Roger Riggs 2024-02-13 15:16:50 +00:00
parent 57b04e1b5d
commit 13d9e8ff38
2 changed files with 37 additions and 7 deletions

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -841,7 +841,7 @@ public final class String
} }
if (COMPACT_STRINGS) { if (COMPACT_STRINGS) {
byte[] val = StringUTF16.compress(ca, 0, caLen); byte[] val = StringUTF16.compress(ca, 0, caLen);
int coder = StringUTF16.coderFromArrayLen(val, len); byte coder = StringUTF16.coderFromArrayLen(val, caLen);
return new String(val, coder); return new String(val, coder);
} }
return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16); return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -29,8 +29,13 @@ import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException; import java.nio.charset.UnmappableCharacterException;
import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.US_ASCII; import static java.nio.charset.StandardCharsets.US_ASCII;
import static java.nio.charset.StandardCharsets.UTF_16;
import static java.nio.charset.StandardCharsets.UTF_8; import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.UTF_16;
import static java.nio.charset.StandardCharsets.UTF_16BE;
import static java.nio.charset.StandardCharsets.UTF_16LE;
import static java.nio.charset.StandardCharsets.UTF_32;
import static java.nio.charset.StandardCharsets.UTF_32BE;
import static java.nio.charset.StandardCharsets.UTF_32LE;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.OpenOption; import java.nio.file.OpenOption;
import java.nio.file.Path; import java.nio.file.Path;
@ -40,15 +45,15 @@ import static java.nio.file.StandardOpenOption.CREATE;
import java.util.Arrays; import java.util.Arrays;
import java.util.Random; import java.util.Random;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue; import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail; import static org.testng.Assert.fail;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
/* @test /* @test
* @bug 8201276 8205058 8209576 8287541 8288589 * @bug 8201276 8205058 8209576 8287541 8288589 8325590
* @build ReadWriteString PassThroughFileSystem * @build ReadWriteString PassThroughFileSystem
* @run testng ReadWriteString * @run testng ReadWriteString
* @summary Unit test for methods for Files readString and write methods. * @summary Unit test for methods for Files readString and write methods.
@ -61,6 +66,7 @@ public class ReadWriteString {
// data for text files // data for text files
final String TEXT_UNICODE = "\u201CHello\u201D"; final String TEXT_UNICODE = "\u201CHello\u201D";
final String TEXT_ASCII = "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n abcdefghijklmnopqrstuvwxyz\n 1234567890\n"; final String TEXT_ASCII = "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n abcdefghijklmnopqrstuvwxyz\n 1234567890\n";
final static String TEXT_PERSON_CART_WHEELING = "\ud83e\udd38";
private static final String JA_STRING = "\u65e5\u672c\u8a9e\u6587\u5b57\u5217"; private static final String JA_STRING = "\u65e5\u672c\u8a9e\u6587\u5b57\u5217";
private static final Charset WINDOWS_1252 = Charset.forName("windows-1252"); private static final Charset WINDOWS_1252 = Charset.forName("windows-1252");
private static final Charset WINDOWS_31J = Charset.forName("windows-31j"); private static final Charset WINDOWS_31J = Charset.forName("windows-31j");
@ -154,7 +160,16 @@ public class ReadWriteString {
{testFiles[1], TEXT_ASCII, US_ASCII, US_ASCII}, {testFiles[1], TEXT_ASCII, US_ASCII, US_ASCII},
{testFiles[1], TEXT_ASCII, US_ASCII, UTF_8}, {testFiles[1], TEXT_ASCII, US_ASCII, UTF_8},
{testFiles[1], TEXT_UNICODE, UTF_8, null}, {testFiles[1], TEXT_UNICODE, UTF_8, null},
{testFiles[1], TEXT_UNICODE, UTF_8, UTF_8} {testFiles[1], TEXT_UNICODE, UTF_8, UTF_8},
{testFiles[1], TEXT_ASCII, US_ASCII, ISO_8859_1},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_16, UTF_16},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_16BE, UTF_16BE},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_16LE, UTF_16LE},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_32, UTF_32},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_32BE, UTF_32BE},
{testFiles[1], TEXT_PERSON_CART_WHEELING, UTF_32LE, UTF_32LE},
{testFiles[1], TEXT_PERSON_CART_WHEELING, WINDOWS_1252, WINDOWS_1252},
{testFiles[1], TEXT_PERSON_CART_WHEELING, WINDOWS_31J, WINDOWS_31J}
}; };
} }
@ -304,6 +319,21 @@ public class ReadWriteString {
throw new RuntimeException("An instance of " + expected + " should be thrown"); throw new RuntimeException("An instance of " + expected + " should be thrown");
} }
/**
 * Regression test for 8325590: verifies that a string consisting of a single
 * supplementary character survives a {@code Files.writeString} /
 * {@code Files.readString} round trip through {@code UTF_16} unchanged.
 *
 * @throws IOException if writing to or reading from the test file fails
 */
@Test
public void testSingleUTF16() throws IOException {
    // U+1F938 spelled as an escaped surrogate pair so the value of the literal
    // does not depend on the encoding used to compile this source file.
    String original = "\ud83e\udd38";
    Files.writeString(testFiles[0], original, UTF_16);
    String actual = Files.readString(testFiles[0], UTF_16);
    if (!actual.equals(original)) {
        // Print both the text and its UTF-16 bytes before failing, so that a
        // mismatch can be diagnosed from the test output alone.
        System.out.printf("expected (%s), was (%s)\n", original, actual);
        System.out.printf("expected UTF_16 bytes: %s\n", Arrays.toString(original.getBytes(UTF_16)));
        System.out.printf("actual UTF_16 bytes: %s\n", Arrays.toString(actual.getBytes(UTF_16)));
    }
    assertEquals(actual, original, "Round trip string mismatch with multi-byte encoding");
}
private void checkNullPointerException(Callable<?> c) { private void checkNullPointerException(Callable<?> c) {
try { try {
c.call(); c.call();