8201276: (fs) Add methods to Files for reading/writing a string from/to a file

Reviewed-by: rriggs, smarks, sherman, forax, alanb, mli
This commit is contained in:
Joe Wang 2018-06-13 12:50:45 -07:00
parent b5eadc5721
commit ca487166f4
5 changed files with 635 additions and 3 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -589,6 +589,10 @@ class StringCoding {
}
// Convenience overload: delegates to the three-argument encode8859_1 with
// doReplace set to true.
private static byte[] encode8859_1(byte coder, byte[] val) {
return encode8859_1(coder, val, true);
}
private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
@ -602,6 +606,9 @@ class StringCoding {
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
if (!doReplace) {
throwMalformed(sp, 1);
}
char c = StringUTF16.getChar(val, sp++);
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
@ -676,6 +683,12 @@ class StringCoding {
", length : " + nb);
}
private static void throwMalformed(byte[] val) {
    // Find the index of the first negative byte (high bit set); if there is
    // none the index ends up equal to val.length.
    int firstNegative = 0;
    for (; firstNegative < val.length; firstNegative++) {
        if (val[firstNegative] < 0) {
            break;
        }
    }
    // Report a one-byte problem at that index via the (int, int) overload,
    // which is expected to throw -- its body is not fully visible here.
    throwMalformed(firstNegative, 1);
}
// U+FFFD, the Unicode REPLACEMENT CHARACTER. Presumably substituted for
// malformed input by the replacing decoders -- usage is not visible here.
private static char repl = '\ufffd';
private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
@ -931,7 +944,7 @@ class StringCoding {
////////////////////// for j.u.z.ZipCoder //////////////////////////
/*
* Throws iae, instead of replacing, if malformed or unmappble.
* Throws iae, instead of replacing, if malformed or unmappable.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
@ -941,9 +954,137 @@ class StringCoding {
}
/*
* Throws iae, instead of replacing, if unmappble.
* Throws iae, instead of replacing, if unmappable.
*/
static byte[] getBytesUTF8NoRepl(String s) {
// The trailing 'false' is presumably encodeUTF8's doReplace flag, i.e. throw
// rather than substitute -- confirm against encodeUTF8.
return encodeUTF8(s.coder(), s.value(), false);
}
////////////////////// for j.n.f.Files //////////////////////////
// Returns the negation of hasNegatives over the whole array; presumably true
// exactly when every byte is in the range 0..127.
// NOTE(review): this inspects raw bytes only, so the answer says nothing about
// how those bytes are meant to be interpreted (e.g. as UTF-16 code units).
private static boolean isASCII(byte[] src) {
return !hasNegatives(src, 0, src.length);
}
private static String newStringLatin1(byte[] src) {
    // Without compact strings the Latin-1 bytes are inflated first and the
    // resulting String is tagged UTF16.
    if (!COMPACT_STRINGS) {
        return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
    }
    // Compact strings: the bytes are handed to the String constructor as is,
    // tagged LATIN1.
    return new String(src, LATIN1);
}
/*
 * Decodes all of src with the given charset. Undecodable input surfaces as an
 * IllegalArgumentException (directly from the charset decoder path below, or
 * via throwMalformed) instead of being replaced.
 */
static String newStringNoRepl(byte[] src, Charset cs) {
    // --- dedicated paths for the three standard charsets ---
    if (cs == UTF_8) {
        if (COMPACT_STRINGS && isASCII(src)) {
            return new String(src, LATIN1);
        }
        Result decoded = decodeUTF8_0(src, 0, src.length, false);
        return new String(decoded.value, decoded.coder);
    }
    if (cs == ISO_8859_1) {
        return newStringLatin1(src);
    }
    if (cs == US_ASCII) {
        if (!isASCII(src)) {
            throwMalformed(src);
        } else {
            return newStringLatin1(src);
        }
    }

    // --- generic path through the charset's own decoder ---
    CharsetDecoder decoder = cs.newDecoder();
    // ascii fastpath
    boolean asciiCompatible = (decoder instanceof ArrayDecoder)
            && ((ArrayDecoder) decoder).isASCIICompatible();
    if (asciiCompatible && isASCII(src)) {
        return newStringLatin1(src);
    }
    final int byteCount = src.length;
    if (byteCount == 0) {
        return "";
    }
    char[] chars = new char[scale(byteCount, decoder.maxCharsPerByte())];
    // Hand the decoder a private copy of the input when the charset class has
    // a non-null class loader and a security manager is installed.
    boolean copyInput = cs.getClass().getClassLoader0() != null
            && System.getSecurityManager() != null;
    byte[] input = copyInput ? Arrays.copyOf(src, byteCount) : src;
    ByteBuffer in = ByteBuffer.wrap(input);
    CharBuffer out = CharBuffer.wrap(chars);
    try {
        CoderResult status = decoder.decode(in, out, true);
        if (!status.isUnderflow()) {
            status.throwException();
        }
        status = decoder.flush(out);
        if (!status.isUnderflow()) {
            status.throwException();
        }
    } catch (CharacterCodingException x) {
        throw new IllegalArgumentException(x);  // todo
    }
    Result packed = resultCached.get().with(chars, 0, out.position());
    return new String(packed.value, packed.coder);
}
/*
 * Encodes the given string into bytes using the given charset.
 * Throws iae, instead of replacing, if unmappable.
 *
 * NOTE: on the fast paths the array obtained from s.value() is returned as
 * is, without copying.
 */
static byte[] getBytesNoRepl(String s, Charset cs) {
    byte[] val = s.value();
    byte coder = s.coder();
    if (cs == UTF_8) {
        // isASCII() only looks at raw bytes, so it is meaningful for LATIN1
        // storage only. UTF16 storage of e.g. U+0100 contains no negative
        // byte either, yet must still go through the real encoder.
        if (coder == LATIN1 && isASCII(val)) {
            return val;
        }
        return encodeUTF8(coder, val, false);
    }
    if (cs == ISO_8859_1) {
        if (coder == LATIN1) {
            return val;
        }
        return encode8859_1(coder, val, false);
    }
    if (cs == US_ASCII) {
        if (coder == LATIN1) {
            if (isASCII(val)) {
                return val;
            } else {
                throwMalformed(val);
            }
        }
        // UTF16-coded strings fall through to the generic encoder path
    }
    CharsetEncoder ce = cs.newEncoder();
    // fastpath for ascii compatible
    if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
            ((ArrayEncoder)ce).isASCIICompatible() &&
            isASCII(val)))) {
        return val;
    }
    int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
    int en = scale(len, ce.maxBytesPerChar());
    byte[] ba = new byte[en];
    if (len == 0) {
        return ba;
    }
    if (ce instanceof ArrayEncoder) {
        int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
                                      : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
        // -1 makes this method fall back to the CharsetEncoder loop below
        if (blen != -1) {
            return safeTrim(ba, blen, true);
        }
    }
    boolean isTrusted = cs.getClass().getClassLoader0() == null ||
                        System.getSecurityManager() == null;
    char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
                                   : StringUTF16.toChars(val);
    ByteBuffer bb = ByteBuffer.wrap(ba);
    CharBuffer cb = CharBuffer.wrap(ca, 0, len);
    try {
        CoderResult cr = ce.encode(cb, bb, true);
        if (!cr.isUnderflow())
            cr.throwException();
        cr = ce.flush(bb);
        if (!cr.isUnderflow())
            cr.throwException();
    } catch (CharacterCodingException x) {
        // Honour the documented contract: callers catch
        // IllegalArgumentException, not Error.
        throw new IllegalArgumentException(x);
    }
    return safeTrim(ba, bb.position(), isTrusted);
}
}

View file

@ -47,6 +47,7 @@ import java.security.AccessController;
import java.security.PrivilegedAction;
import java.nio.channels.Channel;
import java.nio.channels.spi.SelectorProvider;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -2152,6 +2153,14 @@ public final class System {
return ModuleLayer.layers(loader);
}
// Forwards to StringCoding.newStringNoRepl with the same arguments.
public String newStringNoRepl(byte[] bytes, Charset cs) {
return StringCoding.newStringNoRepl(bytes, cs);
}
// Forwards to StringCoding.getBytesNoRepl with the same arguments.
public byte[] getBytesNoRepl(String s, Charset cs) {
return StringCoding.getBytesNoRepl(s, cs);
}
// Forwards to StringCoding.newStringUTF8NoRepl with the same arguments.
public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
return StringCoding.newStringUTF8NoRepl(bytes, off, len);
}

View file

@ -3121,6 +3121,9 @@ public final class Files {
*/
private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
// Shared-secrets handle into java.lang; this class uses it to call
// newStringNoRepl and getBytesNoRepl for non-replacing decode/encode.
private static final jdk.internal.misc.JavaLangAccess JLA =
jdk.internal.misc.SharedSecrets.getJavaLangAccess();
/**
* Reads all the bytes from an input stream. Uses {@code initialSize} as a hint
* about how many bytes the stream will have.
@ -3202,6 +3205,81 @@ public final class Files {
}
}
/**
* Reads all content from a file into a string, decoding from bytes to characters
* using the {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
* The method ensures that the file is closed when all content has been read
* or an I/O error, or other runtime exception, is thrown.
*
* <p> This method is equivalent to:
* {@code readString(path, StandardCharsets.UTF_8) }
*
* @param path the path to the file
*
* @return a String containing the content read from the file
*
* @throws IOException
* if an I/O error occurs reading from the file or a malformed or
* unmappable byte sequence is read
* @throws OutOfMemoryError
* if the file is extremely large, for example larger than {@code 2GB}
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkRead(String) checkRead}
* method is invoked to check read access to the file.
*
* @since 11
*/
public static String readString(Path path) throws IOException {
// UTF-8 convenience overload; argument checks happen in the two-argument form
return readString(path, StandardCharsets.UTF_8);
}
/**
* Reads all characters from a file into a string, decoding from bytes to characters
* using the specified {@linkplain Charset charset}.
* The method ensures that the file is closed when all content have been read
* or an I/O error, or other runtime exception, is thrown.
*
* <p> This method reads all content including the line separators in the middle
* and/or at the end. The resulting string will contain line separators as they
* appear in the file.
*
* @apiNote
* This method is intended for simple cases where it is appropriate and convenient
* to read the content of a file into a String. It is not intended for reading
* very large files.
*
*
*
* @param path the path to the file
* @param cs the charset to use for decoding
*
* @return a String containing the content read from the file
*
* @throws IOException
* if an I/O error occurs reading from the file or a malformed or
* unmappable byte sequence is read
* @throws OutOfMemoryError
* if the file is extremely large, for example larger than {@code 2GB}
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkRead(String) checkRead}
* method is invoked to check read access to the file.
*
* @since 11
*/
public static String readString(Path path, Charset cs) throws IOException {
    Objects.requireNonNull(path);
    Objects.requireNonNull(cs);

    // Read the whole file first, then decode without replacement.
    final byte[] content = readAllBytes(path);
    final String decoded;
    try {
        decoded = JLA.newStringNoRepl(content, cs);
    } catch (IllegalArgumentException undecodable) {
        // The decoder reports bad input as IllegalArgumentException;
        // surface it as the IOException this method documents.
        throw new IOException(undecodable);
    }
    return decoded;
}
/**
* Read all lines from a file. This method ensures that the file is
* closed when all bytes have been read or an I/O error, or other runtime
@ -3456,6 +3534,110 @@ public final class Files {
return write(path, lines, StandardCharsets.UTF_8, options);
}
/**
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
* Characters are encoded into bytes using the
* {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
*
* <p> This method is equivalent to:
* {@code writeString(path, csq, StandardCharsets.UTF_8, options) }
*
* @param path
* the path to the file
* @param csq
* the CharSequence to be written
* @param options
* options specifying how the file is opened
*
* @return the path
*
* @throws IllegalArgumentException
* if {@code options} contains an invalid combination of options
* @throws IOException
* if an I/O error occurs writing to or creating the file, or the
* text cannot be encoded using the specified charset
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
* method is invoked to check write access to the file. The {@link
* SecurityManager#checkDelete(String) checkDelete} method is
* invoked to check delete access if the file is opened with the
* {@code DELETE_ON_CLOSE} option.
*
* @since 11
*/
public static Path writeString(Path path, CharSequence csq, OpenOption... options)
throws IOException
{
// UTF-8 convenience overload; argument checks happen in the four-argument form
return writeString(path, csq, StandardCharsets.UTF_8, options);
}
/**
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
* Characters are encoded into bytes using the specified
* {@linkplain java.nio.charset.Charset charset}.
*
* <p> All characters are written as they are, including the line separators in
* the char sequence. No extra characters are added.
*
* <p> The {@code options} parameter specifies how the file is created
* or opened. If no options are present then this method works as if the
* {@link StandardOpenOption#CREATE CREATE}, {@link
* StandardOpenOption#TRUNCATE_EXISTING TRUNCATE_EXISTING}, and {@link
* StandardOpenOption#WRITE WRITE} options are present. In other words, it
* opens the file for writing, creating the file if it doesn't exist, or
* initially truncating an existing {@link #isRegularFile regular-file} to
* a size of {@code 0}.
*
*
* @param path
* the path to the file
* @param csq
* the CharSequence to be written
* @param cs
* the charset to use for encoding
* @param options
* options specifying how the file is opened
*
* @return the path
*
* @throws IllegalArgumentException
* if {@code options} contains an invalid combination of options
* @throws IOException
* if an I/O error occurs writing to or creating the file, or the
* text cannot be encoded using the specified charset
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws SecurityException
* In the case of the default provider, and a security manager is
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
* method is invoked to check write access to the file. The {@link
* SecurityManager#checkDelete(String) checkDelete} method is
* invoked to check delete access if the file is opened with the
* {@code DELETE_ON_CLOSE} option.
*
* @since 11
*/
public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOption... options)
    throws IOException
{
    // ensure the arguments are not null before opening the file
    Objects.requireNonNull(path);
    Objects.requireNonNull(csq);
    Objects.requireNonNull(cs);

    // Only the encoding step may have its IllegalArgumentException turned
    // into an IOException. write() must stay outside the try block so that
    // its own IllegalArgumentException (invalid combination of options)
    // reaches the caller unchanged, as this method documents.
    byte[] bytes;
    try {
        bytes = JLA.getBytesNoRepl(String.valueOf(csq), cs);
    } catch (IllegalArgumentException e) {
        throw new IOException(e);
    }
    write(path, bytes, options);
    return path;
}
// -- Stream APIs --
/**