mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8201276: (fs) Add methods to Files for reading/writing a string from/to a file
Reviewed-by: rriggs, smarks, sherman, forax, alanb, mli
This commit is contained in:
parent
b5eadc5721
commit
ca487166f4
5 changed files with 635 additions and 3 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -589,6 +589,10 @@ class StringCoding {
|
|||
}
|
||||
|
||||
private static byte[] encode8859_1(byte coder, byte[] val) {
|
||||
return encode8859_1(coder, val, true);
|
||||
}
|
||||
|
||||
private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
|
||||
if (coder == LATIN1) {
|
||||
return Arrays.copyOf(val, val.length);
|
||||
}
|
||||
|
@ -602,6 +606,9 @@ class StringCoding {
|
|||
sp = sp + ret;
|
||||
dp = dp + ret;
|
||||
if (ret != len) {
|
||||
if (!doReplace) {
|
||||
throwMalformed(sp, 1);
|
||||
}
|
||||
char c = StringUTF16.getChar(val, sp++);
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
|
||||
|
@ -676,6 +683,12 @@ class StringCoding {
|
|||
", length : " + nb);
|
||||
}
|
||||
|
||||
private static void throwMalformed(byte[] val) {
|
||||
int dp = 0;
|
||||
while (dp < val.length && val[dp] >=0) { dp++; }
|
||||
throwMalformed(dp, 1);
|
||||
}
|
||||
|
||||
private static char repl = '\ufffd';
|
||||
|
||||
private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
|
||||
|
@ -931,7 +944,7 @@ class StringCoding {
|
|||
////////////////////// for j.u.z.ZipCoder //////////////////////////
|
||||
|
||||
/*
|
||||
* Throws iae, instead of replacing, if malformed or unmappble.
|
||||
* Throws iae, instead of replacing, if malformed or unmappable.
|
||||
*/
|
||||
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
|
||||
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
|
||||
|
@ -941,9 +954,137 @@ class StringCoding {
|
|||
}
|
||||
|
||||
/*
|
||||
* Throws iae, instead of replacing, if unmappble.
|
||||
* Throws iae, instead of replacing, if unmappable.
|
||||
*/
|
||||
static byte[] getBytesUTF8NoRepl(String s) {
|
||||
return encodeUTF8(s.coder(), s.value(), false);
|
||||
}
|
||||
|
||||
////////////////////// for j.n.f.Files //////////////////////////
|
||||
|
||||
private static boolean isASCII(byte[] src) {
|
||||
return !hasNegatives(src, 0, src.length);
|
||||
}
|
||||
|
||||
private static String newStringLatin1(byte[] src) {
|
||||
if (COMPACT_STRINGS)
|
||||
return new String(src, LATIN1);
|
||||
return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
|
||||
}
|
||||
|
||||
static String newStringNoRepl(byte[] src, Charset cs) {
|
||||
if (cs == UTF_8) {
|
||||
if (COMPACT_STRINGS && isASCII(src))
|
||||
return new String(src, LATIN1);
|
||||
Result ret = decodeUTF8_0(src, 0, src.length, false);
|
||||
return new String(ret.value, ret.coder);
|
||||
}
|
||||
if (cs == ISO_8859_1) {
|
||||
return newStringLatin1(src);
|
||||
}
|
||||
if (cs == US_ASCII) {
|
||||
if (isASCII(src)) {
|
||||
return newStringLatin1(src);
|
||||
} else {
|
||||
throwMalformed(src);
|
||||
}
|
||||
}
|
||||
|
||||
CharsetDecoder cd = cs.newDecoder();
|
||||
// ascii fastpath
|
||||
if ((cd instanceof ArrayDecoder) &&
|
||||
((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
|
||||
return newStringLatin1(src);
|
||||
}
|
||||
int len = src.length;
|
||||
if (len == 0) {
|
||||
return "";
|
||||
}
|
||||
int en = scale(len, cd.maxCharsPerByte());
|
||||
char[] ca = new char[en];
|
||||
if (cs.getClass().getClassLoader0() != null &&
|
||||
System.getSecurityManager() != null) {
|
||||
src = Arrays.copyOf(src, len);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(src);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new IllegalArgumentException(x); // todo
|
||||
}
|
||||
Result ret = resultCached.get().with(ca, 0, cb.position());
|
||||
return new String(ret.value, ret.coder);
|
||||
}
|
||||
|
||||
/*
|
||||
* Throws iae, instead of replacing, if unmappable.
|
||||
*/
|
||||
static byte[] getBytesNoRepl(String s, Charset cs) {
|
||||
byte[] val = s.value();
|
||||
byte coder = s.coder();
|
||||
if (cs == UTF_8) {
|
||||
if (isASCII(val)) {
|
||||
return val;
|
||||
}
|
||||
return encodeUTF8(coder, val, false);
|
||||
}
|
||||
if (cs == ISO_8859_1) {
|
||||
if (coder == LATIN1) {
|
||||
return val;
|
||||
}
|
||||
return encode8859_1(coder, val, false);
|
||||
}
|
||||
if (cs == US_ASCII) {
|
||||
if (coder == LATIN1) {
|
||||
if (isASCII(val)) {
|
||||
return val;
|
||||
} else {
|
||||
throwMalformed(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
CharsetEncoder ce = cs.newEncoder();
|
||||
// fastpath for ascii compatible
|
||||
if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
|
||||
((ArrayEncoder)ce).isASCIICompatible() &&
|
||||
isASCII(val)))) {
|
||||
return val;
|
||||
}
|
||||
int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
|
||||
int en = scale(len, ce.maxBytesPerChar());
|
||||
byte[] ba = new byte[en];
|
||||
if (len == 0) {
|
||||
return ba;
|
||||
}
|
||||
if (ce instanceof ArrayEncoder) {
|
||||
int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
|
||||
: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
|
||||
if (blen != -1) {
|
||||
return safeTrim(ba, blen, true);
|
||||
}
|
||||
}
|
||||
boolean isTrusted = cs.getClass().getClassLoader0() == null ||
|
||||
System.getSecurityManager() == null;
|
||||
char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
|
||||
: StringUTF16.toChars(val);
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, 0, len);
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new Error(x);
|
||||
}
|
||||
return safeTrim(ba, bb.position(), isTrusted);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,7 @@ import java.security.AccessController;
|
|||
import java.security.PrivilegedAction;
|
||||
import java.nio.channels.Channel;
|
||||
import java.nio.channels.spi.SelectorProvider;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -2152,6 +2153,14 @@ public final class System {
|
|||
return ModuleLayer.layers(loader);
|
||||
}
|
||||
|
||||
public String newStringNoRepl(byte[] bytes, Charset cs) {
|
||||
return StringCoding.newStringNoRepl(bytes, cs);
|
||||
}
|
||||
|
||||
public byte[] getBytesNoRepl(String s, Charset cs) {
|
||||
return StringCoding.getBytesNoRepl(s, cs);
|
||||
}
|
||||
|
||||
public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
|
||||
return StringCoding.newStringUTF8NoRepl(bytes, off, len);
|
||||
}
|
||||
|
|
|
@ -3121,6 +3121,9 @@ public final class Files {
|
|||
*/
|
||||
private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
|
||||
|
||||
private static final jdk.internal.misc.JavaLangAccess JLA =
|
||||
jdk.internal.misc.SharedSecrets.getJavaLangAccess();
|
||||
|
||||
/**
|
||||
* Reads all the bytes from an input stream. Uses {@code initialSize} as a hint
|
||||
* about how many bytes the stream will have.
|
||||
|
@ -3202,6 +3205,81 @@ public final class Files {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all content from a file into a string, decoding from bytes to characters
|
||||
* using the {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
|
||||
* The method ensures that the file is closed when all content has been read
|
||||
* or an I/O error, or other runtime exception, is thrown.
|
||||
*
|
||||
* <p> This method is equivalent to:
|
||||
* {@code readString(path, StandardCharsets.UTF_8) }
|
||||
*
|
||||
* @param path the path to the file
|
||||
*
|
||||
* @return a String containing the content read from the file
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs reading from the file or a malformed or
|
||||
* unmappable byte sequence is read
|
||||
* @throws OutOfMemoryError
|
||||
* if the file is extremely large, for example larger than {@code 2GB}
|
||||
* @throws SecurityException
|
||||
* In the case of the default provider, and a security manager is
|
||||
* installed, the {@link SecurityManager#checkRead(String) checkRead}
|
||||
* method is invoked to check read access to the file.
|
||||
*
|
||||
* @since 11
|
||||
*/
|
||||
public static String readString(Path path) throws IOException {
|
||||
return readString(path, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all characters from a file into a string, decoding from bytes to characters
|
||||
* using the specified {@linkplain Charset charset}.
|
||||
* The method ensures that the file is closed when all content has been read
|
||||
* or an I/O error, or other runtime exception, is thrown.
|
||||
*
|
||||
* <p> This method reads all content including the line separators in the middle
|
||||
* and/or at the end. The resulting string will contain line separators as they
|
||||
* appear in the file.
|
||||
*
|
||||
* @apiNote
|
||||
* This method is intended for simple cases where it is appropriate and convenient
|
||||
* to read the content of a file into a String. It is not intended for reading
|
||||
* very large files.
|
||||
*
|
||||
*
|
||||
*
|
||||
* @param path the path to the file
|
||||
* @param cs the charset to use for decoding
|
||||
*
|
||||
* @return a String containing the content read from the file
|
||||
*
|
||||
* @throws IOException
|
||||
* if an I/O error occurs reading from the file or a malformed or
|
||||
* unmappable byte sequence is read
|
||||
* @throws OutOfMemoryError
|
||||
* if the file is extremely large, for example larger than {@code 2GB}
|
||||
* @throws SecurityException
|
||||
* In the case of the default provider, and a security manager is
|
||||
* installed, the {@link SecurityManager#checkRead(String) checkRead}
|
||||
* method is invoked to check read access to the file.
|
||||
*
|
||||
* @since 11
|
||||
*/
|
||||
public static String readString(Path path, Charset cs) throws IOException {
|
||||
Objects.requireNonNull(path);
|
||||
Objects.requireNonNull(cs);
|
||||
|
||||
byte[] ba = readAllBytes(path);
|
||||
try {
|
||||
return JLA.newStringNoRepl(ba, cs);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all lines from a file. This method ensures that the file is
|
||||
* closed when all bytes have been read or an I/O error, or other runtime
|
||||
|
@ -3456,6 +3534,110 @@ public final class Files {
|
|||
return write(path, lines, StandardCharsets.UTF_8, options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
|
||||
* Characters are encoded into bytes using the
|
||||
* {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}.
|
||||
*
|
||||
* <p> This method is equivalent to:
|
||||
* {@code writeString(path, csq, StandardCharsets.UTF_8, options) }
|
||||
*
|
||||
* @param path
|
||||
* the path to the file
|
||||
* @param csq
|
||||
* the CharSequence to be written
|
||||
* @param options
|
||||
* options specifying how the file is opened
|
||||
*
|
||||
* @return the path
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if {@code options} contains an invalid combination of options
|
||||
* @throws IOException
|
||||
* if an I/O error occurs writing to or creating the file, or the
|
||||
* text cannot be encoded using the specified charset
|
||||
* @throws UnsupportedOperationException
|
||||
* if an unsupported option is specified
|
||||
* @throws SecurityException
|
||||
* In the case of the default provider, and a security manager is
|
||||
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
|
||||
* method is invoked to check write access to the file. The {@link
|
||||
* SecurityManager#checkDelete(String) checkDelete} method is
|
||||
* invoked to check delete access if the file is opened with the
|
||||
* {@code DELETE_ON_CLOSE} option.
|
||||
*
|
||||
* @since 11
|
||||
*/
|
||||
public static Path writeString(Path path, CharSequence csq, OpenOption... options)
|
||||
throws IOException
|
||||
{
|
||||
return writeString(path, csq, StandardCharsets.UTF_8, options);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a {@linkplain java.lang.CharSequence CharSequence} to a file.
|
||||
* Characters are encoded into bytes using the specified
|
||||
* {@linkplain java.nio.charset.Charset charset}.
|
||||
*
|
||||
* <p> All characters are written as they are, including the line separators in
|
||||
* the char sequence. No extra characters are added.
|
||||
*
|
||||
* <p> The {@code options} parameter specifies how the file is created
|
||||
* or opened. If no options are present then this method works as if the
|
||||
* {@link StandardOpenOption#CREATE CREATE}, {@link
|
||||
* StandardOpenOption#TRUNCATE_EXISTING TRUNCATE_EXISTING}, and {@link
|
||||
* StandardOpenOption#WRITE WRITE} options are present. In other words, it
|
||||
* opens the file for writing, creating the file if it doesn't exist, or
|
||||
* initially truncating an existing {@link #isRegularFile regular-file} to
|
||||
* a size of {@code 0}.
|
||||
*
|
||||
*
|
||||
* @param path
|
||||
* the path to the file
|
||||
* @param csq
|
||||
* the CharSequence to be written
|
||||
* @param cs
|
||||
* the charset to use for encoding
|
||||
* @param options
|
||||
* options specifying how the file is opened
|
||||
*
|
||||
* @return the path
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if {@code options} contains an invalid combination of options
|
||||
* @throws IOException
|
||||
* if an I/O error occurs writing to or creating the file, or the
|
||||
* text cannot be encoded using the specified charset
|
||||
* @throws UnsupportedOperationException
|
||||
* if an unsupported option is specified
|
||||
* @throws SecurityException
|
||||
* In the case of the default provider, and a security manager is
|
||||
* installed, the {@link SecurityManager#checkWrite(String) checkWrite}
|
||||
* method is invoked to check write access to the file. The {@link
|
||||
* SecurityManager#checkDelete(String) checkDelete} method is
|
||||
* invoked to check delete access if the file is opened with the
|
||||
* {@code DELETE_ON_CLOSE} option.
|
||||
*
|
||||
* @since 11
|
||||
*/
|
||||
public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOption... options)
|
||||
throws IOException
|
||||
{
|
||||
// ensure the text is not null before opening file
|
||||
Objects.requireNonNull(path);
|
||||
Objects.requireNonNull(csq);
|
||||
Objects.requireNonNull(cs);
|
||||
|
||||
try {
|
||||
byte[] bytes = JLA.getBytesNoRepl(String.valueOf(csq), cs);
|
||||
write(path, bytes, options);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
// -- Stream APIs --
|
||||
|
||||
/**
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.lang.module.ModuleDescriptor;
|
|||
import java.lang.reflect.Executable;
|
||||
import java.lang.reflect.Method;
|
||||
import java.net.URI;
|
||||
import java.nio.charset.Charset;
|
||||
import java.security.AccessControlContext;
|
||||
import java.security.ProtectionDomain;
|
||||
import java.util.Iterator;
|
||||
|
@ -255,6 +256,36 @@ public interface JavaLangAccess {
|
|||
*/
|
||||
Stream<ModuleLayer> layers(ClassLoader loader);
|
||||
|
||||
/**
|
||||
* Constructs a new {@code String} by decoding the specified array of
|
||||
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
|
||||
*
|
||||
* The caller of this method shall relinquish and transfer the ownership of
|
||||
* the byte array to the callee since the latter will not make a copy.
|
||||
*
|
||||
* @param bytes the byte array source
|
||||
* @param cs the Charset
|
||||
* @return the newly created string
|
||||
* @throws IllegalArgumentException for malformed or unmappable bytes
|
||||
*/
|
||||
String newStringNoRepl(byte[] bytes, Charset cs);
|
||||
|
||||
/**
|
||||
* Encode the given string into a sequence of bytes using the specified Charset.
|
||||
*
|
||||
* This method avoids copying the String's internal representation if the input
|
||||
* is ASCII.
|
||||
*
|
||||
* This method throws IllegalArgumentException instead of replacing when
|
||||
* malformed input or unmappable characters are encountered.
|
||||
*
|
||||
* @param s the string to encode
|
||||
* @param cs the charset
|
||||
* @return the encoded bytes
|
||||
* @throws IllegalArgumentException for malformed input or unmappable characters
|
||||
*/
|
||||
byte[] getBytesNoRepl(String s, Charset cs);
|
||||
|
||||
/**
|
||||
* Returns a new string by decoding from the given utf8 bytes array.
|
||||
*
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue