mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 14:24:46 +02:00
8339699: Optimize DataOutputStream writeUTF
Reviewed-by: liach, bpb
This commit is contained in:
parent
559289487d
commit
b42fbf43df
5 changed files with 252 additions and 182 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -25,8 +26,13 @@
|
|||
|
||||
package java.io;
|
||||
|
||||
import jdk.internal.access.JavaLangAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
import jdk.internal.util.ByteArray;
|
||||
|
||||
import static jdk.internal.util.ModifiedUtf.putChar;
|
||||
import static jdk.internal.util.ModifiedUtf.utfLen;
|
||||
|
||||
/**
|
||||
* A data output stream lets an application write primitive Java data
|
||||
* types to an output stream in a portable way. An application can
|
||||
|
@ -44,6 +50,8 @@ import jdk.internal.util.ByteArray;
|
|||
* @since 1.0
|
||||
*/
|
||||
public class DataOutputStream extends FilterOutputStream implements DataOutput {
|
||||
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
|
||||
|
||||
/**
|
||||
* The number of bytes written to the data output stream so far.
|
||||
* If this counter overflows, it will be wrapped to Integer.MAX_VALUE.
|
||||
|
@ -352,15 +360,11 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput {
|
|||
* {@code str} would exceed 65535 bytes in length
|
||||
* @throws IOException if some other I/O error occurs.
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
static int writeUTF(String str, DataOutput out) throws IOException {
|
||||
final int strlen = str.length();
|
||||
int utflen = strlen; // optimized for ASCII
|
||||
|
||||
for (int i = 0; i < strlen; i++) {
|
||||
int c = str.charAt(i);
|
||||
if (c >= 0x80 || c == 0)
|
||||
utflen += (c >= 0x800) ? 2 : 1;
|
||||
}
|
||||
int countNonZeroAscii = JLA.countNonZeroAscii(str);
|
||||
int utflen = utfLen(str, countNonZeroAscii);
|
||||
|
||||
if (utflen > 65535 || /* overflow */ utflen < strlen)
|
||||
throw new UTFDataFormatException(tooLongMsg(str, utflen));
|
||||
|
@ -377,25 +381,11 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput {
|
|||
int count = 0;
|
||||
ByteArray.setUnsignedShort(bytearr, count, utflen);
|
||||
count += 2;
|
||||
int i = 0;
|
||||
for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII
|
||||
int c = str.charAt(i);
|
||||
if (c >= 0x80 || c == 0) break;
|
||||
bytearr[count++] = (byte) c;
|
||||
}
|
||||
str.getBytes(0, countNonZeroAscii, bytearr, count);
|
||||
count += countNonZeroAscii;
|
||||
|
||||
for (; i < strlen; i++) {
|
||||
int c = str.charAt(i);
|
||||
if (c < 0x80 && c != 0) {
|
||||
bytearr[count++] = (byte) c;
|
||||
} else if (c >= 0x800) {
|
||||
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
} else {
|
||||
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
}
|
||||
for (int i = countNonZeroAscii; i < strlen;) {
|
||||
count = putChar(bytearr, count, str.charAt(i++));
|
||||
}
|
||||
out.write(bytearr, 0, utflen + 2);
|
||||
return utflen + 2;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -34,8 +35,13 @@ import java.util.Objects;
|
|||
import java.util.StringJoiner;
|
||||
|
||||
import jdk.internal.util.ByteArray;
|
||||
import jdk.internal.access.JavaLangAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
import sun.reflect.misc.ReflectUtil;
|
||||
|
||||
import static jdk.internal.util.ModifiedUtf.putChar;
|
||||
import static jdk.internal.util.ModifiedUtf.utfLen;
|
||||
|
||||
/**
|
||||
* An ObjectOutputStream writes primitive data types and graphs of Java objects
|
||||
* to an OutputStream. The objects can be read (reconstituted) using an
|
||||
|
@ -169,6 +175,7 @@ import sun.reflect.misc.ReflectUtil;
|
|||
public class ObjectOutputStream
|
||||
extends OutputStream implements ObjectOutput, ObjectStreamConstants
|
||||
{
|
||||
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
|
||||
|
||||
private static class Caches {
|
||||
/** cache of subclass security audit results */
|
||||
|
@ -885,7 +892,7 @@ public class ObjectOutputStream
|
|||
* stream
|
||||
*/
|
||||
public void writeUTF(String str) throws IOException {
|
||||
bout.writeUTF(str);
|
||||
bout.writeUTFInternal(str, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1317,14 +1324,7 @@ public class ObjectOutputStream
|
|||
*/
|
||||
private void writeString(String str, boolean unshared) throws IOException {
|
||||
handles.assign(unshared ? null : str);
|
||||
long utflen = bout.getUTFLength(str);
|
||||
if (utflen <= 0xFFFF) {
|
||||
bout.writeByte(TC_STRING);
|
||||
bout.writeUTF(str, utflen);
|
||||
} else {
|
||||
bout.writeByte(TC_LONGSTRING);
|
||||
bout.writeLongUTF(str, utflen);
|
||||
}
|
||||
bout.writeUTFInternal(str, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1994,26 +1994,27 @@ public class ObjectOutputStream
|
|||
}
|
||||
}
|
||||
|
||||
public void writeBytes(String s) throws IOException {
|
||||
int endoff = s.length();
|
||||
int cpos = 0;
|
||||
int csize = 0;
|
||||
for (int off = 0; off < endoff; ) {
|
||||
if (cpos >= csize) {
|
||||
cpos = 0;
|
||||
csize = Math.min(endoff - off, CHAR_BUF_SIZE);
|
||||
s.getChars(off, off + csize, cbuf, 0);
|
||||
}
|
||||
if (pos >= MAX_BLOCK_SIZE) {
|
||||
@SuppressWarnings("deprecation")
|
||||
void writeBytes(String s, int len) throws IOException {
|
||||
int pos = this.pos;
|
||||
for (int strpos = 0; strpos < len;) {
|
||||
int rem = MAX_BLOCK_SIZE - pos;
|
||||
int csize = Math.min(len - strpos, rem);
|
||||
s.getBytes(strpos, strpos + csize, buf, pos);
|
||||
pos += csize;
|
||||
strpos += csize;
|
||||
|
||||
if (pos == MAX_BLOCK_SIZE) {
|
||||
this.pos = pos;
|
||||
drain();
|
||||
pos = 0;
|
||||
}
|
||||
int n = Math.min(csize - cpos, MAX_BLOCK_SIZE - pos);
|
||||
int stop = pos + n;
|
||||
while (pos < stop) {
|
||||
buf[pos++] = (byte) cbuf[cpos++];
|
||||
}
|
||||
off += n;
|
||||
}
|
||||
this.pos = pos;
|
||||
}
|
||||
|
||||
public void writeBytes(String s) throws IOException {
|
||||
writeBytes(s, s.length());
|
||||
}
|
||||
|
||||
public void writeChars(String s) throws IOException {
|
||||
|
@ -2026,8 +2027,47 @@ public class ObjectOutputStream
|
|||
}
|
||||
}
|
||||
|
||||
public void writeUTF(String s) throws IOException {
|
||||
writeUTF(s, getUTFLength(s));
|
||||
public void writeUTF(String str) throws IOException {
|
||||
writeUTFInternal(str, false);
|
||||
}
|
||||
|
||||
private void writeUTFInternal(String str, boolean writeHeader) throws IOException {
|
||||
int strlen = str.length();
|
||||
int countNonZeroAscii = JLA.countNonZeroAscii(str);
|
||||
int utflen = utfLen(str, countNonZeroAscii);
|
||||
if (utflen <= 0xFFFF) {
|
||||
if(writeHeader) {
|
||||
writeByte(TC_STRING);
|
||||
}
|
||||
writeShort(utflen);
|
||||
} else {
|
||||
if(writeHeader) {
|
||||
writeByte(TC_LONGSTRING);
|
||||
}
|
||||
writeLong(utflen);
|
||||
}
|
||||
|
||||
if (countNonZeroAscii != 0) {
|
||||
writeBytes(str, countNonZeroAscii);
|
||||
}
|
||||
if (countNonZeroAscii != strlen) {
|
||||
writeMoreUTF(str, countNonZeroAscii);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeMoreUTF(String str, int stroff) throws IOException {
|
||||
int pos = this.pos;
|
||||
for (int strlen = str.length(); stroff < strlen;) {
|
||||
char c = str.charAt(stroff++);
|
||||
int csize = c != 0 && c < 0x80 ? 1 : c >= 0x800 ? 3 : 2;
|
||||
if (pos + csize >= MAX_BLOCK_SIZE) {
|
||||
this.pos = pos;
|
||||
drain();
|
||||
pos = 0;
|
||||
}
|
||||
pos = putChar(buf, pos, c);
|
||||
}
|
||||
this.pos = pos;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2153,112 +2193,6 @@ public class ObjectOutputStream
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length in bytes of the UTF encoding of the given string.
|
||||
*/
|
||||
long getUTFLength(String s) {
|
||||
int len = s.length();
|
||||
long utflen = 0;
|
||||
for (int off = 0; off < len; ) {
|
||||
int csize = Math.min(len - off, CHAR_BUF_SIZE);
|
||||
s.getChars(off, off + csize, cbuf, 0);
|
||||
for (int cpos = 0; cpos < csize; cpos++) {
|
||||
char c = cbuf[cpos];
|
||||
if (c >= 0x0001 && c <= 0x007F) {
|
||||
utflen++;
|
||||
} else if (c > 0x07FF) {
|
||||
utflen += 3;
|
||||
} else {
|
||||
utflen += 2;
|
||||
}
|
||||
}
|
||||
off += csize;
|
||||
}
|
||||
return utflen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the given string in UTF format. This method is used in
|
||||
* situations where the UTF encoding length of the string is already
|
||||
* known; specifying it explicitly avoids a prescan of the string to
|
||||
* determine its UTF length.
|
||||
*/
|
||||
void writeUTF(String s, long utflen) throws IOException {
|
||||
if (utflen > 0xFFFFL) {
|
||||
throw new UTFDataFormatException();
|
||||
}
|
||||
writeShort((int) utflen);
|
||||
if (utflen == (long) s.length()) {
|
||||
writeBytes(s);
|
||||
} else {
|
||||
writeUTFBody(s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes given string in "long" UTF format. "Long" UTF format is
|
||||
* identical to standard UTF, except that it uses an 8 byte header
|
||||
* (instead of the standard 2 bytes) to convey the UTF encoding length.
|
||||
*/
|
||||
void writeLongUTF(String s) throws IOException {
|
||||
writeLongUTF(s, getUTFLength(s));
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes given string in "long" UTF format, where the UTF encoding
|
||||
* length of the string is already known.
|
||||
*/
|
||||
void writeLongUTF(String s, long utflen) throws IOException {
|
||||
writeLong(utflen);
|
||||
if (utflen == (long) s.length()) {
|
||||
writeBytes(s);
|
||||
} else {
|
||||
writeUTFBody(s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the "body" (i.e., the UTF representation minus the 2-byte or
|
||||
* 8-byte length header) of the UTF encoding for the given string.
|
||||
*/
|
||||
private void writeUTFBody(String s) throws IOException {
|
||||
int limit = MAX_BLOCK_SIZE - 3;
|
||||
int len = s.length();
|
||||
for (int off = 0; off < len; ) {
|
||||
int csize = Math.min(len - off, CHAR_BUF_SIZE);
|
||||
s.getChars(off, off + csize, cbuf, 0);
|
||||
for (int cpos = 0; cpos < csize; cpos++) {
|
||||
char c = cbuf[cpos];
|
||||
if (pos <= limit) {
|
||||
if (c <= 0x007F && c != 0) {
|
||||
buf[pos++] = (byte) c;
|
||||
} else if (c > 0x07FF) {
|
||||
buf[pos + 2] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
buf[pos + 1] = (byte) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[pos + 0] = (byte) (0xE0 | ((c >> 12) & 0x0F));
|
||||
pos += 3;
|
||||
} else {
|
||||
buf[pos + 1] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
buf[pos + 0] = (byte) (0xC0 | ((c >> 6) & 0x1F));
|
||||
pos += 2;
|
||||
}
|
||||
} else { // write one byte at a time to normalize block
|
||||
if (c <= 0x007F && c != 0) {
|
||||
write(c);
|
||||
} else if (c > 0x07FF) {
|
||||
write(0xE0 | ((c >> 12) & 0x0F));
|
||||
write(0x80 | ((c >> 6) & 0x3F));
|
||||
write(0x80 | ((c >> 0) & 0x3F));
|
||||
} else {
|
||||
write(0xC0 | ((c >> 6) & 0x1F));
|
||||
write(0x80 | ((c >> 0) & 0x3F));
|
||||
}
|
||||
}
|
||||
}
|
||||
off += csize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue