mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 14:24:46 +02:00
8345120: A likely bug in StringSupport::chunkedStrlenShort
Reviewed-by: mcimadamore
This commit is contained in:
parent
659f70b370
commit
8dada7373f
5 changed files with 275 additions and 209 deletions
|
@ -889,23 +889,27 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||
layout.varHandle().set((MemorySegment)this, index * layout.byteSize(), value);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public String getString(long offset) {
|
||||
return getString(offset, sun.nio.cs.UTF_8.INSTANCE);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public String getString(long offset, Charset charset) {
|
||||
Objects.requireNonNull(charset);
|
||||
return StringSupport.read(this, offset, charset);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public void setString(long offset, String str) {
|
||||
Objects.requireNonNull(str);
|
||||
setString(offset, str, sun.nio.cs.UTF_8.INSTANCE);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public void setString(long offset, String str, Charset charset) {
|
||||
Objects.requireNonNull(charset);
|
||||
|
|
|
@ -28,7 +28,6 @@ package jdk.internal.foreign;
|
|||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.util.Architecture;
|
||||
import jdk.internal.util.ArraysSupport;
|
||||
import jdk.internal.util.ByteArrayLittleEndian;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import jdk.internal.vm.annotation.Stable;
|
||||
|
||||
|
@ -50,6 +49,7 @@ public final class SegmentBulkOperations {
|
|||
private SegmentBulkOperations() {}
|
||||
|
||||
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
|
||||
private static final long LONG_MASK = ~7L; // The last three bits are zero
|
||||
|
||||
// All the threshold values below MUST be a power of two and should preferably be
|
||||
// greater than or equal to 2^3.
|
||||
|
@ -75,21 +75,21 @@ public final class SegmentBulkOperations {
|
|||
int offset = 0;
|
||||
// 0...0X...X000
|
||||
final int limit = (int) (dst.length & (NATIVE_THRESHOLD_FILL - 8));
|
||||
for (; offset < limit; offset += 8) {
|
||||
for (; offset < limit; offset += Long.BYTES) {
|
||||
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, longValue, !Architecture.isLittleEndian());
|
||||
}
|
||||
int remaining = (int) dst.length - limit;
|
||||
// 0...0X00
|
||||
if (remaining >= 4) {
|
||||
if (remaining >= Integer.BYTES) {
|
||||
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (int) longValue, !Architecture.isLittleEndian());
|
||||
offset += 4;
|
||||
remaining -= 4;
|
||||
offset += Integer.BYTES;
|
||||
remaining -= Integer.BYTES;
|
||||
}
|
||||
// 0...00X0
|
||||
if (remaining >= 2) {
|
||||
if (remaining >= Short.BYTES) {
|
||||
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (short) longValue, !Architecture.isLittleEndian());
|
||||
offset += 2;
|
||||
remaining -= 2;
|
||||
offset += Short.BYTES;
|
||||
remaining -= Short.BYTES;
|
||||
}
|
||||
// 0...000X
|
||||
if (remaining == 1) {
|
||||
|
@ -123,26 +123,26 @@ public final class SegmentBulkOperations {
|
|||
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
|
||||
|
||||
// 0...0X...X000
|
||||
final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - 8));
|
||||
final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - Long.BYTES));
|
||||
int offset = 0;
|
||||
for (; offset < limit; offset += 8) {
|
||||
for (; offset < limit; offset += Long.BYTES) {
|
||||
final long v = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||
}
|
||||
int remaining = (int) size - offset;
|
||||
// 0...0X00
|
||||
if (remaining >= 4) {
|
||||
if (remaining >= Integer.BYTES) {
|
||||
final int v = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||
offset += 4;
|
||||
remaining -= 4;
|
||||
offset += Integer.BYTES;
|
||||
remaining -= Integer.BYTES;
|
||||
}
|
||||
// 0...00X0
|
||||
if (remaining >= 2) {
|
||||
if (remaining >= Short.BYTES) {
|
||||
final short v = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||
offset += 2;
|
||||
remaining -=2;
|
||||
offset += Short.BYTES;
|
||||
remaining -= Short.BYTES;
|
||||
}
|
||||
// 0...000X
|
||||
if (remaining == 1) {
|
||||
|
@ -202,9 +202,9 @@ public final class SegmentBulkOperations {
|
|||
return 1;
|
||||
}
|
||||
int result = 1;
|
||||
final long longBytes = length & ((1L << 62) - 8);
|
||||
final long longBytes = length & LONG_MASK;
|
||||
final long limit = fromOffset + longBytes;
|
||||
for (; fromOffset < limit; fromOffset += 8) {
|
||||
for (; fromOffset < limit; fromOffset += Long.BYTES) {
|
||||
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian());
|
||||
result = result * POWERS_OF_31[7]
|
||||
+ ((byte) (val >>> 56)) * POWERS_OF_31[6]
|
||||
|
@ -218,24 +218,24 @@ public final class SegmentBulkOperations {
|
|||
}
|
||||
int remaining = (int) (length - longBytes);
|
||||
// 0...0X00
|
||||
if (remaining >= 4) {
|
||||
if (remaining >= Integer.BYTES) {
|
||||
int val = SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian());
|
||||
result = result * POWERS_OF_31[3]
|
||||
+ ((byte) (val >>> 24)) * POWERS_OF_31[2]
|
||||
+ ((byte) (val >>> 16)) * POWERS_OF_31[1]
|
||||
+ ((byte) (val >>> 8)) * POWERS_OF_31[0]
|
||||
+ ((byte) val);
|
||||
fromOffset += 4;
|
||||
remaining -= 4;
|
||||
fromOffset += Integer.BYTES;
|
||||
remaining -= Integer.BYTES;
|
||||
}
|
||||
// 0...00X0
|
||||
if (remaining >= 2) {
|
||||
if (remaining >= Short.BYTES) {
|
||||
short val = SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian());
|
||||
result = result * POWERS_OF_31[1]
|
||||
+ ((byte) (val >>> 8)) * POWERS_OF_31[0]
|
||||
+ ((byte) val);
|
||||
fromOffset += 2;
|
||||
remaining -= 2;
|
||||
fromOffset += Short.BYTES;
|
||||
remaining -= Short.BYTES;
|
||||
}
|
||||
// 0...000X
|
||||
if (remaining == 1) {
|
||||
|
@ -288,7 +288,7 @@ public final class SegmentBulkOperations {
|
|||
long start, int length, boolean srcAndDstBytesDiffer) {
|
||||
int offset = 0;
|
||||
final int limit = length & (NATIVE_THRESHOLD_MISMATCH - 8);
|
||||
for (; offset < limit; offset += 8) {
|
||||
for (; offset < limit; offset += Long.BYTES) {
|
||||
final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false);
|
||||
final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false);
|
||||
if (s != d) {
|
||||
|
@ -298,24 +298,24 @@ public final class SegmentBulkOperations {
|
|||
int remaining = length - offset;
|
||||
|
||||
// 0...0X00
|
||||
if (remaining >= 4) {
|
||||
if (remaining >= Integer.BYTES) {
|
||||
final int s = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false);
|
||||
final int d = SCOPED_MEMORY_ACCESS.getIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false);
|
||||
if (s != d) {
|
||||
return start + offset + mismatch(s, d);
|
||||
}
|
||||
offset += 4;
|
||||
remaining -= 4;
|
||||
offset += Integer.BYTES;
|
||||
remaining -= Integer.BYTES;
|
||||
}
|
||||
// 0...00X0
|
||||
if (remaining >= 2) {
|
||||
if (remaining >= Short.BYTES) {
|
||||
final short s = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false);
|
||||
final short d = SCOPED_MEMORY_ACCESS.getShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false);
|
||||
if (s != d) {
|
||||
return start + offset + mismatch(s, d);
|
||||
}
|
||||
offset += 2;
|
||||
remaining -= 2;
|
||||
offset += Short.BYTES;
|
||||
remaining -= Short.BYTES;
|
||||
}
|
||||
// 0...000X
|
||||
if (remaining == 1) {
|
||||
|
|
|
@ -27,8 +27,10 @@ package jdk.internal.foreign;
|
|||
|
||||
import jdk.internal.access.JavaLangAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
import jdk.internal.foreign.abi.SharedUtils;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.util.Architecture;
|
||||
import jdk.internal.util.ArraysSupport;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.nio.charset.Charset;
|
||||
|
@ -40,11 +42,14 @@ import static java.lang.foreign.ValueLayout.*;
|
|||
*/
|
||||
public final class StringSupport {
|
||||
|
||||
static final JavaLangAccess JAVA_LANG_ACCESS = SharedSecrets.getJavaLangAccess();
|
||||
private static final JavaLangAccess JAVA_LANG_ACCESS = SharedSecrets.getJavaLangAccess();
|
||||
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
|
||||
private static final long LONG_MASK = ~7L; // The last three bits are zero
|
||||
|
||||
private StringSupport() {}
|
||||
|
||||
public static String read(MemorySegment segment, long offset, Charset charset) {
|
||||
@ForceInline
|
||||
public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
return switch (CharsetKind.of(charset)) {
|
||||
case SINGLE_BYTE -> readByte(segment, offset, charset);
|
||||
case DOUBLE_BYTE -> readShort(segment, offset, charset);
|
||||
|
@ -52,7 +57,8 @@ public final class StringSupport {
|
|||
};
|
||||
}
|
||||
|
||||
public static void write(MemorySegment segment, long offset, Charset charset, String string) {
|
||||
@ForceInline
|
||||
public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
|
||||
switch (CharsetKind.of(charset)) {
|
||||
case SINGLE_BYTE -> writeByte(segment, offset, charset, string);
|
||||
case DOUBLE_BYTE -> writeShort(segment, offset, charset, string);
|
||||
|
@ -60,111 +66,183 @@ public final class StringSupport {
|
|||
}
|
||||
}
|
||||
|
||||
private static String readByte(MemorySegment segment, long offset, Charset charset) {
|
||||
long len = chunkedStrlenByte(segment, offset);
|
||||
byte[] bytes = new byte[(int)len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
|
||||
@ForceInline
|
||||
private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
final int len = strlenByte(segment, offset, segment.byteSize());
|
||||
final byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
|
||||
private static void writeByte(MemorySegment segment, long offset, Charset charset, String string) {
|
||||
@ForceInline
|
||||
private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
|
||||
int bytes = copyBytes(string, segment, charset, offset);
|
||||
segment.set(JAVA_BYTE, offset + bytes, (byte)0);
|
||||
}
|
||||
|
||||
private static String readShort(MemorySegment segment, long offset, Charset charset) {
|
||||
long len = chunkedStrlenShort(segment, offset);
|
||||
byte[] bytes = new byte[(int)len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
|
||||
@ForceInline
|
||||
private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
int len = strlenShort(segment, offset, segment.byteSize());
|
||||
byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
|
||||
private static void writeShort(MemorySegment segment, long offset, Charset charset, String string) {
|
||||
@ForceInline
|
||||
private static void writeShort(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
|
||||
int bytes = copyBytes(string, segment, charset, offset);
|
||||
segment.set(JAVA_SHORT_UNALIGNED, offset + bytes, (short)0);
|
||||
}
|
||||
|
||||
private static String readInt(MemorySegment segment, long offset, Charset charset) {
|
||||
long len = strlenInt(segment, offset);
|
||||
byte[] bytes = new byte[(int)len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
|
||||
@ForceInline
|
||||
private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
|
||||
int len = strlenInt(segment, offset, segment.byteSize());
|
||||
byte[] bytes = new byte[len];
|
||||
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
|
||||
return new String(bytes, charset);
|
||||
}
|
||||
|
||||
private static void writeInt(MemorySegment segment, long offset, Charset charset, String string) {
|
||||
@ForceInline
|
||||
private static void writeInt(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
|
||||
int bytes = copyBytes(string, segment, charset, offset);
|
||||
segment.set(JAVA_INT_UNALIGNED, offset + bytes, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return the shortest distance beginning at the provided {@code start}
|
||||
* to the encountering of a zero byte in the provided {@code segment}}
|
||||
* {@return the distance, in bytes, from the provided {@code fromOffset}
|
||||
* to the first zero byte in the provided {@code segment}, examining only
|
||||
* bytes before {@code toOffset}}
|
||||
* <p>
|
||||
* The method divides the region of interest into three distinct regions:
|
||||
* <ul>
|
||||
* <li>head (access made on a byte-by-byte basis) (if any)</li>
|
||||
* <li>body (access made with eight bytes at a time at physically 64-bit-aligned memory) (if any)</li>
|
||||
* <li>tail (access made on a byte-by-byte basis) (if any)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The body is using a heuristic method to determine if a long word
|
||||
* contains a zero byte. The method might have false positives but
|
||||
* never false negatives.
|
||||
* The method is using a heuristic method to determine if a long word contains a
|
||||
* zero byte. The method might have false positives but never false negatives.
|
||||
* <p>
|
||||
* This method is inspired by the `glibc/string/strlen.c` implementation
|
||||
*
|
||||
* @param segment to examine
|
||||
* @param start from where examination shall begin
|
||||
* @param segment to examine
|
||||
* @param fromOffset from where examination shall begin (inclusive)
|
||||
* @param toOffset to where examination shall end (exclusive)
|
||||
* @throws IndexOutOfBoundsException if the examined region contains no zero byte
|
||||
* @throws IllegalArgumentException if the zero byte is found beyond a length
|
||||
* that can be accepted by a String
|
||||
*/
|
||||
public static int chunkedStrlenByte(MemorySegment segment, long start) {
|
||||
|
||||
// Handle the first unaligned "head" bytes separately
|
||||
int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES);
|
||||
|
||||
int offset = 0;
|
||||
for (; offset < headCount; offset++) {
|
||||
byte curr = segment.get(JAVA_BYTE, start + offset);
|
||||
if (curr == 0) {
|
||||
return offset;
|
||||
}
|
||||
@ForceInline
|
||||
public static int strlenByte(final AbstractMemorySegmentImpl segment,
|
||||
final long fromOffset,
|
||||
final long toOffset) {
|
||||
final long length = toOffset - fromOffset;
|
||||
segment.checkBounds(fromOffset, length);
|
||||
if (length == 0) {
|
||||
// The state has to be checked explicitly for zero-length segments
|
||||
segment.scope.checkValidState();
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
|
||||
// We are now on a long-aligned boundary so this is the "body"
|
||||
int bodyCount = bodyCount(segment.byteSize() - start - headCount);
|
||||
|
||||
for (; offset < bodyCount; offset += Long.BYTES) {
|
||||
// We know we are `long` aligned so, we can save on alignment checking here
|
||||
long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset);
|
||||
// Is this a candidate?
|
||||
if (mightContainZeroByte(curr)) {
|
||||
for (int j = 0; j < 8; j++) {
|
||||
if (segment.get(JAVA_BYTE, start + offset + j) == 0) {
|
||||
return offset + j;
|
||||
final long longBytes = length & LONG_MASK;
|
||||
final long longLimit = fromOffset + longBytes;
|
||||
long offset = fromOffset;
|
||||
for (; offset < longLimit; offset += Long.BYTES) {
|
||||
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
|
||||
if (mightContainZeroByte(val)) {
|
||||
for (int j = 0; j < Long.BYTES; j++) {
|
||||
if (SCOPED_MEMORY_ACCESS.getByte(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j) == 0) {
|
||||
return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the "tail"
|
||||
return requireWithinArraySize((long) offset + strlenByte(segment, start + offset));
|
||||
// Handle the tail
|
||||
for (; offset < toOffset; offset++) {
|
||||
byte val = SCOPED_MEMORY_ACCESS.getByte(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset);
|
||||
if (val == 0) {
|
||||
return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
|
||||
/* Bits 63 and N * 8 (N = 1..7) of this number are zero. Call these bits
|
||||
the "holes". Note that there is a hole just to the left of
|
||||
each byte, with an extra at the end:
|
||||
@ForceInline
|
||||
public static int strlenShort(final AbstractMemorySegmentImpl segment,
|
||||
final long fromOffset,
|
||||
final long toOffset) {
|
||||
final long length = toOffset - fromOffset;
|
||||
segment.checkBounds(fromOffset, length);
|
||||
if (length == 0) {
|
||||
segment.scope.checkValidState();
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
final long longBytes = length & LONG_MASK;
|
||||
final long longLimit = fromOffset + longBytes;
|
||||
long offset = fromOffset;
|
||||
for (; offset < longLimit; offset += Long.BYTES) {
|
||||
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
|
||||
if (mightContainZeroShort(val)) {
|
||||
for (int j = 0; j < Long.BYTES; j += Short.BYTES) {
|
||||
if (SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) {
|
||||
return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle the tail
|
||||
// Prevent over scanning as we step by 2
|
||||
final long endScan = toOffset & ~1; // The last bit is zero
|
||||
for (; offset < endScan; offset += Short.BYTES) {
|
||||
short val = SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
|
||||
if (val == 0) {
|
||||
return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
|
||||
bits: 01111110 11111110 11111110 11111110 11111110 11111110 11111110 11111111
|
||||
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD EEEEEEEE FFFFFFFF GGGGGGGG HHHHHHHH
|
||||
@ForceInline
|
||||
public static int strlenInt(final AbstractMemorySegmentImpl segment,
|
||||
final long fromOffset,
|
||||
final long toOffset) {
|
||||
final long length = toOffset - fromOffset;
|
||||
segment.checkBounds(fromOffset, length);
|
||||
if (length == 0) {
|
||||
segment.scope.checkValidState();
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
final long longBytes = length & LONG_MASK;
|
||||
final long longLimit = fromOffset + longBytes;
|
||||
long offset = fromOffset;
|
||||
for (; offset < longLimit; offset += Long.BYTES) {
|
||||
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
|
||||
if (mightContainZeroInt(val)) {
|
||||
for (int j = 0; j < Long.BYTES; j += Integer.BYTES) {
|
||||
if (SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) {
|
||||
return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle the tail
|
||||
// Prevent over scanning as we step by 4
|
||||
final long endScan = toOffset & ~3; // The last two bit are zero
|
||||
for (; offset < endScan; offset += Integer.BYTES) {
|
||||
int val = SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
|
||||
if (val == 0) {
|
||||
return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset);
|
||||
}
|
||||
}
|
||||
throw nullNotFound(segment, fromOffset, toOffset);
|
||||
}
|
||||
|
||||
The 1-bits make sure that carries propagate to the next 0-bit.
|
||||
The 0-bits provide holes for carries to fall into.
|
||||
/*
|
||||
Bits 63 and N * 8 (N = 1..7) of this number are zero. Call these bits
|
||||
the "holes". Note that there is a hole just to the left of
|
||||
each byte, with an extra at the end:
|
||||
|
||||
bits: 01111110 11111110 11111110 11111110 11111110 11111110 11111110 11111111
|
||||
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD EEEEEEEE FFFFFFFF GGGGGGGG HHHHHHHH
|
||||
|
||||
The 1-bits make sure that carries propagate to the next 0-bit.
|
||||
The 0-bits provide holes for carries to fall into.
|
||||
*/
|
||||
private static final long HIMAGIC_FOR_BYTES = 0x8080_8080_8080_8080L;
|
||||
private static final long LOMAGIC_FOR_BYTES = 0x0101_0101_0101_0101L;
|
||||
|
||||
static boolean mightContainZeroByte(long l) {
|
||||
private static boolean mightContainZeroByte(long l) {
|
||||
return ((l - LOMAGIC_FOR_BYTES) & (~l) & HIMAGIC_FOR_BYTES) != 0;
|
||||
}
|
||||
|
||||
|
@ -175,99 +253,40 @@ public final class StringSupport {
|
|||
return ((l - LOMAGIC_FOR_SHORTS) & (~l) & HIMAGIC_FOR_SHORTS) != 0;
|
||||
}
|
||||
|
||||
static int requireWithinArraySize(long size) {
|
||||
private static final long HIMAGIC_FOR_INTS = 0x8000_0000_8000_0000L;
|
||||
private static final long LOMAGIC_FOR_INTS = 0x0000_0001_0000_0001L;
|
||||
|
||||
static boolean mightContainZeroInt(long l) {
|
||||
return ((l - LOMAGIC_FOR_INTS) & (~l) & HIMAGIC_FOR_INTS) != 0;
|
||||
}
|
||||
|
||||
|
||||
private static int requireWithinStringSize(long size,
|
||||
AbstractMemorySegmentImpl segment,
|
||||
long fromOffset,
|
||||
long toOffset) {
|
||||
if (size > ArraysSupport.SOFT_MAX_ARRAY_LENGTH) {
|
||||
throw newIaeStringTooLarge();
|
||||
throw stringTooLarge(segment, fromOffset, toOffset);
|
||||
}
|
||||
return (int) size;
|
||||
}
|
||||
|
||||
static int bodyCount(long remaining) {
|
||||
return (int) Math.min(
|
||||
// Make sure we do not wrap around
|
||||
Integer.MAX_VALUE - Long.BYTES,
|
||||
// Remaining bytes to consider
|
||||
remaining)
|
||||
& -Long.BYTES; // Mask 0xFFFFFFF8
|
||||
private static IllegalArgumentException stringTooLarge(AbstractMemorySegmentImpl segment,
|
||||
long fromOffset,
|
||||
long toOffset) {
|
||||
return new IllegalArgumentException("String too large: " + exceptionInfo(segment, fromOffset, toOffset));
|
||||
}
|
||||
|
||||
private static int strlenByte(MemorySegment segment, long start) {
|
||||
for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += 1) {
|
||||
byte curr = segment.get(JAVA_BYTE, start + offset);
|
||||
if (curr == 0) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
throw newIaeStringTooLarge();
|
||||
private static IndexOutOfBoundsException nullNotFound(AbstractMemorySegmentImpl segment,
|
||||
long fromOffset,
|
||||
long toOffset) {
|
||||
return new IndexOutOfBoundsException("No null terminator found: " + exceptionInfo(segment, fromOffset, toOffset));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return the shortest distance beginning at the provided {@code start}
|
||||
* to the encountering of a zero short in the provided {@code segment}}
|
||||
* <p>
|
||||
* Note: The inspected region must be short aligned.
|
||||
*
|
||||
* @see #chunkedStrlenByte(MemorySegment, long) for more information
|
||||
*
|
||||
* @param segment to examine
|
||||
* @param start from where examination shall begin
|
||||
* @throws IllegalArgumentException if the examined region contains no zero shorts
|
||||
* within a length that can be accepted by a String
|
||||
*/
|
||||
public static int chunkedStrlenShort(MemorySegment segment, long start) {
|
||||
|
||||
// Handle the first unaligned "head" bytes separately
|
||||
int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES);
|
||||
|
||||
int offset = 0;
|
||||
for (; offset < headCount; offset += Short.BYTES) {
|
||||
short curr = segment.get(JAVA_SHORT_UNALIGNED, start + offset);
|
||||
if (curr == 0) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
|
||||
// We are now on a long-aligned boundary so this is the "body"
|
||||
int bodyCount = bodyCount(segment.byteSize() - start - headCount);
|
||||
|
||||
for (; offset < bodyCount; offset += Long.BYTES) {
|
||||
// We know we are `long` aligned so, we can save on alignment checking here
|
||||
long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset);
|
||||
// Is this a candidate?
|
||||
if (mightContainZeroShort(curr)) {
|
||||
for (int j = 0; j < Long.BYTES; j += Short.BYTES) {
|
||||
if (segment.get(JAVA_SHORT_UNALIGNED, start + offset + j) == 0) {
|
||||
return offset + j;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the "tail"
|
||||
return requireWithinArraySize((long) offset + strlenShort(segment, start + offset));
|
||||
}
|
||||
|
||||
private static int strlenShort(MemorySegment segment, long start) {
|
||||
for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Short.BYTES) {
|
||||
short curr = segment.get(JAVA_SHORT_UNALIGNED, start + offset);
|
||||
if (curr == (short)0) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
throw newIaeStringTooLarge();
|
||||
}
|
||||
|
||||
// The gain of using `long` wide operations for `int` is lower than for the two other `byte` and `short` variants
|
||||
// so, there is only one method for ints.
|
||||
public static int strlenInt(MemorySegment segment, long start) {
|
||||
for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Integer.BYTES) {
|
||||
// We are guaranteed to be aligned here so, we can use unaligned access.
|
||||
int curr = segment.get(JAVA_INT_UNALIGNED, start + offset);
|
||||
if (curr == 0) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
throw newIaeStringTooLarge();
|
||||
private static String exceptionInfo(AbstractMemorySegmentImpl segment,
|
||||
long fromOffset,
|
||||
long toOffset) {
|
||||
return segment + " using region [" + fromOffset + ", " + toOffset + ")";
|
||||
}
|
||||
|
||||
public enum CharsetKind {
|
||||
|
@ -323,9 +342,4 @@ public final class StringSupport {
|
|||
public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
|
||||
JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset);
|
||||
}
|
||||
|
||||
private static IllegalArgumentException newIaeStringTooLarge() {
|
||||
return new IllegalArgumentException("String too large");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue