8345465: Fix performance regression on x64 after JDK-8345120

Reviewed-by: mcimadamore
This commit is contained in:
Per Minborg 2024-12-10 10:01:27 +00:00
parent 2979806c72
commit 06c44dd568
2 changed files with 29 additions and 24 deletions

View file

@ -130,8 +130,8 @@ public final class StringSupport {
final long toOffset) {
final long length = toOffset - fromOffset;
segment.checkBounds(fromOffset, length);
if (length == 0) {
// The state has to be checked explicitly for zero-length segments
if (length < Byte.BYTES) {
// A region shorter than one byte (i.e. empty) cannot contain a null terminator
segment.scope.checkValidState();
throw nullNotFound(segment, fromOffset, toOffset);
}
@ -164,7 +164,8 @@ public final class StringSupport {
final long toOffset) {
final long length = toOffset - fromOffset;
segment.checkBounds(fromOffset, length);
if (length == 0) {
if (length < Short.BYTES) {
// A region shorter than one short (two bytes) cannot contain a null terminator
segment.scope.checkValidState();
throw nullNotFound(segment, fromOffset, toOffset);
}
@ -199,19 +200,23 @@ public final class StringSupport {
final long toOffset) {
final long length = toOffset - fromOffset;
segment.checkBounds(fromOffset, length);
if (length == 0) {
if (length < Integer.BYTES) {
// A region shorter than one int (four bytes) cannot contain a null terminator
segment.scope.checkValidState();
throw nullNotFound(segment, fromOffset, toOffset);
}
final long longBytes = length & LONG_MASK;
final long longLimit = fromOffset + longBytes;
long offset = fromOffset;
for (; offset < longLimit; offset += Long.BYTES) {
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
if (mightContainZeroInt(val)) {
for (int j = 0; j < Long.BYTES; j += Integer.BYTES) {
if (SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) {
return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset);
// For quad byte strings, it does not pay off to use long scanning on x64
if (!Architecture.isX64()) {
final long longBytes = length & LONG_MASK;
final long longLimit = fromOffset + longBytes;
for (; offset < longLimit; offset += Long.BYTES) {
long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian());
if (mightContainZeroInt(val)) {
for (int j = 0; j < Long.BYTES; j += Integer.BYTES) {
if (SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) {
return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset);
}
}
}
}

View file

@ -55,10 +55,10 @@ import static java.lang.foreign.ValueLayout.*;
"--enable-native-access=ALL-UNNAMED"})
public class InternalStrLen {
private AbstractMemorySegmentImpl singleByteSegment;
private AbstractMemorySegmentImpl singleByteSegmentMisaligned;
private AbstractMemorySegmentImpl doubleByteSegment;
private AbstractMemorySegmentImpl quadByteSegment;
private MemorySegment singleByteSegment;
private MemorySegment singleByteSegmentMisaligned;
private MemorySegment doubleByteSegment;
private MemorySegment quadByteSegment;
@Param({"1", "4", "16", "251", "1024"})
int size;
@ -66,9 +66,9 @@ public class InternalStrLen {
@Setup
public void setup() {
var arena = Arena.ofAuto();
singleByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Byte.BYTES);
doubleByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Short.BYTES);
quadByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Integer.BYTES);
singleByteSegment = arena.allocate((size + 1L) * Byte.BYTES);
doubleByteSegment = arena.allocate((size + 1L) * Short.BYTES);
quadByteSegment = arena.allocate((size + 1L) * Integer.BYTES);
Stream.of(singleByteSegment, doubleByteSegment, quadByteSegment)
.forEach(s -> IntStream.range(0, (int) s.byteSize() - 1)
.forEach(i -> s.set(
@ -79,7 +79,7 @@ public class InternalStrLen {
singleByteSegment.set(ValueLayout.JAVA_BYTE, singleByteSegment.byteSize() - Byte.BYTES, (byte) 0);
doubleByteSegment.set(ValueLayout.JAVA_SHORT, doubleByteSegment.byteSize() - Short.BYTES, (short) 0);
quadByteSegment.set(ValueLayout.JAVA_INT, quadByteSegment.byteSize() - Integer.BYTES, 0);
singleByteSegmentMisaligned = (AbstractMemorySegmentImpl) arena.allocate(singleByteSegment.byteSize() + 1).
singleByteSegmentMisaligned = arena.allocate(singleByteSegment.byteSize() + 1).
asSlice(1);
MemorySegment.copy(singleByteSegment, 0, singleByteSegmentMisaligned, 0, singleByteSegment.byteSize());
}
@ -106,22 +106,22 @@ public class InternalStrLen {
@Benchmark
public int chunkedSingle() {
return StringSupport.strlenByte(singleByteSegment, 0, singleByteSegment.byteSize());
return StringSupport.strlenByte((AbstractMemorySegmentImpl) singleByteSegment, 0, singleByteSegment.byteSize());
}
@Benchmark
public int chunkedSingleMisaligned() {
return StringSupport.strlenByte(singleByteSegmentMisaligned, 0, singleByteSegment.byteSize());
return StringSupport.strlenByte((AbstractMemorySegmentImpl) singleByteSegmentMisaligned, 0, singleByteSegment.byteSize());
}
@Benchmark
public int chunkedDouble() {
return StringSupport.strlenShort(doubleByteSegment, 0, doubleByteSegment.byteSize());
return StringSupport.strlenShort((AbstractMemorySegmentImpl) doubleByteSegment, 0, doubleByteSegment.byteSize());
}
@Benchmark
public int changedElementQuad() {
return StringSupport.strlenInt(quadByteSegment, 0, quadByteSegment.byteSize());
return StringSupport.strlenInt((AbstractMemorySegmentImpl) quadByteSegment, 0, quadByteSegment.byteSize());
}
// These are the legacy methods