8338591: Improve performance of MemorySegment::copy

Reviewed-by: mcimadamore
This commit is contained in:
Per Minborg 2024-09-05 13:10:24 +00:00
parent cb9f5c5791
commit 6be927260a
4 changed files with 293 additions and 13 deletions

View file

@ -1570,8 +1570,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
@ForceInline
static void copy(MemorySegment srcSegment, long srcOffset,
MemorySegment dstSegment, long dstOffset, long bytes) {
copy(srcSegment, ValueLayout.JAVA_BYTE, srcOffset,
dstSegment, ValueLayout.JAVA_BYTE, dstOffset,
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
bytes);
}

View file

@ -304,20 +304,25 @@ public abstract sealed class AbstractMemorySegmentImpl
@Override
public final Optional<MemorySegment> asOverlappingSlice(MemorySegment other) {
AbstractMemorySegmentImpl that = (AbstractMemorySegmentImpl)Objects.requireNonNull(other);
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or heap
final AbstractMemorySegmentImpl that = (AbstractMemorySegmentImpl)Objects.requireNonNull(other);
if (overlaps(that)) {
final long offsetToThat = that.address() - this.address();
final long newOffset = offsetToThat >= 0 ? offsetToThat : 0;
return Optional.of(asSlice(newOffset, Math.min(this.byteSize() - newOffset, that.byteSize() + offsetToThat)));
}
return Optional.empty();
}
@ForceInline
private boolean overlaps(AbstractMemorySegmentImpl that) {
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
final long thisStart = this.unsafeGetOffset();
final long thatStart = that.unsafeGetOffset();
final long thisEnd = thisStart + this.byteSize();
final long thatEnd = thatStart + that.byteSize();
if (thisStart < thatEnd && thisEnd > thatStart) { //overlap occurs
long offsetToThat = that.address() - this.address();
long newOffset = offsetToThat >= 0 ? offsetToThat : 0;
return Optional.of(asSlice(newOffset, Math.min(this.byteSize() - newOffset, that.byteSize() + offsetToThat)));
}
return (thisStart < thatEnd && thisEnd > thatStart); //overlap occurs?
}
return Optional.empty();
return false;
}
@Override
@ -645,6 +650,64 @@ public abstract sealed class AbstractMemorySegmentImpl
}
}
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
@ForceInline
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
AbstractMemorySegmentImpl dst, long dstOffset,
long size) {
Utils.checkNonNegativeIndex(size, "size");
// Implicit null check for src and dst
src.checkAccess(srcOffset, size, true);
dst.checkAccess(dstOffset, size, false);
if (size <= 0) {
// Do nothing
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
//
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
// 0...0X...X000
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
int offset = 0;
for (; offset < limit; offset += 8) {
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
int remaining = (int) size - offset;
// 0...0X00
if (remaining >= 4) {
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 2;
remaining -=2;
}
// 0...000X
if (remaining == 1) {
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
// We have now fully handled 0...0X...XXXX
} else {
// For larger sizes, the transition to native code pays off
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
}
}
@ForceInline
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,