mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 06:45:07 +02:00
8338591: Improve performance of MemorySegment::copy
Reviewed-by: mcimadamore
This commit is contained in:
parent
cb9f5c5791
commit
6be927260a
4 changed files with 293 additions and 13 deletions
|
@ -1570,8 +1570,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
|||
@ForceInline
|
||||
static void copy(MemorySegment srcSegment, long srcOffset,
|
||||
MemorySegment dstSegment, long dstOffset, long bytes) {
|
||||
copy(srcSegment, ValueLayout.JAVA_BYTE, srcOffset,
|
||||
dstSegment, ValueLayout.JAVA_BYTE, dstOffset,
|
||||
|
||||
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
|
||||
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
|
||||
bytes);
|
||||
}
|
||||
|
||||
|
|
|
@ -304,20 +304,25 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||
|
||||
@Override
|
||||
public final Optional<MemorySegment> asOverlappingSlice(MemorySegment other) {
|
||||
AbstractMemorySegmentImpl that = (AbstractMemorySegmentImpl)Objects.requireNonNull(other);
|
||||
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or heap
|
||||
final AbstractMemorySegmentImpl that = (AbstractMemorySegmentImpl)Objects.requireNonNull(other);
|
||||
if (overlaps(that)) {
|
||||
final long offsetToThat = that.address() - this.address();
|
||||
final long newOffset = offsetToThat >= 0 ? offsetToThat : 0;
|
||||
return Optional.of(asSlice(newOffset, Math.min(this.byteSize() - newOffset, that.byteSize() + offsetToThat)));
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private boolean overlaps(AbstractMemorySegmentImpl that) {
|
||||
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
|
||||
final long thisStart = this.unsafeGetOffset();
|
||||
final long thatStart = that.unsafeGetOffset();
|
||||
final long thisEnd = thisStart + this.byteSize();
|
||||
final long thatEnd = thatStart + that.byteSize();
|
||||
|
||||
if (thisStart < thatEnd && thisEnd > thatStart) { //overlap occurs
|
||||
long offsetToThat = that.address() - this.address();
|
||||
long newOffset = offsetToThat >= 0 ? offsetToThat : 0;
|
||||
return Optional.of(asSlice(newOffset, Math.min(this.byteSize() - newOffset, that.byteSize() + offsetToThat)));
|
||||
}
|
||||
return (thisStart < thatEnd && thisEnd > thatStart); //overlap occurs?
|
||||
}
|
||||
return Optional.empty();
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -645,6 +650,64 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||
}
|
||||
}
|
||||
|
||||
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
|
||||
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
|
||||
|
||||
@ForceInline
|
||||
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
|
||||
AbstractMemorySegmentImpl dst, long dstOffset,
|
||||
long size) {
|
||||
|
||||
Utils.checkNonNegativeIndex(size, "size");
|
||||
// Implicit null check for src and dst
|
||||
src.checkAccess(srcOffset, size, true);
|
||||
dst.checkAccess(dstOffset, size, false);
|
||||
|
||||
if (size <= 0) {
|
||||
// Do nothing
|
||||
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
|
||||
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
|
||||
//
|
||||
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
|
||||
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
|
||||
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
|
||||
|
||||
// 0...0X...X000
|
||||
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
|
||||
int offset = 0;
|
||||
for (; offset < limit; offset += 8) {
|
||||
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
||||
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
||||
}
|
||||
int remaining = (int) size - offset;
|
||||
// 0...0X00
|
||||
if (remaining >= 4) {
|
||||
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
|
||||
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
||||
offset += 4;
|
||||
remaining -= 4;
|
||||
}
|
||||
// 0...00X0
|
||||
if (remaining >= 2) {
|
||||
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
||||
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
||||
offset += 2;
|
||||
remaining -=2;
|
||||
}
|
||||
// 0...000X
|
||||
if (remaining == 1) {
|
||||
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
||||
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
||||
}
|
||||
// We have now fully handled 0...0X...XXXX
|
||||
} else {
|
||||
// For larger sizes, the transition to native code pays off
|
||||
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
|
||||
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
|
||||
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
|
||||
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue