8303516: HFAs with nested structs/unions/arrays not handled correctly on AArch64

Reviewed-by: mcimadamore
Jorn Vernee 2023-03-03 14:33:56 +00:00
parent cbdc7a6f88
commit c6de66c03f
8 changed files with 884 additions and 272 deletions


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -158,25 +158,29 @@ public final class SharedUtils {
}
public static Class<?> primitiveCarrierForSize(long size, boolean useFloat) {
return primitiveLayoutForSize(size, useFloat).carrier();
}
public static ValueLayout primitiveLayoutForSize(long size, boolean useFloat) {
if (useFloat) {
if (size == 4) {
return float.class;
return JAVA_FLOAT;
} else if (size == 8) {
return double.class;
return JAVA_DOUBLE;
}
} else {
if (size == 1) {
return byte.class;
return JAVA_BYTE;
} else if (size == 2) {
return short.class;
return JAVA_SHORT;
} else if (size <= 4) {
return int.class;
return JAVA_INT;
} else if (size <= 8) {
return long.class;
return JAVA_LONG;
}
}
throw new IllegalArgumentException("No type for size: " + size + " isFloat=" + useFloat);
throw new IllegalArgumentException("No layout for size: " + size + " isFloat=" + useFloat);
}
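The change above makes the size-to-layout mapping reusable: primitiveCarrierForSize now delegates to primitiveLayoutForSize and takes its carrier from the returned ValueLayout. A minimal usage sketch of the intended behaviour (illustrative only, not part of the patch; it assumes the JAVA_* constants from java.lang.foreign.ValueLayout and --add-exports access to the internal jdk.internal.foreign.abi package):

    import java.lang.foreign.ValueLayout;
    import jdk.internal.foreign.abi.SharedUtils; // JDK-internal; needs --add-exports to compile

    class LayoutForSizeSketch {
        public static void main(String[] args) {
            // Chunk-wise copies never use float layouts; sizes round up:
            // 1 -> JAVA_BYTE, 2 -> JAVA_SHORT, 3..4 -> JAVA_INT, 5..8 -> JAVA_LONG.
            ValueLayout chunk = SharedUtils.primitiveLayoutForSize(3, false);
            System.out.println(chunk.carrier());  // int (same answer primitiveCarrierForSize gives)
            System.out.println(chunk.byteSize()); // 4
        }
    }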
public static Linker getSystemLinker() {


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -44,6 +44,7 @@ import jdk.internal.foreign.abi.aarch64.windows.WindowsAArch64CallArranger;
import jdk.internal.foreign.Utils;
import java.lang.foreign.SegmentScope;
import java.lang.foreign.ValueLayout;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodType;
import java.util.List;
@@ -65,6 +66,7 @@ import static jdk.internal.foreign.abi.aarch64.AArch64Architecture.Regs.*;
*/
public abstract class CallArranger {
private static final int STACK_SLOT_SIZE = 8;
private static final int MAX_COPY_SIZE = 8;
public static final int MAX_REGISTER_ARGUMENTS = 8;
private static final VMStorage INDIRECT_RESULT = r8;
@@ -217,106 +219,150 @@ public abstract class CallArranger {
this.forVariadicFunction = forVariadicFunction;
}
void alignStack(long alignment) {
private boolean hasRegister(int type) {
return hasEnoughRegisters(type, 1);
}
private boolean hasEnoughRegisters(int type, int count) {
return nRegs[type] + count <= MAX_REGISTER_ARGUMENTS;
}
private static Class<?> adjustCarrierForStack(Class<?> carrier) {
if (carrier == float.class) {
carrier = int.class;
} else if (carrier == double.class) {
carrier = long.class;
}
return carrier;
}
record StructStorage(long offset, Class<?> carrier, VMStorage storage) {}
/*
In the simplest case structs are copied in chunks, i.e. the fields don't matter, just the size.
The struct is split into 8-byte chunks, and those chunks are passed in registers and/or on the stack.
Homogeneous float aggregates (HFAs) can be copied in a field-wise manner, i.e. the struct is split into its
fields and those fields are the chunks which are passed. For HFAs the rules are more complicated and ABI-dependent:
| enough registers | some registers, but not enough | no registers
----------------+------------------+---------------------------------+-------------------------
Linux | FW in regs | CW on the stack | CW on the stack
MacOs, non-VA | FW in regs | FW on the stack | FW on the stack
MacOs, VA | FW in regs | CW on the stack | CW on the stack
Windows, non-VF | FW in regs | CW on the stack | CW on the stack
Windows, VF | FW in regs | CW split between regs and stack | CW on the stack
(where FW = Field-wise copy, CW = Chunk-wise copy, VA is a variadic argument, and VF is a variadic function)
For regular structs, the rules are as follows:
| enough registers | some registers, but not enough | no registers
----------------+------------------+---------------------------------+-------------------------
Linux | CW in regs | CW on the stack | CW on the stack
MacOs | CW in regs | CW on the stack | CW on the stack
Windows, non-VF | CW in regs | CW on the stack | CW on the stack
Windows, VF | CW in regs | CW split between regs and stack | CW on the stack
*/
StructStorage[] structStorages(GroupLayout layout, boolean forHFA) {
int numChunks = (int)Utils.alignUp(layout.byteSize(), MAX_COPY_SIZE) / MAX_COPY_SIZE;
int regType = StorageType.INTEGER;
List<MemoryLayout> scalarLayouts = null;
int requiredStorages = numChunks;
if (forHFA) {
regType = StorageType.VECTOR;
scalarLayouts = TypeClass.scalarLayouts(layout);
requiredStorages = scalarLayouts.size();
}
boolean hasEnoughRegisters = hasEnoughRegisters(regType, requiredStorages);
// For the ABI variants that pack arguments spilled to the
// stack, HFA arguments are spilled as if their individual
// fields had been allocated separately rather than as if the
// struct had been spilled as a whole.
boolean useFieldWiseSpill = requiresSubSlotStackPacking() && !forVarArgs;
boolean isFieldWise = forHFA && (hasEnoughRegisters || useFieldWiseSpill);
if (!isFieldWise) {
requiredStorages = numChunks;
}
boolean spillPartially = forVariadicFunction && spillsVariadicStructsPartially();
boolean furtherAllocationFromTheStack = !hasEnoughRegisters && !spillPartially;
if (furtherAllocationFromTheStack) {
// Any further allocations for this register type must
// be from the stack.
nRegs[regType] = MAX_REGISTER_ARGUMENTS;
}
if (requiresSubSlotStackPacking() && !isFieldWise) {
// Pad to the next stack slot boundary instead of packing
// additional arguments into the unused space.
alignStack(STACK_SLOT_SIZE);
}
StructStorage[] structStorages = new StructStorage[requiredStorages];
long offset = 0;
for (int i = 0; i < structStorages.length; i++) {
ValueLayout copyLayout;
if (isFieldWise) {
// We should only get here for HFAs, which can't have padding
copyLayout = (ValueLayout) scalarLayouts.get(i);
} else {
// chunk-wise copy
long copySize = Math.min(layout.byteSize() - offset, MAX_COPY_SIZE);
boolean useFloat = false; // never use float for chunk-wise copies
copyLayout = SharedUtils.primitiveLayoutForSize(copySize, useFloat);
}
VMStorage storage = nextStorage(regType, copyLayout);
Class<?> carrier = copyLayout.carrier();
if (isFieldWise && storage.type() == StorageType.STACK) {
// copyLayout is a field of an HFA
// Don't use floats on the stack
carrier = adjustCarrierForStack(carrier);
}
structStorages[i] = new StructStorage(offset, carrier, storage);
offset += copyLayout.byteSize();
}
if (requiresSubSlotStackPacking() && !isFieldWise) {
// Pad to the next stack slot boundary instead of packing
// additional arguments into the unused space.
alignStack(STACK_SLOT_SIZE);
}
return structStorages;
}
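To make the field-wise vs. chunk-wise distinction concrete, here is a hedged sketch (not taken from the patch) of the kind of nested HFA this change targets, built with the public java.lang.foreign layout API. With enough free vector registers, structStorages passes it field-wise as four floats; once registers run out, the ABI-specific rules in the table above decide between field-wise and chunk-wise spilling:

    import java.lang.foreign.MemoryLayout;
    import static java.lang.foreign.ValueLayout.JAVA_FLOAT;

    class NestedHfaSketch {
        public static void main(String[] args) {
            // struct point { float x; float y; };
            MemoryLayout point = MemoryLayout.structLayout(JAVA_FLOAT, JAVA_FLOAT);

            // struct box { struct point p; float r[2]; };  -- still an HFA of four floats
            MemoryLayout box = MemoryLayout.structLayout(
                    point,
                    MemoryLayout.sequenceLayout(2, JAVA_FLOAT));

            System.out.println(box.byteSize()); // 16 bytes: four float fields, or two 8-byte chunks
        }
    }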
private void alignStack(long alignment) {
stackOffset = Utils.alignUp(stackOffset, alignment);
}
VMStorage stackAlloc(long size, long alignment) {
assert forArguments : "no stack returns";
long alignedStackOffset = Utils.alignUp(stackOffset, alignment);
short encodedSize = (short) size;
assert (encodedSize & 0xFFFF) == size;
VMStorage storage =
AArch64Architecture.stackStorage(encodedSize, (int)alignedStackOffset);
stackOffset = alignedStackOffset + size;
return storage;
// allocate a single ValueLayout, either in a register or on the stack
VMStorage nextStorage(int type, ValueLayout layout) {
return hasRegister(type) ? regAlloc(type) : stackAlloc(layout);
}
VMStorage stackAlloc(MemoryLayout layout) {
private VMStorage regAlloc(int type) {
ABIDescriptor abiDescriptor = abiDescriptor();
VMStorage[] source = (forArguments ? abiDescriptor.inputStorage : abiDescriptor.outputStorage)[type];
return source[nRegs[type]++];
}
private VMStorage stackAlloc(ValueLayout layout) {
assert forArguments : "no stack returns";
long stackSlotAlignment = requiresSubSlotStackPacking() && !forVarArgs
? layout.byteAlignment()
: Math.max(layout.byteAlignment(), STACK_SLOT_SIZE);
return stackAlloc(layout.byteSize(), stackSlotAlignment);
}
long alignedStackOffset = Utils.alignUp(stackOffset, stackSlotAlignment);
VMStorage[] regAlloc(int type, int count) {
if (nRegs[type] + count <= MAX_REGISTER_ARGUMENTS) {
ABIDescriptor abiDescriptor = abiDescriptor();
VMStorage[] source =
(forArguments ? abiDescriptor.inputStorage : abiDescriptor.outputStorage)[type];
VMStorage[] result = new VMStorage[count];
for (int i = 0; i < count; i++) {
result[i] = source[nRegs[type]++];
}
return result;
} else {
// Any further allocations for this register type must
// be from the stack.
nRegs[type] = MAX_REGISTER_ARGUMENTS;
return null;
}
}
short encodedSize = (short) layout.byteSize();
assert (encodedSize & 0xFFFF) == layout.byteSize();
VMStorage[] regAlloc(int type, MemoryLayout layout) {
boolean spillRegistersPartially = forVariadicFunction && spillsVariadicStructsPartially();
return spillRegistersPartially ?
regAllocPartial(type, layout) :
regAlloc(type, requiredRegisters(layout));
}
int requiredRegisters(MemoryLayout layout) {
return (int)Utils.alignUp(layout.byteSize(), 8) / 8;
}
VMStorage[] regAllocPartial(int type, MemoryLayout layout) {
int availableRegisters = MAX_REGISTER_ARGUMENTS - nRegs[type];
if (availableRegisters <= 0) {
return null;
}
int requestRegisters = Math.min(requiredRegisters(layout), availableRegisters);
return regAlloc(type, requestRegisters);
}
VMStorage nextStorage(int type, MemoryLayout layout) {
if (type == StorageType.VECTOR) {
boolean forVariadicFunctionArgs = forArguments && forVariadicFunction;
boolean useIntRegsForFloatingPointArgs = forVariadicFunctionArgs && useIntRegsForVariadicFloatingPointArgs();
if (useIntRegsForFloatingPointArgs) {
type = StorageType.INTEGER;
}
}
VMStorage[] storage = regAlloc(type, 1);
if (storage == null) {
return stackAlloc(layout);
}
return storage[0];
}
VMStorage[] nextStorageForHFA(GroupLayout group) {
final int nFields = group.memberLayouts().size();
VMStorage[] regs = regAlloc(StorageType.VECTOR, nFields);
if (regs == null && requiresSubSlotStackPacking() && !forVarArgs) {
// For the ABI variants that pack arguments spilled to the
// stack, HFA arguments are spilled as if their individual
// fields had been allocated separately rather than as if the
// struct had been spilled as a whole.
VMStorage[] slots = new VMStorage[nFields];
for (int i = 0; i < nFields; i++) {
slots[i] = stackAlloc(group.memberLayouts().get(i));
}
return slots;
} else {
return regs;
}
VMStorage storage = AArch64Architecture.stackStorage(encodedSize, (int)alignedStackOffset);
stackOffset = alignedStackOffset + layout.byteSize();
return storage;
}
void adjustForVarArgs() {
@@ -335,61 +381,6 @@ public abstract class CallArranger {
this.storageCalculator = new StorageCalculator(forArguments, forVariadicFunction);
}
protected void spillStructUnbox(Binding.Builder bindings, MemoryLayout layout) {
// If a struct has been assigned register or HFA class but
// there are not enough free registers to hold the entire
// struct, it must be passed on the stack. I.e. not split
// between registers and stack.
spillPartialStructUnbox(bindings, layout, 0);
}
protected void spillPartialStructUnbox(Binding.Builder bindings, MemoryLayout layout, long offset) {
while (offset < layout.byteSize()) {
long copy = Math.min(layout.byteSize() - offset, STACK_SLOT_SIZE);
VMStorage storage =
storageCalculator.stackAlloc(copy, STACK_SLOT_SIZE);
if (offset + STACK_SLOT_SIZE < layout.byteSize()) {
bindings.dup();
}
Class<?> type = SharedUtils.primitiveCarrierForSize(copy, false);
bindings.bufferLoad(offset, type)
.vmStore(storage, type);
offset += STACK_SLOT_SIZE;
}
if (requiresSubSlotStackPacking()) {
// Pad to the next stack slot boundary instead of packing
// additional arguments into the unused space.
storageCalculator.alignStack(STACK_SLOT_SIZE);
}
}
protected void spillStructBox(Binding.Builder bindings, MemoryLayout layout) {
// If a struct has been assigned register or HFA class but
// there are not enough free registers to hold the entire
// struct, it must be passed on the stack. I.e. not split
// between registers and stack.
long offset = 0;
while (offset < layout.byteSize()) {
long copy = Math.min(layout.byteSize() - offset, STACK_SLOT_SIZE);
VMStorage storage =
storageCalculator.stackAlloc(copy, STACK_SLOT_SIZE);
Class<?> type = SharedUtils.primitiveCarrierForSize(copy, false);
bindings.dup()
.vmLoad(storage, type)
.bufferStore(offset, type);
offset += STACK_SLOT_SIZE;
}
if (requiresSubSlotStackPacking()) {
// Pad to the next stack slot boundary instead of packing
// additional arguments into the unused space.
storageCalculator.alignStack(STACK_SLOT_SIZE);
}
}
abstract List<Binding> getBindings(Class<?> carrier, MemoryLayout layout);
abstract List<Binding> getIndirectBindings();
@@ -419,87 +410,46 @@ public abstract class CallArranger {
Binding.Builder bindings = Binding.builder();
switch (argumentClass) {
case STRUCT_REGISTER: {
case STRUCT_REGISTER, STRUCT_HFA -> {
assert carrier == MemorySegment.class;
VMStorage[] regs = storageCalculator.regAlloc(StorageType.INTEGER, layout);
boolean forHFA = argumentClass == TypeClass.STRUCT_HFA;
StorageCalculator.StructStorage[] structStorages
= storageCalculator.structStorages((GroupLayout) layout, forHFA);
if (regs != null) {
int regIndex = 0;
long offset = 0;
while (offset < layout.byteSize() && regIndex < regs.length) {
final long copy = Math.min(layout.byteSize() - offset, 8);
VMStorage storage = regs[regIndex++];
Class<?> type = SharedUtils.primitiveCarrierForSize(copy, false);
if (offset + copy < layout.byteSize()) {
bindings.dup();
}
bindings.bufferLoad(offset, type)
.vmStore(storage, type);
offset += copy;
for (int i = 0; i < structStorages.length; i++) {
StorageCalculator.StructStorage structStorage = structStorages[i];
if (i < structStorages.length - 1) {
bindings.dup();
}
final long bytesLeft = Math.min(layout.byteSize() - offset, 8);
if (bytesLeft > 0) {
spillPartialStructUnbox(bindings, layout, offset);
}
} else {
spillStructUnbox(bindings, layout);
bindings.bufferLoad(structStorage.offset(), structStorage.carrier())
.vmStore(structStorage.storage(), structStorage.carrier());
}
break;
}
case STRUCT_REFERENCE: {
case STRUCT_REFERENCE -> {
assert carrier == MemorySegment.class;
bindings.copy(layout)
.unboxAddress();
VMStorage storage = storageCalculator.nextStorage(
StorageType.INTEGER, AArch64.C_POINTER);
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, AArch64.C_POINTER);
bindings.vmStore(storage, long.class);
break;
}
case STRUCT_HFA: {
assert carrier == MemorySegment.class;
GroupLayout group = (GroupLayout)layout;
VMStorage[] regs = storageCalculator.nextStorageForHFA(group);
if (regs != null) {
long offset = 0;
for (int i = 0; i < group.memberLayouts().size(); i++) {
VMStorage storage = regs[i];
final long size = group.memberLayouts().get(i).byteSize();
boolean useFloat = storage.type() == StorageType.VECTOR;
Class<?> type = SharedUtils.primitiveCarrierForSize(size, useFloat);
if (i + 1 < group.memberLayouts().size()) {
bindings.dup();
}
bindings.bufferLoad(offset, type)
.vmStore(storage, type);
offset += size;
}
} else {
spillStructUnbox(bindings, layout);
}
break;
}
case POINTER: {
case POINTER -> {
bindings.unboxAddress();
VMStorage storage =
storageCalculator.nextStorage(StorageType.INTEGER, layout);
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, (ValueLayout) layout);
bindings.vmStore(storage, long.class);
break;
}
case INTEGER: {
VMStorage storage =
storageCalculator.nextStorage(StorageType.INTEGER, layout);
case INTEGER -> {
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, (ValueLayout) layout);
bindings.vmStore(storage, carrier);
break;
}
case FLOAT: {
VMStorage storage =
storageCalculator.nextStorage(StorageType.VECTOR, layout);
case FLOAT -> {
boolean forVariadicFunctionArgs = forArguments && forVariadicFunction;
boolean useIntReg = forVariadicFunctionArgs && useIntRegsForVariadicFloatingPointArgs();
int type = useIntReg ? StorageType.INTEGER : StorageType.VECTOR;
VMStorage storage = storageCalculator.nextStorage(type, (ValueLayout) layout);
bindings.vmStore(storage, carrier);
break;
}
default:
throw new UnsupportedOperationException("Unhandled class " + argumentClass);
default -> throw new UnsupportedOperationException("Unhandled class " + argumentClass);
}
return bindings.build();
}
@@ -523,70 +473,36 @@ public abstract class CallArranger {
TypeClass argumentClass = TypeClass.classifyLayout(layout);
Binding.Builder bindings = Binding.builder();
switch (argumentClass) {
case STRUCT_REGISTER -> {
case STRUCT_REGISTER, STRUCT_HFA -> {
assert carrier == MemorySegment.class;
boolean forHFA = argumentClass == TypeClass.STRUCT_HFA;
bindings.allocate(layout);
VMStorage[] regs = storageCalculator.regAlloc(
StorageType.INTEGER, layout);
if (regs != null) {
int regIndex = 0;
long offset = 0;
while (offset < layout.byteSize()) {
final long copy = Math.min(layout.byteSize() - offset, 8);
VMStorage storage = regs[regIndex++];
bindings.dup();
boolean useFloat = storage.type() == StorageType.VECTOR;
Class<?> type = SharedUtils.primitiveCarrierForSize(copy, useFloat);
bindings.vmLoad(storage, type)
.bufferStore(offset, type);
offset += copy;
}
} else {
spillStructBox(bindings, layout);
StorageCalculator.StructStorage[] structStorages
= storageCalculator.structStorages((GroupLayout) layout, forHFA);
for (StorageCalculator.StructStorage structStorage : structStorages) {
bindings.dup();
bindings.vmLoad(structStorage.storage(), structStorage.carrier())
.bufferStore(structStorage.offset(), structStorage.carrier());
}
}
case STRUCT_REFERENCE -> {
assert carrier == MemorySegment.class;
VMStorage storage = storageCalculator.nextStorage(
StorageType.INTEGER, AArch64.C_POINTER);
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, AArch64.C_POINTER);
bindings.vmLoad(storage, long.class)
.boxAddress(layout);
}
case STRUCT_HFA -> {
assert carrier == MemorySegment.class;
bindings.allocate(layout);
GroupLayout group = (GroupLayout) layout;
VMStorage[] regs = storageCalculator.nextStorageForHFA(group);
if (regs != null) {
long offset = 0;
for (int i = 0; i < group.memberLayouts().size(); i++) {
VMStorage storage = regs[i];
final long size = group.memberLayouts().get(i).byteSize();
boolean useFloat = storage.type() == StorageType.VECTOR;
Class<?> type = SharedUtils.primitiveCarrierForSize(size, useFloat);
bindings.dup()
.vmLoad(storage, type)
.bufferStore(offset, type);
offset += size;
}
} else {
spillStructBox(bindings, layout);
}
}
case POINTER -> {
VMStorage storage =
storageCalculator.nextStorage(StorageType.INTEGER, layout);
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, (ValueLayout) layout);
bindings.vmLoad(storage, long.class)
.boxAddressRaw(Utils.pointeeSize(layout));
}
case INTEGER -> {
VMStorage storage =
storageCalculator.nextStorage(StorageType.INTEGER, layout);
VMStorage storage = storageCalculator.nextStorage(StorageType.INTEGER, (ValueLayout) layout);
bindings.vmLoad(storage, carrier);
}
case FLOAT -> {
VMStorage storage =
storageCalculator.nextStorage(StorageType.VECTOR, layout);
VMStorage storage = storageCalculator.nextStorage(StorageType.VECTOR, (ValueLayout) layout);
bindings.vmLoad(storage, carrier);
}
default -> throw new UnsupportedOperationException("Unhandled class " + argumentClass);


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -28,7 +28,10 @@ package jdk.internal.foreign.abi.aarch64;
import java.lang.foreign.GroupLayout;
import java.lang.foreign.MemoryLayout;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SequenceLayout;
import java.lang.foreign.ValueLayout;
import java.util.List;
import java.util.ArrayList;
public enum TypeClass {
STRUCT_REGISTER,
@@ -58,15 +61,38 @@ public enum TypeClass {
return type.bitSize() <= MAX_AGGREGATE_REGS_SIZE * 64;
}
static List<MemoryLayout> scalarLayouts(GroupLayout gl) {
List<MemoryLayout> out = new ArrayList<>();
scalarLayoutsInternal(out, gl);
return out;
}
private static void scalarLayoutsInternal(List<MemoryLayout> out, GroupLayout gl) {
for (MemoryLayout member : gl.memberLayouts()) {
if (member instanceof GroupLayout memberGl) {
scalarLayoutsInternal(out, memberGl);
} else if (member instanceof SequenceLayout memberSl) {
for (long i = 0; i < memberSl.elementCount(); i++) {
out.add(memberSl.elementLayout());
}
} else {
// padding or value layouts
out.add(member);
}
}
}
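This flattening is the heart of the fix: nested structs, unions, and arrays are reduced to their scalar leaves before HFA classification, so a layout like struct { struct { float; float; }; float[2]; } is now seen as four float scalars. A hedged standalone sketch of the same logic (an illustrative copy using only the public layout API, so it can be run against a concrete layout):

    import java.lang.foreign.GroupLayout;
    import java.lang.foreign.MemoryLayout;
    import java.lang.foreign.SequenceLayout;
    import java.util.ArrayList;
    import java.util.List;
    import static java.lang.foreign.ValueLayout.JAVA_FLOAT;

    class FlattenSketch {
        // Mirrors the scalarLayouts flattening shown above (standalone copy for illustration).
        static void flatten(List<MemoryLayout> out, GroupLayout gl) {
            for (MemoryLayout member : gl.memberLayouts()) {
                if (member instanceof GroupLayout memberGl) {
                    flatten(out, memberGl);
                } else if (member instanceof SequenceLayout memberSl) {
                    for (long i = 0; i < memberSl.elementCount(); i++) {
                        out.add(memberSl.elementLayout());
                    }
                } else {
                    out.add(member); // value or padding layout
                }
            }
        }

        public static void main(String[] args) {
            // struct { struct { float; float; }; float[2]; }
            GroupLayout nested = MemoryLayout.structLayout(
                    MemoryLayout.structLayout(JAVA_FLOAT, JAVA_FLOAT),
                    MemoryLayout.sequenceLayout(2, JAVA_FLOAT));
            List<MemoryLayout> scalars = new ArrayList<>();
            flatten(scalars, nested);
            System.out.println(scalars.size()); // 4 float scalars, all the same type -> an HFA
        }
    }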
static boolean isHomogeneousFloatAggregate(MemoryLayout type) {
if (!(type instanceof GroupLayout groupLayout))
return false;
final int numElements = groupLayout.memberLayouts().size();
List<MemoryLayout> scalarLayouts = scalarLayouts(groupLayout);
final int numElements = scalarLayouts.size();
if (numElements > 4 || numElements == 0)
return false;
MemoryLayout baseType = groupLayout.memberLayouts().get(0);
MemoryLayout baseType = scalarLayouts.get(0);
if (!(baseType instanceof ValueLayout))
return false;
@@ -75,7 +101,7 @@ public enum TypeClass {
if (baseArgClass != FLOAT)
return false;
for (MemoryLayout elem : groupLayout.memberLayouts()) {
for (MemoryLayout elem : scalarLayouts) {
if (!(elem instanceof ValueLayout))
return false;