Mirror of https://github.com/openjdk/jdk.git, synced 2025-09-24 04:54:40 +02:00
8132160: support for AVX 512 call frames and stack management
Simplify save/restore frame on x86 systems which support EVEX.

Reviewed-by: kvn, iveresov
commit d67924dc8e
parent 9f9739c156
17 changed files with 1578 additions and 1257 deletions
@@ -69,7 +69,9 @@ class SimpleRuntimeFrame {
class RegisterSaver {
// Capture info about frame layout. Layout offsets are in jint
// units because compiler frame slots are jints.
#define HALF_ZMM_BANK_WORDS 128
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
enum layout {
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
@@ -89,23 +91,24 @@ class RegisterSaver {
DEF_XMM_OFFS(13),
DEF_XMM_OFFS(14),
DEF_XMM_OFFS(15),
DEF_XMM_OFFS(16),
DEF_XMM_OFFS(17),
DEF_XMM_OFFS(18),
DEF_XMM_OFFS(19),
DEF_XMM_OFFS(20),
DEF_XMM_OFFS(21),
DEF_XMM_OFFS(22),
DEF_XMM_OFFS(23),
DEF_XMM_OFFS(24),
DEF_XMM_OFFS(25),
DEF_XMM_OFFS(26),
DEF_XMM_OFFS(27),
DEF_XMM_OFFS(28),
DEF_XMM_OFFS(29),
DEF_XMM_OFFS(30),
DEF_XMM_OFFS(31),
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords - 1)*wordSize / BytesPerInt),
zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
DEF_ZMM_OFFS(16),
DEF_ZMM_OFFS(17),
DEF_ZMM_OFFS(18),
DEF_ZMM_OFFS(19),
DEF_ZMM_OFFS(20),
DEF_ZMM_OFFS(21),
DEF_ZMM_OFFS(22),
DEF_ZMM_OFFS(23),
DEF_ZMM_OFFS(24),
DEF_ZMM_OFFS(25),
DEF_ZMM_OFFS(26),
DEF_ZMM_OFFS(27),
DEF_ZMM_OFFS(28),
DEF_ZMM_OFFS(29),
DEF_ZMM_OFFS(30),
DEF_ZMM_OFFS(31),
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
fpu_stateH_end,
r15_off, r15H_off,
r14_off, r14H_off,
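A worked illustration of the offset macros in the layout enum above, as a minimal standalone sketch: it assumes BytesPerInt is 4 (a jint is 4 bytes) and uses placeholder xmm_off/zmm_off bases rather than the real enum values, so only the strides are meaningful. Each XMM slot spans 16 bytes (4 jints) and each ZMM slot spans 64 bytes (16 jints).

// Sketch only: placeholder bases, not the HotSpot layout values.
#include <cstdio>

static const int BytesPerInt = 4;  // size of a jint
static const int xmm_off = 0;      // placeholder base of the fxsave XMM area
static const int zmm_off = 0;      // placeholder base of the ZMM save area

#define DEF_XMM_OFFS(regnum) xmm##regnum##_off = xmm_off + (regnum)*16/BytesPerInt, xmm##regnum##H_off
#define DEF_ZMM_OFFS(regnum) zmm##regnum##_off = zmm_off + (regnum-16)*64/BytesPerInt, zmm##regnum##H_off

enum layout {
  DEF_XMM_OFFS(13),  // xmm13_off = xmm_off + 52, then xmm13H_off = 53
  DEF_ZMM_OFFS(16),  // zmm16_off = zmm_off + 0,  then zmm16H_off = 1
  DEF_ZMM_OFFS(17)   // zmm17_off = zmm_off + 16, then zmm17H_off = 17
};

int main() {
  printf("xmm13_off=%d xmm13H_off=%d\n", xmm13_off, xmm13H_off);  // 52 53
  printf("zmm16_off=%d zmm17_off=%d\n", zmm16_off, zmm17_off);    // 0 16
  return 0;
}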
@@ -155,9 +158,10 @@ class RegisterSaver {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int vect_words = 0;
int num_xmm_regs = 16;
if (UseAVX > 2) {
num_xmm_regs = 32;
int off = 0;
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
if (UseAVX < 3) {
num_xmm_regs = num_xmm_regs/2;
}
#ifdef COMPILER2
if (save_vectors) {
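The hard-coded 16/32 split above is replaced by deriving the count from XMMRegisterImpl::number_of_registers and halving it on pre-EVEX targets. A standalone sketch of that selection, assuming the register-file constant is 32 on x86_64 (the constant below is a stand-in, not the real class member):

#include <cstdio>

// Stand-in for XMMRegisterImpl::number_of_registers (assumed 32 on x86_64).
static const int number_of_registers = 32;

static int xmm_regs_to_save(int UseAVX) {
  int num_xmm_regs = number_of_registers;
  if (UseAVX < 3) {             // no EVEX: only xmm0..xmm15 are available
    num_xmm_regs = num_xmm_regs / 2;
  }
  return num_xmm_regs;
}

int main() {
  printf("UseAVX=2 -> %d XMM registers\n", xmm_regs_to_save(2));  // 16
  printf("UseAVX=3 -> %d XMM registers\n", xmm_regs_to_save(3));  // 32
  return 0;
}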
@@ -165,9 +169,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
// Save upper half of YMM registers
vect_words = 16 * num_xmm_regs / wordSize;
additional_frame_words += vect_words;
if (UseAVX > 2) {
// Save upper half of ZMM registers as well
if (UseAVX < 3) {
additional_frame_words += vect_words;
}
}
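The sizing above can be checked with a little arithmetic: on x86_64 a word is 8 bytes, so saving the upper 16 bytes of 16 YMM registers needs 16*16/8 = 32 words, i.e. 256 bytes, which is exactly what the later subptr(rsp, 256) and the vect_words*wordSize >= 256 assert expect. A minimal sketch of that calculation, assuming wordSize is 8:

#include <cassert>
#include <cstdio>

int main() {
  const int wordSize     = 8;   // bytes per machine word on x86_64 (assumption)
  const int num_xmm_regs = 16;  // xmm0..xmm15 when UseAVX < 3
  int vect_words = 16 * num_xmm_regs / wordSize;  // upper 16 bytes per register
  assert(vect_words * wordSize == 256);           // matches subptr(rsp, 256)
  printf("vect_words = %d (%d bytes)\n", vect_words, vect_words * wordSize);
  return 0;
}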
@@ -195,77 +197,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes

if (vect_words > 0) {
// push cpu state handles this on EVEX enabled targets
if ((vect_words > 0) && (UseAVX < 3)) {
assert(vect_words*wordSize >= 256, "");
__ subptr(rsp, 256); // Save upper half of YMM registes(0..15)
__ vextractf128h(Address(rsp, 0), xmm0);
__ vextractf128h(Address(rsp, 16), xmm1);
__ vextractf128h(Address(rsp, 32), xmm2);
__ vextractf128h(Address(rsp, 48), xmm3);
__ vextractf128h(Address(rsp, 64), xmm4);
__ vextractf128h(Address(rsp, 80), xmm5);
__ vextractf128h(Address(rsp, 96), xmm6);
__ vextractf128h(Address(rsp, 112), xmm7);
__ vextractf128h(Address(rsp, 128), xmm8);
__ vextractf128h(Address(rsp, 144), xmm9);
__ vextractf128h(Address(rsp, 160), xmm10);
__ vextractf128h(Address(rsp, 176), xmm11);
__ vextractf128h(Address(rsp, 192), xmm12);
__ vextractf128h(Address(rsp, 208), xmm13);
__ vextractf128h(Address(rsp, 224), xmm14);
__ vextractf128h(Address(rsp, 240), xmm15);
if (UseAVX > 2) {
__ subptr(rsp, 256); // Save upper half of YMM registes(16..31)
__ vextractf128h(Address(rsp, 0), xmm16);
__ vextractf128h(Address(rsp, 16), xmm17);
__ vextractf128h(Address(rsp, 32), xmm18);
__ vextractf128h(Address(rsp, 48), xmm19);
__ vextractf128h(Address(rsp, 64), xmm20);
__ vextractf128h(Address(rsp, 80), xmm21);
__ vextractf128h(Address(rsp, 96), xmm22);
__ vextractf128h(Address(rsp, 112), xmm23);
__ vextractf128h(Address(rsp, 128), xmm24);
__ vextractf128h(Address(rsp, 144), xmm25);
__ vextractf128h(Address(rsp, 160), xmm26);
__ vextractf128h(Address(rsp, 176), xmm27);
__ vextractf128h(Address(rsp, 192), xmm28);
__ vextractf128h(Address(rsp, 208), xmm29);
__ vextractf128h(Address(rsp, 224), xmm30);
__ vextractf128h(Address(rsp, 240), xmm31);
// Now handle the ZMM registers (0..31)
__ subptr(rsp, 1024); // Save upper half of ZMM registes
__ vextractf64x4h(Address(rsp, 0), xmm0);
__ vextractf64x4h(Address(rsp, 32), xmm1);
__ vextractf64x4h(Address(rsp, 64), xmm2);
__ vextractf64x4h(Address(rsp, 96), xmm3);
__ vextractf64x4h(Address(rsp, 128), xmm4);
__ vextractf64x4h(Address(rsp, 160), xmm5);
__ vextractf64x4h(Address(rsp, 192), xmm6);
__ vextractf64x4h(Address(rsp, 224), xmm7);
__ vextractf64x4h(Address(rsp, 256), xmm8);
__ vextractf64x4h(Address(rsp, 288), xmm9);
__ vextractf64x4h(Address(rsp, 320), xmm10);
__ vextractf64x4h(Address(rsp, 352), xmm11);
__ vextractf64x4h(Address(rsp, 384), xmm12);
__ vextractf64x4h(Address(rsp, 416), xmm13);
__ vextractf64x4h(Address(rsp, 448), xmm14);
__ vextractf64x4h(Address(rsp, 480), xmm15);
__ vextractf64x4h(Address(rsp, 512), xmm16);
__ vextractf64x4h(Address(rsp, 544), xmm17);
__ vextractf64x4h(Address(rsp, 576), xmm18);
__ vextractf64x4h(Address(rsp, 608), xmm19);
__ vextractf64x4h(Address(rsp, 640), xmm20);
__ vextractf64x4h(Address(rsp, 672), xmm21);
__ vextractf64x4h(Address(rsp, 704), xmm22);
__ vextractf64x4h(Address(rsp, 736), xmm23);
__ vextractf64x4h(Address(rsp, 768), xmm24);
__ vextractf64x4h(Address(rsp, 800), xmm25);
__ vextractf64x4h(Address(rsp, 832), xmm26);
__ vextractf64x4h(Address(rsp, 864), xmm27);
__ vextractf64x4h(Address(rsp, 896), xmm28);
__ vextractf64x4h(Address(rsp, 928), xmm29);
__ vextractf64x4h(Address(rsp, 960), xmm30);
__ vextractf64x4h(Address(rsp, 992), xmm31);
// Save upper half of YMM registes(0..num_xmm_regs)
__ subptr(rsp, num_xmm_regs*16);
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
}
}
if (frame::arg_reg_save_area_bytes != 0) {
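The point of this hunk is that the long unrolled vextractf128h/vextractf64x4h sequences disappear: on pre-EVEX targets a single loop indexed through as_XMMRegister(n) saves the YMM upper halves, and on EVEX targets push_CPU_state() already covers everything (per the "push cpu state handles this on EVEX enabled targets" comment). A standalone model of the offsets that loop produces (printf only, no MacroAssembler), showing it walks the same 0, 16, ..., 240 sequence the unrolled code used:

#include <cstdio>

int main() {
  const int num_xmm_regs = 16;  // non-EVEX case handled by the explicit loop
  int off = 0;
  // Mirrors: __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
  for (int n = 0; n < num_xmm_regs; n++) {
    printf("xmm%-2d upper half -> [rsp + %3d]\n", n, off++ * 16);
  }
  return 0;
}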
@@ -299,39 +237,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
if (UseAVX > 2) {
map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
// on EVEX enabled targets, we get it included in the xsave area
off = xmm0_off;
int delta = xmm1_off - off;
for (int n = 0; n < 16; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
off += delta;
}
if(UseAVX > 2) {
// Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
off = zmm16_off;
delta = zmm17_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
off += delta;
}
}

// %%% These should all be a waste but we'll keep things as they were for now
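Likewise, the per-register set_callee_saved calls become an off/delta walk: the stride between consecutive XMM slots in the layout enum is constant, so one loop covers xmm0..xmm15 (and a second loop, starting at zmm16_off, covers xmm16..xmm31 on EVEX targets). A standalone model with placeholder slot values (4 jints per XMM slot, as the enum arithmetic above implies):

#include <cstdio>

int main() {
  // Placeholder values; the real xmm0_off/xmm1_off come from the layout enum.
  const int xmm0_off = 0;
  const int xmm1_off = 4;       // 16 bytes / BytesPerInt(4) = 4 jints per slot
  int off   = xmm0_off;
  int delta = xmm1_off - xmm0_off;
  // Mirrors: map->set_callee_saved(STACK_OFFSET(off), as_XMMRegister(n)->as_VMReg());
  for (int n = 0; n < 16; n++) {
    printf("xmm%-2d -> stack slot %d\n", n, off);
    off += delta;
  }
  return 0;
}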
@@ -351,39 +274,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
// on EVEX enabled targets, we get it included in the xsave area
off = xmm0H_off;
delta = xmm1H_off - off;
for (int n = 0; n < 16; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
off += delta;
}
if (UseAVX > 2) {
map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
// Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
off = zmm16H_off;
delta = zmm17H_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
off += delta;
}
}
}
@@ -391,86 +299,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
if (UseAVX < 3) {
num_xmm_regs = num_xmm_regs/2;
}
if (frame::arg_reg_save_area_bytes != 0) {
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
#ifdef COMPILER2
if (restore_vectors) {
// Restore upper half of YMM registes (0..15)
assert(UseAVX > 0, "512bit vectors are supported only with AVX");
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
__ vinsertf128h(xmm0, Address(rsp, 0));
__ vinsertf128h(xmm1, Address(rsp, 16));
__ vinsertf128h(xmm2, Address(rsp, 32));
__ vinsertf128h(xmm3, Address(rsp, 48));
__ vinsertf128h(xmm4, Address(rsp, 64));
__ vinsertf128h(xmm5, Address(rsp, 80));
__ vinsertf128h(xmm6, Address(rsp, 96));
__ vinsertf128h(xmm7, Address(rsp,112));
__ vinsertf128h(xmm8, Address(rsp,128));
__ vinsertf128h(xmm9, Address(rsp,144));
__ vinsertf128h(xmm10, Address(rsp,160));
__ vinsertf128h(xmm11, Address(rsp,176));
__ vinsertf128h(xmm12, Address(rsp,192));
__ vinsertf128h(xmm13, Address(rsp,208));
__ vinsertf128h(xmm14, Address(rsp,224));
__ vinsertf128h(xmm15, Address(rsp,240));
__ addptr(rsp, 256);
if (UseAVX > 2) {
// Restore upper half of YMM registes (16..31)
__ vinsertf128h(xmm16, Address(rsp, 0));
__ vinsertf128h(xmm17, Address(rsp, 16));
__ vinsertf128h(xmm18, Address(rsp, 32));
__ vinsertf128h(xmm19, Address(rsp, 48));
__ vinsertf128h(xmm20, Address(rsp, 64));
__ vinsertf128h(xmm21, Address(rsp, 80));
__ vinsertf128h(xmm22, Address(rsp, 96));
__ vinsertf128h(xmm23, Address(rsp,112));
__ vinsertf128h(xmm24, Address(rsp,128));
__ vinsertf128h(xmm25, Address(rsp,144));
__ vinsertf128h(xmm26, Address(rsp,160));
__ vinsertf128h(xmm27, Address(rsp,176));
__ vinsertf128h(xmm28, Address(rsp,192));
__ vinsertf128h(xmm29, Address(rsp,208));
__ vinsertf128h(xmm30, Address(rsp,224));
__ vinsertf128h(xmm31, Address(rsp,240));
__ addptr(rsp, 256);
// Restore upper half of ZMM registes.
__ vinsertf64x4h(xmm0, Address(rsp, 0));
__ vinsertf64x4h(xmm1, Address(rsp, 32));
__ vinsertf64x4h(xmm2, Address(rsp, 64));
__ vinsertf64x4h(xmm3, Address(rsp, 96));
__ vinsertf64x4h(xmm4, Address(rsp, 128));
__ vinsertf64x4h(xmm5, Address(rsp, 160));
__ vinsertf64x4h(xmm6, Address(rsp, 192));
__ vinsertf64x4h(xmm7, Address(rsp, 224));
__ vinsertf64x4h(xmm8, Address(rsp, 256));
__ vinsertf64x4h(xmm9, Address(rsp, 288));
__ vinsertf64x4h(xmm10, Address(rsp, 320));
__ vinsertf64x4h(xmm11, Address(rsp, 352));
__ vinsertf64x4h(xmm12, Address(rsp, 384));
__ vinsertf64x4h(xmm13, Address(rsp, 416));
__ vinsertf64x4h(xmm14, Address(rsp, 448));
__ vinsertf64x4h(xmm15, Address(rsp, 480));
__ vinsertf64x4h(xmm16, Address(rsp, 512));
__ vinsertf64x4h(xmm17, Address(rsp, 544));
__ vinsertf64x4h(xmm18, Address(rsp, 576));
__ vinsertf64x4h(xmm19, Address(rsp, 608));
__ vinsertf64x4h(xmm20, Address(rsp, 640));
__ vinsertf64x4h(xmm21, Address(rsp, 672));
__ vinsertf64x4h(xmm22, Address(rsp, 704));
__ vinsertf64x4h(xmm23, Address(rsp, 736));
__ vinsertf64x4h(xmm24, Address(rsp, 768));
__ vinsertf64x4h(xmm25, Address(rsp, 800));
__ vinsertf64x4h(xmm26, Address(rsp, 832));
__ vinsertf64x4h(xmm27, Address(rsp, 864));
__ vinsertf64x4h(xmm28, Address(rsp, 896));
__ vinsertf64x4h(xmm29, Address(rsp, 928));
__ vinsertf64x4h(xmm30, Address(rsp, 960));
__ vinsertf64x4h(xmm31, Address(rsp, 992));
__ addptr(rsp, 1024);
// On EVEX enabled targets everything is handled in pop fpu state
if ((restore_vectors) && (UseAVX < 3)) {
assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
int off = 0;
// Restore upper half of YMM registes (0..num_xmm_regs)
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
}
__ addptr(rsp, num_xmm_regs*16);
}
#else
assert(!restore_vectors, "vectors are generated only by C2");
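On the restore side the same split applies: the diff's comment notes that on EVEX enabled targets everything is handled in pop fpu state, so the explicit YMM-upper loop only runs for UseAVX < 3. A small sketch of that dispatch, with printf standing in for the emitted code (the helper name below is made up for illustration):

#include <cstdio>

static void restore_vectors_sketch(int UseAVX, bool restore_vectors) {
  if (restore_vectors && UseAVX < 3) {
    const int num_xmm_regs = 16;  // legacy xmm0..xmm15 bank only
    // Mirrors the loop of vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16))
    printf("restore %d YMM upper halves, then addptr(rsp, %d)\n",
           num_xmm_regs, num_xmm_regs * 16);
  } else {
    printf("nothing extra: pop fpu state already restored the register file\n");
  }
}

int main() {
  restore_vectors_sketch(2, true);  // pre-EVEX path
  restore_vectors_sketch(3, true);  // EVEX path
  return 0;
}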