8205475: AARCH64: optimize FPU loads and stores in C1_Runtime1_aarch64.cpp

Reviewed-by: aph, adinn
This commit is contained in:
Dmitrij Pochepko 2018-06-25 21:22:16 +03:00
parent 0499c44e3c
commit c7c819cd8b

View file

@@ -265,9 +265,11 @@ static OopMap* save_live_registers(StubAssembler* sasm,
__ push(RegSet::range(r0, r29), sp); // integer registers except lr & sp __ push(RegSet::range(r0, r29), sp); // integer registers except lr & sp
if (save_fpu_registers) { if (save_fpu_registers) {
for (int i = 30; i >= 0; i -= 2) for (int i = 31; i>= 0; i -= 4) {
__ stpd(as_FloatRegister(i), as_FloatRegister(i+1), __ sub(sp, sp, 4 * wordSize); // no pre-increment for st1. Emulate it without modifying other registers
Address(__ pre(sp, -2 * wordSize))); __ st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
as_FloatRegister(i), __ T1D, Address(sp));
}
} else { } else {
__ add(sp, sp, -32 * wordSize); __ add(sp, sp, -32 * wordSize);
} }
@@ -277,9 +279,9 @@ static OopMap* save_live_registers(StubAssembler* sasm,
static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
if (restore_fpu_registers) { if (restore_fpu_registers) {
for (int i = 0; i < 32; i += 2) for (int i = 0; i < 32; i += 4)
__ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), __ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
Address(__ post(sp, 2 * wordSize))); as_FloatRegister(i+3), __ T1D, Address(__ post(sp, 4 * wordSize)));
} else { } else {
__ add(sp, sp, 32 * wordSize); __ add(sp, sp, 32 * wordSize);
} }
@@ -290,9 +292,9 @@ static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registe
static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) { static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) {
if (restore_fpu_registers) { if (restore_fpu_registers) {
for (int i = 0; i < 32; i += 2) for (int i = 0; i < 32; i += 4)
__ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), __ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
Address(__ post(sp, 2 * wordSize))); as_FloatRegister(i+3), __ T1D, Address(__ post(sp, 4 * wordSize)));
} else { } else {
__ add(sp, sp, 32 * wordSize); __ add(sp, sp, 32 * wordSize);
} }