mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 07:14:30 +02:00
8005544: Use 256bit YMM registers in arraycopy stubs on x86
Use YMM registers in arraycopy and array_fill stubs. Reviewed-by: roland, twisti
This commit is contained in:
parent
cfcd28fd9d
commit
7b9133b99c
5 changed files with 194 additions and 88 deletions
|
@ -3656,6 +3656,15 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) {
|
|||
emit_int8(0x01);
|
||||
}
|
||||
|
||||
// duplicate 4-byte integer data from src into 8 locations in dst
|
||||
// Emits the register-to-register form of VPBROADCASTD: the 4-byte value taken
// from src is replicated across dst using the 256-bit vector encoding.
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
|
||||
// This encoding must only be emitted when the CPU reports AVX2 support.
assert(VM_Version::supports_avx2(), "");
|
||||
// Ask the VEX prefix encoder for the 256-bit vector form.
bool vector256 = true;
|
||||
// xnoreg fills the middle register slot (presumably the unused second source
// operand -- confirm against vex_prefix_and_encode). The 66 SIMD prefix and the
// 0F 38 opcode map are selected here.
int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
|
||||
// Opcode byte.
emit_int8(0x58);
|
||||
// 0xC0 sets the two top bits of the operand byte (ModRM mod=11, i.e. both
// operands are registers); the remaining bits come from the encoder result.
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vzeroupper() {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
(void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
|
||||
|
|
|
@ -1754,6 +1754,9 @@ private:
|
|||
void vextractf128h(Address dst, XMMRegister src);
|
||||
void vextracti128h(Address dst, XMMRegister src);
|
||||
|
||||
// duplicate 4-byte integer data from src into 8 locations in dst
|
||||
void vpbroadcastd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// AVX instruction which is used to clear upper 128 bits of YMM registers and
|
||||
// to avoid transition penalty between AVX and SSE states. There is no
|
||||
// penalty if legacy SSE instructions are encoded using VEX prefix because
|
||||
|
|
|
@ -6011,8 +6011,31 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
|
|||
{
|
||||
assert( UseSSE >= 2, "supported cpu only" );
|
||||
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
|
||||
// Fill 32-byte chunks
|
||||
movdl(xtmp, value);
|
||||
if (UseAVX >= 2 && UseUnalignedLoadStores) {
|
||||
// Fill 64-byte chunks
|
||||
Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
|
||||
vpbroadcastd(xtmp, xtmp);
|
||||
|
||||
subl(count, 16 << shift);
|
||||
jcc(Assembler::less, L_check_fill_32_bytes);
|
||||
align(16);
|
||||
|
||||
BIND(L_fill_64_bytes_loop);
|
||||
vmovdqu(Address(to, 0), xtmp);
|
||||
vmovdqu(Address(to, 32), xtmp);
|
||||
addptr(to, 64);
|
||||
subl(count, 16 << shift);
|
||||
jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
|
||||
|
||||
BIND(L_check_fill_32_bytes);
|
||||
addl(count, 8 << shift);
|
||||
jccb(Assembler::less, L_check_fill_8_bytes);
|
||||
vmovdqu(Address(to, 0), xtmp);
|
||||
addptr(to, 32);
|
||||
subl(count, 8 << shift);
|
||||
} else {
|
||||
// Fill 32-byte chunks
|
||||
pshufd(xtmp, xtmp, 0);
|
||||
|
||||
subl(count, 8 << shift);
|
||||
|
@ -6034,6 +6057,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
|
|||
addptr(to, 32);
|
||||
subl(count, 8 << shift);
|
||||
jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
|
||||
}
|
||||
BIND(L_check_fill_8_bytes);
|
||||
addl(count, 8 << shift);
|
||||
jccb(Assembler::zero, L_exit);
|
||||
|
|
|
@ -797,6 +797,12 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ BIND(L_copy_64_bytes_loop);
|
||||
|
||||
if (UseUnalignedLoadStores) {
|
||||
if (UseAVX >= 2) {
|
||||
__ vmovdqu(xmm0, Address(from, 0));
|
||||
__ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
|
||||
__ vmovdqu(xmm1, Address(from, 32));
|
||||
__ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
|
||||
} else {
|
||||
__ movdqu(xmm0, Address(from, 0));
|
||||
__ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
|
||||
__ movdqu(xmm1, Address(from, 16));
|
||||
|
@ -805,7 +811,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
|
||||
__ movdqu(xmm3, Address(from, 48));
|
||||
__ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
|
||||
|
||||
}
|
||||
} else {
|
||||
__ movq(xmm0, Address(from, 0));
|
||||
__ movq(Address(from, to_from, Address::times_1, 0), xmm0);
|
||||
|
|
|
@ -1286,23 +1286,54 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// end_to - destination array end address
|
||||
// qword_count - 64-bits element count, negative
|
||||
// to - scratch
|
||||
// L_copy_32_bytes - entry label
|
||||
// L_copy_bytes - entry label
|
||||
// L_copy_8_bytes - exit label
|
||||
//
|
||||
void copy_32_bytes_forward(Register end_from, Register end_to,
|
||||
void copy_bytes_forward(Register end_from, Register end_to,
|
||||
Register qword_count, Register to,
|
||||
Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
|
||||
Label& L_copy_bytes, Label& L_copy_8_bytes) {
|
||||
DEBUG_ONLY(__ stop("enter at entry label, not here"));
|
||||
Label L_loop;
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loop);
|
||||
if (UseUnalignedLoadStores) {
|
||||
Label L_end;
|
||||
// Copy 64-bytes per iteration
|
||||
__ BIND(L_loop);
|
||||
if (UseAVX >= 2) {
|
||||
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
|
||||
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
|
||||
__ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
|
||||
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
|
||||
} else {
|
||||
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
|
||||
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
|
||||
__ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
|
||||
__ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
|
||||
}
|
||||
__ BIND(L_copy_bytes);
|
||||
__ addptr(qword_count, 8);
|
||||
__ jcc(Assembler::lessEqual, L_loop);
|
||||
__ subptr(qword_count, 4); // sub(8) and add(4)
|
||||
__ jccb(Assembler::greater, L_end);
|
||||
// Copy trailing 32 bytes
|
||||
if (UseAVX >= 2) {
|
||||
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
|
||||
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
|
||||
} else {
|
||||
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
|
||||
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
|
||||
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
|
||||
|
||||
}
|
||||
__ addptr(qword_count, 4);
|
||||
__ BIND(L_end);
|
||||
} else {
|
||||
// Copy 32-bytes per iteration
|
||||
__ BIND(L_loop);
|
||||
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
|
||||
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
|
||||
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
|
||||
|
@ -1311,15 +1342,15 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
|
||||
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
|
||||
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
|
||||
}
|
||||
__ BIND(L_copy_32_bytes);
|
||||
|
||||
__ BIND(L_copy_bytes);
|
||||
__ addptr(qword_count, 4);
|
||||
__ jcc(Assembler::lessEqual, L_loop);
|
||||
}
|
||||
__ subptr(qword_count, 4);
|
||||
__ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
|
||||
}
|
||||
|
||||
|
||||
// Copy big chunks backward
|
||||
//
|
||||
// Inputs:
|
||||
|
@ -1327,23 +1358,55 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// dest - destination array address
|
||||
// qword_count - 64-bits element count
|
||||
// to - scratch
|
||||
// L_copy_32_bytes - entry label
|
||||
// L_copy_bytes - entry label
|
||||
// L_copy_8_bytes - exit label
|
||||
//
|
||||
void copy_32_bytes_backward(Register from, Register dest,
|
||||
void copy_bytes_backward(Register from, Register dest,
|
||||
Register qword_count, Register to,
|
||||
Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
|
||||
Label& L_copy_bytes, Label& L_copy_8_bytes) {
|
||||
DEBUG_ONLY(__ stop("enter at entry label, not here"));
|
||||
Label L_loop;
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loop);
|
||||
if (UseUnalignedLoadStores) {
|
||||
Label L_end;
|
||||
// Copy 64-bytes per iteration
|
||||
__ BIND(L_loop);
|
||||
if (UseAVX >= 2) {
|
||||
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
|
||||
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
|
||||
__ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
|
||||
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
|
||||
} else {
|
||||
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
|
||||
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
|
||||
__ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
|
||||
__ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
|
||||
}
|
||||
__ BIND(L_copy_bytes);
|
||||
__ subptr(qword_count, 8);
|
||||
__ jcc(Assembler::greaterEqual, L_loop);
|
||||
|
||||
__ addptr(qword_count, 4); // add(8) and sub(4)
|
||||
__ jccb(Assembler::less, L_end);
|
||||
// Copy trailing 32 bytes
|
||||
if (UseAVX >= 2) {
|
||||
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
|
||||
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
|
||||
} else {
|
||||
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
|
||||
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
|
||||
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
|
||||
|
||||
}
|
||||
__ subptr(qword_count, 4);
|
||||
__ BIND(L_end);
|
||||
} else {
|
||||
// Copy 32-bytes per iteration
|
||||
__ BIND(L_loop);
|
||||
__ movq(to, Address(from, qword_count, Address::times_8, 24));
|
||||
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
|
||||
__ movq(to, Address(from, qword_count, Address::times_8, 16));
|
||||
|
@ -1352,10 +1415,11 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
|
||||
__ movq(to, Address(from, qword_count, Address::times_8, 0));
|
||||
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
|
||||
}
|
||||
__ BIND(L_copy_32_bytes);
|
||||
|
||||
__ BIND(L_copy_bytes);
|
||||
__ subptr(qword_count, 4);
|
||||
__ jcc(Assembler::greaterEqual, L_loop);
|
||||
}
|
||||
__ addptr(qword_count, 4);
|
||||
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
|
||||
}
|
||||
|
@ -1385,7 +1449,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
|
||||
Label L_copy_byte, L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
|
@ -1417,7 +1481,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
|
||||
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
|
||||
__ negptr(qword_count); // make the count negative
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1460,8 +1524,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
__ jmp(L_copy_4_bytes);
|
||||
|
||||
return start;
|
||||
|
@ -1488,7 +1552,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register count = rdx; // elements count
|
||||
|
@ -1531,10 +1595,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// Check for and copy trailing dword
|
||||
__ BIND(L_copy_4_bytes);
|
||||
__ testl(byte_count, 4);
|
||||
__ jcc(Assembler::zero, L_copy_32_bytes);
|
||||
__ jcc(Assembler::zero, L_copy_bytes);
|
||||
__ movl(rax, Address(from, qword_count, Address::times_8));
|
||||
__ movl(Address(to, qword_count, Address::times_8), rax);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1549,8 +1613,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
|
||||
restore_arg_regs();
|
||||
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
|
||||
|
@ -1585,7 +1649,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register count = rdx; // elements count
|
||||
|
@ -1616,7 +1680,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
|
||||
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
|
||||
__ negptr(qword_count);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1652,8 +1716,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
__ jmp(L_copy_4_bytes);
|
||||
|
||||
return start;
|
||||
|
@ -1700,7 +1764,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register count = rdx; // elements count
|
||||
|
@ -1735,10 +1799,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// Check for and copy trailing dword
|
||||
__ BIND(L_copy_4_bytes);
|
||||
__ testl(word_count, 2);
|
||||
__ jcc(Assembler::zero, L_copy_32_bytes);
|
||||
__ jcc(Assembler::zero, L_copy_bytes);
|
||||
__ movl(rax, Address(from, qword_count, Address::times_8));
|
||||
__ movl(Address(to, qword_count, Address::times_8), rax);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1753,8 +1817,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
|
||||
restore_arg_regs();
|
||||
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
|
||||
|
@ -1790,7 +1854,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register count = rdx; // elements count
|
||||
|
@ -1826,7 +1890,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
|
||||
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
|
||||
__ negptr(qword_count);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1853,8 +1917,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy 32-bytes chunks
|
||||
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
__ jmp(L_copy_4_bytes);
|
||||
|
||||
return start;
|
||||
|
@ -1882,7 +1946,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register count = rdx; // elements count
|
||||
|
@ -1916,10 +1980,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
|
||||
// Check for and copy trailing dword
|
||||
__ testl(dword_count, 1);
|
||||
__ jcc(Assembler::zero, L_copy_32_bytes);
|
||||
__ jcc(Assembler::zero, L_copy_bytes);
|
||||
__ movl(rax, Address(from, dword_count, Address::times_4, -4));
|
||||
__ movl(Address(to, dword_count, Address::times_4, -4), rax);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -1937,8 +2001,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
|
||||
__ bind(L_exit);
|
||||
if (is_oop) {
|
||||
|
@ -1976,7 +2040,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register qword_count = rdx; // elements count
|
||||
|
@ -2008,7 +2072,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
|
||||
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
|
||||
__ negptr(qword_count);
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -2027,8 +2091,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ ret(0);
|
||||
}
|
||||
|
||||
// Copy 64-byte chunks
|
||||
copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
|
||||
if (is_oop) {
|
||||
__ BIND(L_exit);
|
||||
|
@ -2065,7 +2129,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
|
||||
Label L_copy_bytes, L_copy_8_bytes, L_exit;
|
||||
const Register from = rdi; // source array address
|
||||
const Register to = rsi; // destination array address
|
||||
const Register qword_count = rdx; // elements count
|
||||
|
@ -2091,7 +2155,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
|
||||
}
|
||||
|
||||
__ jmp(L_copy_32_bytes);
|
||||
__ jmp(L_copy_bytes);
|
||||
|
||||
// Copy trailing qwords
|
||||
__ BIND(L_copy_8_bytes);
|
||||
|
@ -2110,8 +2174,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ ret(0);
|
||||
}
|
||||
|
||||
// Copy in 32-bytes chunks
|
||||
copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
|
||||
// Copy in multi-bytes chunks
|
||||
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
|
||||
|
||||
if (is_oop) {
|
||||
__ BIND(L_exit);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue