mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 06:14:49 +02:00
4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn
This commit is contained in:
parent
519c627fe5
commit
08d9e03b81
17 changed files with 940 additions and 11 deletions
|
@ -1587,6 +1587,185 @@ class StubGenerator: public StubCodeGenerator {
|
|||
return start;
|
||||
}
|
||||
|
||||
//
|
||||
// Generate stub for disjoint byte, short or int fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
//
|
||||
// Arguments for generated stub:
|
||||
// to: O0
|
||||
// value: O1
|
||||
// count: O2 treated as signed
|
||||
//
|
||||
address generate_fill(BasicType t, bool aligned, const char* name) {
  // Emits a leaf stub that stores 'value' into 'count' consecutive elements
  // of type 't' (T_BYTE, T_SHORT or T_INT) starting at 'to', and returns 0
  // in O0.  Returns the entry address of the emitted code.
  //
  // Strategy: replicate the element into a wider pattern, align 'to' with
  // narrow stores, fill with 8-byte stores (32 bytes per loop iteration,
  // then 8 bytes per iteration), and finish with 4/2/1-byte tail stores.
  // Byte and short arrays shorter than 8 bytes are filled element by
  // element so that no store is wider than the guaranteed alignment.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  const Register to    = O0;   // destination array address
  const Register value = O1;   // fill value
  const Register count = O2;   // elements count
  // O3 is used as a temp register

  assert_clean_int(count, O3);     // Make sure 'count' is clean int.

  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes;
  Label L_fill_elements;           // only bound for T_BYTE and T_SHORT

  // (1 << shift) is the number of elements that fit in 4 bytes.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  BLOCK_COMMENT("Entry:");

  if (t == T_BYTE) {
    // Zero extend value, then replicate the byte into the low 16 bits
    __ and3(value, 0xff, value);
    __ sllx(value, 8, O3);
    __ or3(value, O3, value);
  }
  if (t == T_SHORT) {
    // Zero extend value across the whole 64-bit register.  Masking only
    // bits 31..16 would leave bits 63..32 of a sign-extended argument set,
    // and they would later be or'ed into the 64-bit store pattern.
    __ sllx(value, 48, value);
    __ srlx(value, 48, value);
  }
  if (t == T_BYTE || t == T_SHORT) {
    // Replicate the low 16 bits into the low 32 bits
    __ sllx(value, 16, O3);
    __ or3(value, O3, value);
  }

  __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  if (t == T_INT) {
    // At most one int; 'to' is int aligned, so the 4-byte tail store is safe.
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp
    __ delayed()->andcc(count, 1<<shift, G0);
  } else {
    // 'to' has not been aligned yet, so the stw/sth tail must not be used.
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
    __ delayed()->andcc(count, 1, G0);
  }

  if (!aligned && (t == T_BYTE || t == T_SHORT)) {
    // align destination address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      __ andcc(to, 1, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
      __ delayed()->nop();
      __ stb(value, to, 0);
      __ inc(to, 1);
      __ dec(count, 1);
      __ BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    __ andcc(to, 2, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
    __ delayed()->nop();
    __ sth(value, to, 0);
    __ inc(to, 2);
    __ dec(count, 1 << (shift - 1));
    __ BIND(L_skip_align2);
  }
#ifdef _LP64
  if (!aligned) {
#endif
  // align to 8 bytes, we know we are 4 byte aligned to start
  __ andcc(to, 7, G0);
  __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
  __ delayed()->nop();
  __ stw(value, to, 0);
  __ inc(to, 4);
  __ dec(count, 1 << shift);
  __ BIND(L_fill_32_bytes);
#ifdef _LP64
  }
#endif

  // Widen the pattern to 64 bits before any branch that can reach an stx.
  // Both the 32-byte loop and the 8-byte loop store the full register, so
  // this must not be skipped when fewer than 32 bytes remain.
  if (t == T_INT) {
    // Zero extend value
    __ srl(value, 0, value);
  }
  if (t == T_BYTE || t == T_SHORT || t == T_INT) {
    __ sllx(value, 32, O3);
    __ or3(value, O3, value);
  }

  Label L_check_fill_8_bytes;
  // Fill 32-byte chunks
  __ subcc(count, 8 << shift, count);
  __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
  __ delayed()->nop();

  Label L_fill_32_bytes_loop;
  __ align(16);
  __ BIND(L_fill_32_bytes_loop);

  __ stx(value, to, 0);
  __ stx(value, to, 8);
  __ stx(value, to, 16);
  __ stx(value, to, 24);

  __ subcc(count, 8 << shift, count);
  __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
  __ delayed()->add(to, 32, to);

  __ BIND(L_check_fill_8_bytes);
  // Undo the last 32-byte bias; nothing left means we are done.
  __ addcc(count, 8 << shift, count);
  __ brx(Assembler::zero, false, Assembler::pn, L_exit);
  __ delayed()->subcc(count, 1 << (shift + 1), count);
  // The second branch tests the flags set by the subcc in the delay slot above.
  __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
  __ delayed()->andcc(count, 1<<shift, G0);

  //
  // length is too short, just fill 8 bytes at a time
  //
  Label L_fill_8_bytes_loop;
  __ BIND(L_fill_8_bytes_loop);
  __ stx(value, to, 0);
  __ subcc(count, 1 << (shift + 1), count);
  __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
  __ delayed()->add(to, 8, to);

  // 'count' is now biased by a multiple of 8 bytes worth of elements, which
  // leaves its low bits intact; the tail tests below look only at those bits.

  // fill trailing 4 bytes
  __ andcc(count, 1<<shift, G0);  // in delay slot of branches
  __ BIND(L_fill_4_bytes);
  __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
  if (t == T_BYTE || t == T_SHORT) {
    __ delayed()->andcc(count, 1<<(shift-1), G0);
  } else {
    __ delayed()->nop();
  }
  __ stw(value, to, 0);
  if (t == T_BYTE || t == T_SHORT) {
    __ inc(to, 4);
    // fill trailing 2 bytes
    __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
    __ BIND(L_fill_2_bytes);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
    __ delayed()->andcc(count, 1, count);
    __ sth(value, to, 0);
    if (t == T_BYTE) {
      __ inc(to, 2);
      // fill trailing byte
      __ andcc(count, 1, count);  // in delay slot of branches
      __ BIND(L_fill_byte);
      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
      __ delayed()->nop();
      __ stb(value, to, 0);
    } else {
      __ BIND(L_fill_byte);
    }
  } else {
    __ BIND(L_fill_2_bytes);
  }
  __ BIND(L_exit);
  __ retl();
  __ delayed()->mov(G0, O0); // return 0

  // Handle fills of less than 8 bytes for byte and short arrays using only
  // element-sized stores, because 'to' may be misaligned for anything wider.
  // On entry the condition codes hold andcc(count, 1), set in the delay slot
  // of the dispatching branch.  Int arrays are handled by the tail code above.
  if (t == T_BYTE) {
    Label L_fill_2, L_fill_4;
    __ BIND(L_fill_elements);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
    __ delayed()->andcc(count, 2, G0);
    __ stb(value, to, 0);
    __ inc(to, 1);
    __ BIND(L_fill_2);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
    __ delayed()->andcc(count, 4, G0);
    __ stb(value, to, 0);
    __ stb(value, to, 1);
    __ inc(to, 2);
    __ BIND(L_fill_4);
    __ brx(Assembler::zero, false, Assembler::pt, L_exit);
    __ delayed()->nop();
    __ stb(value, to, 0);
    __ stb(value, to, 1);
    __ stb(value, to, 2);
    __ stb(value, to, 3);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
  }

  if (t == T_SHORT) {
    Label L_fill_2;
    __ BIND(L_fill_elements);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
    __ delayed()->andcc(count, 2, G0);
    __ sth(value, to, 0);
    __ inc(to, 2);
    __ BIND(L_fill_2);
    __ brx(Assembler::zero, false, Assembler::pt, L_exit);
    __ delayed()->nop();
    __ sth(value, to, 0);
    __ sth(value, to, 2);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
  }

  return start;
}
|
||||
|
||||
//
|
||||
// Generate stub for conjoint short copy. If "aligned" is true, the
|
||||
// "from" and "to" addresses are assumed to be heapword aligned.
|
||||
|
@ -2855,6 +3034,13 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
|
||||
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
|
||||
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
|
||||
|
||||
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
||||
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
||||
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
||||
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
||||
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
||||
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
||||
}
|
||||
|
||||
void generate_initial() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue