mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 14:24:46 +02:00
6978249: spill between cpu and fpu registers when those moves are fast
Reviewed-by: kvn
This commit is contained in:
parent
a325f5589a
commit
a54b1ff70e
10 changed files with 116 additions and 8 deletions
|
@ -112,6 +112,11 @@ void VM_Version::initialize() {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Currently not supported anywhere.
|
||||
FLAG_SET_DEFAULT(UseFPUForSpilling, false);
|
||||
#endif
|
||||
|
||||
char buf[512];
|
||||
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_v8() ? ", has_v8" : ""),
|
||||
|
|
|
@ -482,6 +482,15 @@ void VM_Version::get_processor_features() {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
if (UseFPUForSpilling) {
|
||||
if (UseSSE < 2) {
|
||||
// Only supported with SSE2+
|
||||
FLAG_SET_DEFAULT(UseFPUForSpilling, false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
|
||||
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
|
||||
|
||||
|
@ -520,6 +529,11 @@ void VM_Version::get_processor_features() {
|
|||
if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
|
||||
AllocatePrefetchDistance = 192;
|
||||
AllocatePrefetchLines = 4;
|
||||
#ifdef COMPILER2
|
||||
if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
|
||||
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
|
||||
|
|
|
@ -852,6 +852,39 @@ static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst
|
|||
}
|
||||
}
|
||||
|
||||
// Spill-copy helper for a 32-bit move whose destination register is dst_lo
// and whose source register is src_lo (the print form names them in
// "dst, src" order under the mnemonic "movdl").
//
//   cbuf     - when non-null, the instruction bytes are appended to it.
//   do_size  - when cbuf is null and this is false, the instruction is
//              printed to st instead (non-PRODUCT builds only).
//   src_lo   - source register number; indexes Matcher::_regEncode/regName.
//   dst_lo   - destination register number; indexes the same tables.
//   src_hi, dst_hi, size - accepted for signature parity; not read here.
//   st       - output stream used only for the non-PRODUCT print path.
//
// Always returns 4, the number of bytes emitted on the cbuf path
// (three opcode bytes plus the byte produced by emit_rm).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  const int insn_bytes = 4;

  if (!cbuf) {
#ifndef PRODUCT
    // No code buffer: optionally describe the instruction instead of emitting it.
    if (!do_size) {
      st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
    }
#endif
    return insn_bytes;
  }

  // Only the low three bits of each register encoding are used.
  const int dst_enc = Matcher::_regEncode[dst_lo] & 7;
  const int src_enc = Matcher::_regEncode[src_lo] & 7;

  // 32-bit form: 66 0F 6E, then the register-direct operand byte.
  emit_opcode(*cbuf, 0x66);
  emit_opcode(*cbuf, 0x0F);
  emit_opcode(*cbuf, 0x6E);
  emit_rm(*cbuf, 0x3, dst_enc, src_enc);
  return insn_bytes;
}
|
||||
|
||||
|
||||
// Spill-copy helper for a 32-bit move whose source register is src_lo and
// whose destination register is dst_lo (the print form names them in
// "dst, src" order under the mnemonic "movdl").
//
//   cbuf     - when non-null, the instruction bytes are appended to it.
//   do_size  - when cbuf is null and this is false, the instruction is
//              printed to st instead (non-PRODUCT builds only).
//   src_lo   - source register number; indexes Matcher::_regEncode/regName.
//   dst_lo   - destination register number; indexes the same tables.
//   src_hi, dst_hi, size - accepted for signature parity; not read here.
//   st       - output stream used only for the non-PRODUCT print path.
//
// Always returns 4, the number of bytes emitted on the cbuf path
// (three opcode bytes plus the byte produced by emit_rm).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  const int insn_bytes = 4;

  if (!cbuf) {
#ifndef PRODUCT
    // No code buffer: optionally describe the instruction instead of emitting it.
    if (!do_size) {
      st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
    }
#endif
    return insn_bytes;
  }

  // Only the low three bits of each register encoding are used.
  const int src_enc = Matcher::_regEncode[src_lo] & 7;
  const int dst_enc = Matcher::_regEncode[dst_lo] & 7;

  // 32-bit form: 66 0F 7E, then the register-direct operand byte. Note the
  // operand order passed to emit_rm is source first, then destination.
  emit_opcode(*cbuf, 0x66);
  emit_opcode(*cbuf, 0x0F);
  emit_opcode(*cbuf, 0x7E);
  emit_rm(*cbuf, 0x3, src_enc, dst_enc);
  return insn_bytes;
}
|
||||
|
||||
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
|
||||
if( cbuf ) {
|
||||
emit_opcode(*cbuf, 0x8B );
|
||||
|
@ -947,6 +980,12 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
|||
if( dst_first_rc == rc_int && src_first_rc == rc_stack )
|
||||
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
|
||||
|
||||
// Check for integer reg-xmm reg copy
|
||||
if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
|
||||
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
|
||||
"no 64 bit integer-float reg moves" );
|
||||
return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
|
||||
}
|
||||
// --------------------------------------
|
||||
// Check for float reg-reg copy
|
||||
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
|
||||
|
@ -1018,6 +1057,13 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
|||
return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
|
||||
}
|
||||
|
||||
// Check for xmm reg-integer reg copy
|
||||
if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
|
||||
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
|
||||
"no 64 bit float-integer reg moves" );
|
||||
return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
|
||||
}
|
||||
|
||||
// Check for xmm store
|
||||
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
|
||||
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
|
||||
|
|
|
@ -1607,8 +1607,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
|||
emit_opcode(*cbuf, 0x0F);
|
||||
emit_opcode(*cbuf, 0x7E);
|
||||
emit_rm(*cbuf, 0x3,
|
||||
Matcher::_regEncode[dst_first] & 7,
|
||||
Matcher::_regEncode[src_first] & 7);
|
||||
Matcher::_regEncode[src_first] & 7,
|
||||
Matcher::_regEncode[dst_first] & 7);
|
||||
#ifndef PRODUCT
|
||||
} else if (!do_size) {
|
||||
st->print("movdq %s, %s\t# spill",
|
||||
|
@ -1637,8 +1637,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
|||
emit_opcode(*cbuf, 0x0F);
|
||||
emit_opcode(*cbuf, 0x7E);
|
||||
emit_rm(*cbuf, 0x3,
|
||||
Matcher::_regEncode[dst_first] & 7,
|
||||
Matcher::_regEncode[src_first] & 7);
|
||||
Matcher::_regEncode[src_first] & 7,
|
||||
Matcher::_regEncode[dst_first] & 7);
|
||||
#ifndef PRODUCT
|
||||
} else if (!do_size) {
|
||||
st->print("movdl %s, %s\t# spill",
|
||||
|
|
|
@ -178,6 +178,9 @@
|
|||
product(bool, ReduceBulkZeroing, true, \
|
||||
"When bulk-initializing, try to avoid needless zeroing") \
|
||||
\
|
||||
product(bool, UseFPUForSpilling, false, \
|
||||
"Spill integer registers to FPU instead of stack when possible") \
|
||||
\
|
||||
develop_pd(intx, RegisterCostAreaRatio, \
|
||||
"Spill selection in reg allocator: scale area by (X/64K) before " \
|
||||
"adding cost") \
|
||||
|
|
|
@ -780,6 +780,14 @@ bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block
|
|||
// Number of bits free
|
||||
uint rm_size = rm.Size();
|
||||
|
||||
if (UseFPUForSpilling && rm.is_AllStack() ) {
|
||||
// Don't coalesce when frequency difference is large
|
||||
Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
|
||||
Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
|
||||
if (src_def_b->_freq > 10*dst_b->_freq )
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we can use any stack slot, then effective size is infinite
|
||||
if( rm.is_AllStack() ) rm_size += 1000000;
|
||||
// Incompatible masks, no way to coalesce
|
||||
|
|
|
@ -456,6 +456,23 @@ void Matcher::init_first_stack_mask() {
|
|||
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
|
||||
idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
|
||||
|
||||
if (UseFPUForSpilling) {
|
||||
// This mask logic assumes that the spill operations are
|
||||
// symmetric and that the registers involved are the same size.
|
||||
// On sparc for instance we may have to use 64 bit moves that will
|
||||
// kill 2 registers when used with F0-F31.
|
||||
idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
|
||||
#ifdef _LP64
|
||||
idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
|
||||
#else
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Make up debug masks. Any spill slot plus callee-save registers.
|
||||
// Caller-save registers are assumed to be trashable by the various
|
||||
// inline-cache fixup routines.
|
||||
|
|
|
@ -975,6 +975,19 @@ uint PhaseChaitin::Split( uint maxlrg ) {
|
|||
insidx++; // Reset iterator to skip USE side split
|
||||
continue;
|
||||
}
|
||||
|
||||
if (UseFPUForSpilling && n->is_Call() && !uup && !dup ) {
|
||||
// The use at the call can force the def down so insert
|
||||
// a split before the use to allow the def more freedom.
|
||||
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
|
||||
// If it wasn't split, bail
|
||||
if (!maxlrg) {
|
||||
return 0;
|
||||
}
|
||||
insidx++; // Reset iterator to skip USE side split
|
||||
continue;
|
||||
}
|
||||
|
||||
// Here is the logic chart which describes USE Splitting:
|
||||
// 0 = false or DOWN, 1 = true or UP
|
||||
//
|
||||
|
|
|
@ -3003,10 +3003,6 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
|
|||
CommandLineFlags::printSetFlags();
|
||||
}
|
||||
|
||||
if (PrintFlagsFinal) {
|
||||
CommandLineFlags::printFlags();
|
||||
}
|
||||
|
||||
// Apply CPU specific policy for the BiasedLocking
|
||||
if (UseBiasedLocking) {
|
||||
if (!VM_Version::use_biased_locking() &&
|
||||
|
|
|
@ -128,6 +128,12 @@ jint init_globals() {
|
|||
Universe::verify(); // make sure we're starting with a clean slate
|
||||
}
|
||||
|
||||
// All the flags that get adjusted by VM_Version_init and os::init_2
|
||||
// have been set so dump the flags now.
|
||||
if (PrintFlagsFinal) {
|
||||
CommandLineFlags::printFlags();
|
||||
}
|
||||
|
||||
return JNI_OK;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue