8190934: Regressions on Haswell Xeon due to JDK-8178811

Reviewed-by: neliasso, kvn
This commit is contained in:
Vivek Deshpande 2017-12-15 10:44:06 -08:00
parent d2db163edb
commit 25e03a8e65
4 changed files with 43 additions and 11 deletions

View file

@ -547,8 +547,12 @@ source %{
#define __ _masm. #define __ _masm.
// Tells the code emitters whether a vzeroupper instruction should be generated
// for the given compilation.
//
// Returns true only when the CPU supports vzeroupper AND either
//   - the compilation's max_vector_size() exceeds 16, or
//   - the compilation has been flagged via set_clear_upper_avx(true).
// Otherwise returns false, and callers emit nothing (see clear_avx_size()).
static bool generate_vzeroupper(Compile* C) {
  // Without hardware support there is nothing to emit, whatever the compile uses.
  if (!VM_Version::supports_vzeroupper()) {
    return false;
  }
  const bool vectors_exceed_16 = C->max_vector_size() > 16;
  return vectors_exceed_16 || C->clear_upper_avx();
}
static int clear_avx_size() { static int clear_avx_size() {
return (VM_Version::supports_vzeroupper()) ? 3: 0; // vzeroupper return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
} }
// !!!!! Special hack to get all types of calls to specify the byte offset // !!!!! Special hack to get all types of calls to specify the byte offset
@ -931,7 +935,7 @@ int MachPrologNode::reloc() const
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{ {
Compile* C = ra_->C; Compile* C = ra_->C;
if (VM_Version::supports_vzeroupper()) { if (generate_vzeroupper(C)) {
st->print("vzeroupper"); st->print("vzeroupper");
st->cr(); st->print("\t"); st->cr(); st->print("\t");
} }
@ -971,9 +975,11 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
Compile* C = ra_->C; Compile* C = ra_->C;
MacroAssembler _masm(&cbuf); MacroAssembler _masm(&cbuf);
// Clear upper bits of YMM registers when current compiled code uses if (generate_vzeroupper(C)) {
// wide vectors to avoid AVX <-> SSE transition penalty during call. // Clear upper bits of YMM registers when current compiled code uses
__ vzeroupper(); // wide vectors to avoid AVX <-> SSE transition penalty during call.
__ vzeroupper();
}
int framesize = C->frame_size_in_bytes(); int framesize = C->frame_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
@ -2112,11 +2118,13 @@ encode %{
enc_class clear_avx %{ enc_class clear_avx %{
debug_only(int off0 = cbuf.insts_size()); debug_only(int off0 = cbuf.insts_size());
// Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when current compiled code uses // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
// wide vectors to avoid AVX <-> SSE transition penalty during call. // Clear upper bits of YMM registers when current compiled code uses
MacroAssembler _masm(&cbuf); // wide vectors to avoid AVX <-> SSE transition penalty during call.
__ vzeroupper(); MacroAssembler _masm(&cbuf);
__ vzeroupper();
}
debug_only(int off1 = cbuf.insts_size()); debug_only(int off1 = cbuf.insts_size());
assert(off1 - off0 == clear_avx_size(), "correct size prediction"); assert(off1 - off0 == clear_avx_size(), "correct size prediction");
%} %}

View file

@ -1094,6 +1094,7 @@ void Compile::Init(int aliaslevel) {
_major_progress = true; // start out assuming good things will happen _major_progress = true; // start out assuming good things will happen
set_has_unsafe_access(false); set_has_unsafe_access(false);
set_max_vector_size(0); set_max_vector_size(0);
set_clear_upper_avx(false); //false as default for clear upper bits of ymm registers
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist)); Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
set_decompile_count(0); set_decompile_count(0);

View file

@ -379,6 +379,7 @@ class Compile : public Phase {
bool _has_boxed_value; // True if a boxed object is allocated bool _has_boxed_value; // True if a boxed object is allocated
bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
uint _max_vector_size; // Maximum size of generated vectors uint _max_vector_size; // Maximum size of generated vectors
bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper
uint _trap_hist[trapHistLength]; // Cumulative traps uint _trap_hist[trapHistLength]; // Cumulative traps
bool _trap_can_recompile; // Have we emitted a recompiling trap? bool _trap_can_recompile; // Have we emitted a recompiling trap?
uint _decompile_count; // Cumulative decompilation counts. uint _decompile_count; // Cumulative decompilation counts.
@ -656,8 +657,10 @@ class Compile : public Phase {
void set_has_boxed_value(bool z) { _has_boxed_value = z; } void set_has_boxed_value(bool z) { _has_boxed_value = z; }
bool has_reserved_stack_access() const { return _has_reserved_stack_access; } bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; } void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
uint max_vector_size() const { return _max_vector_size; } uint max_vector_size() const { return _max_vector_size; }
void set_max_vector_size(uint s) { _max_vector_size = s; } void set_max_vector_size(uint s) { _max_vector_size = s; }
bool clear_upper_avx() const { return _clear_upper_avx; }
void set_clear_upper_avx(bool s) { _clear_upper_avx = s; }
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; } void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; } uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
bool trap_can_recompile() const { return _trap_can_recompile; } bool trap_can_recompile() const { return _trap_can_recompile; }

View file

@ -328,6 +328,13 @@ class LibraryCallKit : public GraphKit {
bool inline_profileBoolean(); bool inline_profileBoolean();
bool inline_isCompileConstant(); bool inline_isCompileConstant();
// Marks the current compilation as needing its upper AVX register bits cleared
// by setting the clear_upper_avx flag on C.
// Only has an effect when built for X86 and UseAVX is at least 2; on every
// other configuration this is a no-op.
void clear_upper_avx() {
#ifdef X86
  const bool avx_level_at_least_2 = (UseAVX >= 2);
  if (avx_level_at_least_2) {
    C->set_clear_upper_avx(true);
  }
#endif
}
}; };
//---------------------------make_vm_intrinsic---------------------------- //---------------------------make_vm_intrinsic----------------------------
@ -1082,6 +1089,7 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node
// All these intrinsics have checks. // All these intrinsics have checks.
C->set_has_split_ifs(true); // Has chance for split-if optimization C->set_has_split_ifs(true); // Has chance for split-if optimization
clear_upper_avx();
return _gvn.transform(result); return _gvn.transform(result);
} }
@ -1156,6 +1164,8 @@ bool LibraryCallKit::inline_array_equals(StrIntrinsicNode::ArgEnc ae) {
const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES; const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae))); set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
clear_upper_avx();
return true; return true;
} }
@ -1227,6 +1237,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex() {
result = _gvn.transform(result); result = _gvn.transform(result);
set_result(result); set_result(result);
replace_in_map(index, result); replace_in_map(index, result);
clear_upper_avx();
return true; return true;
} }
@ -1325,6 +1336,7 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) {
set_control(_gvn.transform(region)); set_control(_gvn.transform(region));
record_for_igvn(region); record_for_igvn(region);
set_result(_gvn.transform(phi)); set_result(_gvn.transform(phi));
clear_upper_avx();
return true; return true;
} }
@ -1488,6 +1500,8 @@ bool LibraryCallKit::inline_string_copy(bool compress) {
if (compress) { if (compress) {
set_result(_gvn.transform(count)); set_result(_gvn.transform(count));
} }
clear_upper_avx();
return true; return true;
} }
@ -1585,6 +1599,8 @@ bool LibraryCallKit::inline_string_toBytesU() {
if (!stopped()) { if (!stopped()) {
set_result(newcopy); set_result(newcopy);
} }
clear_upper_avx();
return true; return true;
} }
@ -5286,6 +5302,8 @@ bool LibraryCallKit::inline_arraycopy() {
assert(validated, "shouldn't transform if all arguments not validated"); assert(validated, "shouldn't transform if all arguments not validated");
set_all_memory(n); set_all_memory(n);
} }
clear_upper_avx();
return true; return true;
} }
@ -5406,6 +5424,8 @@ bool LibraryCallKit::inline_encodeISOArray() {
Node* res_mem = _gvn.transform(new SCMemProjNode(enc)); Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
set_memory(res_mem, mtype); set_memory(res_mem, mtype);
set_result(enc); set_result(enc);
clear_upper_avx();
return true; return true;
} }