mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-21 11:34:38 +02:00
8190934: Regressions on Haswell Xeon due to JDK-8178811
Reviewed-by: neliasso, kvn
This commit is contained in:
parent
d2db163edb
commit
25e03a8e65
4 changed files with 43 additions and 11 deletions
|
@ -547,8 +547,12 @@ source %{
|
||||||
|
|
||||||
#define __ _masm.
|
#define __ _masm.
|
||||||
|
|
||||||
|
// Tells whether code generated for compilation C should contain a vzeroupper.
//
// The answer is true only when the CPU reports vzeroupper support and the
// compilation either recorded a maximum vector size above 16 (presumably
// bytes, i.e. wider than an XMM register -- confirm) or had its
// clear_upper_avx flag set.
static bool generate_vzeroupper(Compile* C) {
  // Without hardware support there is nothing to emit; bail out before
  // consulting the compilation state.
  if (!VM_Version::supports_vzeroupper()) {
    return false;
  }
  // Generate vzeroupper for wide vectors or when explicitly requested.
  return C->max_vector_size() > 16 || C->clear_upper_avx();
}
|
||||||
|
|
||||||
static int clear_avx_size() {
|
static int clear_avx_size() {
|
||||||
return (VM_Version::supports_vzeroupper()) ? 3: 0; // vzeroupper
|
return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
|
||||||
}
|
}
|
||||||
|
|
||||||
// !!!!! Special hack to get all types of calls to specify the byte offset
|
// !!!!! Special hack to get all types of calls to specify the byte offset
|
||||||
|
@ -931,7 +935,7 @@ int MachPrologNode::reloc() const
|
||||||
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
|
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
|
||||||
{
|
{
|
||||||
Compile* C = ra_->C;
|
Compile* C = ra_->C;
|
||||||
if (VM_Version::supports_vzeroupper()) {
|
if (generate_vzeroupper(C)) {
|
||||||
st->print("vzeroupper");
|
st->print("vzeroupper");
|
||||||
st->cr(); st->print("\t");
|
st->cr(); st->print("\t");
|
||||||
}
|
}
|
||||||
|
@ -971,9 +975,11 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
|
||||||
Compile* C = ra_->C;
|
Compile* C = ra_->C;
|
||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
|
|
||||||
|
if (generate_vzeroupper(C)) {
|
||||||
// Clear upper bits of YMM registers when current compiled code uses
|
// Clear upper bits of YMM registers when current compiled code uses
|
||||||
// wide vectors to avoid AVX <-> SSE transition penalty during call.
|
// wide vectors to avoid AVX <-> SSE transition penalty during call.
|
||||||
__ vzeroupper();
|
__ vzeroupper();
|
||||||
|
}
|
||||||
|
|
||||||
int framesize = C->frame_size_in_bytes();
|
int framesize = C->frame_size_in_bytes();
|
||||||
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
|
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
|
||||||
|
@ -2112,11 +2118,13 @@ encode %{
|
||||||
|
|
||||||
enc_class clear_avx %{
|
enc_class clear_avx %{
|
||||||
debug_only(int off0 = cbuf.insts_size());
|
debug_only(int off0 = cbuf.insts_size());
|
||||||
|
if (generate_vzeroupper(Compile::current())) {
|
||||||
// Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
|
// Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
|
||||||
// Clear upper bits of YMM registers when current compiled code uses
|
// Clear upper bits of YMM registers when current compiled code uses
|
||||||
// wide vectors to avoid AVX <-> SSE transition penalty during call.
|
// wide vectors to avoid AVX <-> SSE transition penalty during call.
|
||||||
MacroAssembler _masm(&cbuf);
|
MacroAssembler _masm(&cbuf);
|
||||||
__ vzeroupper();
|
__ vzeroupper();
|
||||||
|
}
|
||||||
debug_only(int off1 = cbuf.insts_size());
|
debug_only(int off1 = cbuf.insts_size());
|
||||||
assert(off1 - off0 == clear_avx_size(), "correct size prediction");
|
assert(off1 - off0 == clear_avx_size(), "correct size prediction");
|
||||||
%}
|
%}
|
||||||
|
|
|
@ -1094,6 +1094,7 @@ void Compile::Init(int aliaslevel) {
|
||||||
_major_progress = true; // start out assuming good things will happen
|
_major_progress = true; // start out assuming good things will happen
|
||||||
set_has_unsafe_access(false);
|
set_has_unsafe_access(false);
|
||||||
set_max_vector_size(0);
|
set_max_vector_size(0);
|
||||||
|
set_clear_upper_avx(false); //false as default for clear upper bits of ymm registers
|
||||||
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
|
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
|
||||||
set_decompile_count(0);
|
set_decompile_count(0);
|
||||||
|
|
||||||
|
|
|
@ -379,6 +379,7 @@ class Compile : public Phase {
|
||||||
bool _has_boxed_value; // True if a boxed object is allocated
|
bool _has_boxed_value; // True if a boxed object is allocated
|
||||||
bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
|
bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
|
||||||
uint _max_vector_size; // Maximum size of generated vectors
|
uint _max_vector_size; // Maximum size of generated vectors
|
||||||
|
bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper
|
||||||
uint _trap_hist[trapHistLength]; // Cumulative traps
|
uint _trap_hist[trapHistLength]; // Cumulative traps
|
||||||
bool _trap_can_recompile; // Have we emitted a recompiling trap?
|
bool _trap_can_recompile; // Have we emitted a recompiling trap?
|
||||||
uint _decompile_count; // Cumulative decompilation counts.
|
uint _decompile_count; // Cumulative decompilation counts.
|
||||||
|
@ -658,6 +659,8 @@ class Compile : public Phase {
|
||||||
void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
|
void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
|
||||||
uint max_vector_size() const { return _max_vector_size; }
|
uint max_vector_size() const { return _max_vector_size; }
|
||||||
void set_max_vector_size(uint s) { _max_vector_size = s; }
|
void set_max_vector_size(uint s) { _max_vector_size = s; }
|
||||||
|
bool clear_upper_avx() const { return _clear_upper_avx; }
|
||||||
|
void set_clear_upper_avx(bool s) { _clear_upper_avx = s; }
|
||||||
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
|
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
|
||||||
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
|
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
|
||||||
bool trap_can_recompile() const { return _trap_can_recompile; }
|
bool trap_can_recompile() const { return _trap_can_recompile; }
|
||||||
|
|
|
@ -328,6 +328,13 @@ class LibraryCallKit : public GraphKit {
|
||||||
|
|
||||||
bool inline_profileBoolean();
|
bool inline_profileBoolean();
|
||||||
bool inline_isCompileConstant();
|
bool inline_isCompileConstant();
|
||||||
|
// Marks the compilation C as needing its upper YMM bits cleared by setting
// its clear_upper_avx flag. Does nothing unless built for X86 with
// UseAVX >= 2; the flag is never reset to false here.
void clear_upper_avx() {
#ifdef X86
  const bool avx2_or_newer = (UseAVX >= 2);
  if (avx2_or_newer) {
    C->set_clear_upper_avx(true);
  }
#endif
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//---------------------------make_vm_intrinsic----------------------------
|
//---------------------------make_vm_intrinsic----------------------------
|
||||||
|
@ -1082,6 +1089,7 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node
|
||||||
|
|
||||||
// All these intrinsics have checks.
|
// All these intrinsics have checks.
|
||||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return _gvn.transform(result);
|
return _gvn.transform(result);
|
||||||
}
|
}
|
||||||
|
@ -1156,6 +1164,8 @@ bool LibraryCallKit::inline_array_equals(StrIntrinsicNode::ArgEnc ae) {
|
||||||
|
|
||||||
const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
|
const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
|
||||||
set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
|
set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1227,6 +1237,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex() {
|
||||||
result = _gvn.transform(result);
|
result = _gvn.transform(result);
|
||||||
set_result(result);
|
set_result(result);
|
||||||
replace_in_map(index, result);
|
replace_in_map(index, result);
|
||||||
|
clear_upper_avx();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1325,6 +1336,7 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) {
|
||||||
set_control(_gvn.transform(region));
|
set_control(_gvn.transform(region));
|
||||||
record_for_igvn(region);
|
record_for_igvn(region);
|
||||||
set_result(_gvn.transform(phi));
|
set_result(_gvn.transform(phi));
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1488,6 +1500,8 @@ bool LibraryCallKit::inline_string_copy(bool compress) {
|
||||||
if (compress) {
|
if (compress) {
|
||||||
set_result(_gvn.transform(count));
|
set_result(_gvn.transform(count));
|
||||||
}
|
}
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1585,6 +1599,8 @@ bool LibraryCallKit::inline_string_toBytesU() {
|
||||||
if (!stopped()) {
|
if (!stopped()) {
|
||||||
set_result(newcopy);
|
set_result(newcopy);
|
||||||
}
|
}
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5286,6 +5302,8 @@ bool LibraryCallKit::inline_arraycopy() {
|
||||||
assert(validated, "shouldn't transform if all arguments not validated");
|
assert(validated, "shouldn't transform if all arguments not validated");
|
||||||
set_all_memory(n);
|
set_all_memory(n);
|
||||||
}
|
}
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -5406,6 +5424,8 @@ bool LibraryCallKit::inline_encodeISOArray() {
|
||||||
Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
|
Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
|
||||||
set_memory(res_mem, mtype);
|
set_memory(res_mem, mtype);
|
||||||
set_result(enc);
|
set_result(enc);
|
||||||
|
clear_upper_avx();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue