mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-21 11:34:38 +02:00
8190934: Regressions on Haswell Xeon due to JDK-8178811
Reviewed-by: neliasso, kvn
This commit is contained in:
parent
d2db163edb
commit
25e03a8e65
4 changed files with 43 additions and 11 deletions
|
@ -547,8 +547,12 @@ source %{
|
|||
|
||||
#define __ _masm.
|
||||
|
||||
// Decides whether code generated for compilation C should contain a
// vzeroupper instruction.
//
// Returns true only if the CPU supports vzeroupper and either the
// compilation's maximum vector size exceeds 16 (presumably bytes, i.e. wider
// than an XMM register -- confirm) or the compilation has explicitly asked for
// the upper YMM bits to be cleared via Compile::set_clear_upper_avx(true).
static bool generate_vzeroupper(Compile* C) {
  if (!VM_Version::supports_vzeroupper()) {
    return false;
  }
  return C->max_vector_size() > 16 || C->clear_upper_avx();
}
|
||||
|
||||
static int clear_avx_size() {
|
||||
return (VM_Version::supports_vzeroupper()) ? 3: 0; // vzeroupper
|
||||
return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
|
||||
}
|
||||
|
||||
// !!!!! Special hack to get all types of calls to specify the byte offset
|
||||
|
@ -931,7 +935,7 @@ int MachPrologNode::reloc() const
|
|||
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
|
||||
{
|
||||
Compile* C = ra_->C;
|
||||
if (VM_Version::supports_vzeroupper()) {
|
||||
if (generate_vzeroupper(C)) {
|
||||
st->print("vzeroupper");
|
||||
st->cr(); st->print("\t");
|
||||
}
|
||||
|
@ -971,9 +975,11 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
|
|||
Compile* C = ra_->C;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
|
||||
if (generate_vzeroupper(C)) {
|
||||
// Clear upper bits of YMM registers when current compiled code uses
|
||||
// wide vectors to avoid AVX <-> SSE transition penalty during call.
|
||||
__ vzeroupper();
|
||||
}
|
||||
|
||||
int framesize = C->frame_size_in_bytes();
|
||||
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
|
||||
|
@ -2112,11 +2118,13 @@ encode %{
|
|||
|
||||
enc_class clear_avx %{
  // Record the instruction offset before emission (debug builds only) so the
  // emitted size can be checked against clear_avx_size() below.
  debug_only(int off0 = cbuf.insts_size());
  const bool emit_vzeroupper = generate_vzeroupper(Compile::current());
  if (emit_vzeroupper) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
  }
  debug_only(int off1 = cbuf.insts_size());
  assert(off1 - off0 == clear_avx_size(), "correct size prediction");
%}
|
||||
|
|
|
@ -1094,6 +1094,7 @@ void Compile::Init(int aliaslevel) {
|
|||
_major_progress = true; // start out assuming good things will happen
|
||||
set_has_unsafe_access(false);
|
||||
set_max_vector_size(0);
|
||||
set_clear_upper_avx(false); //false as default for clear upper bits of ymm registers
|
||||
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
|
||||
set_decompile_count(0);
|
||||
|
||||
|
|
|
@ -379,6 +379,7 @@ class Compile : public Phase {
|
|||
bool _has_boxed_value; // True if a boxed object is allocated
|
||||
bool _has_reserved_stack_access; // True if the method or an inlined method is annotated with ReservedStackAccess
|
||||
uint _max_vector_size; // Maximum size of generated vectors
|
||||
bool _clear_upper_avx; // Clear upper bits of ymm registers using vzeroupper
|
||||
uint _trap_hist[trapHistLength]; // Cumulative traps
|
||||
bool _trap_can_recompile; // Have we emitted a recompiling trap?
|
||||
uint _decompile_count; // Cumulative decompilation counts.
|
||||
|
@ -658,6 +659,8 @@ class Compile : public Phase {
|
|||
void set_has_reserved_stack_access(bool z) { _has_reserved_stack_access = z; }
|
||||
uint max_vector_size() const { return _max_vector_size; }
|
||||
void set_max_vector_size(uint s) { _max_vector_size = s; }
|
||||
// Returns the flag requesting that upper bits of ymm registers be cleared using vzeroupper.
bool clear_upper_avx() const { return _clear_upper_avx; }
|
||||
// Sets the flag requesting that upper bits of ymm registers be cleared using vzeroupper.
void set_clear_upper_avx(bool s) { _clear_upper_avx = s; }
|
||||
void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
|
||||
uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
|
||||
bool trap_can_recompile() const { return _trap_can_recompile; }
|
||||
|
|
|
@ -328,6 +328,13 @@ class LibraryCallKit : public GraphKit {
|
|||
|
||||
bool inline_profileBoolean();
|
||||
bool inline_isCompileConstant();
|
||||
// Marks the current compilation (C) as needing the upper bits of the YMM
// registers cleared. Only has an effect when built for X86 and UseAVX >= 2;
// on every other configuration this is a no-op.
void clear_upper_avx() {
#ifdef X86
  if (UseAVX < 2) {
    return;
  }
  C->set_clear_upper_avx(true);
#endif
}
|
||||
};
|
||||
|
||||
//---------------------------make_vm_intrinsic----------------------------
|
||||
|
@ -1082,6 +1089,7 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node
|
|||
|
||||
// All these intrinsics have checks.
|
||||
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
||||
clear_upper_avx();
|
||||
|
||||
return _gvn.transform(result);
|
||||
}
|
||||
|
@ -1156,6 +1164,8 @@ bool LibraryCallKit::inline_array_equals(StrIntrinsicNode::ArgEnc ae) {
|
|||
|
||||
const TypeAryPtr* mtype = (ae == StrIntrinsicNode::UU) ? TypeAryPtr::CHARS : TypeAryPtr::BYTES;
|
||||
set_result(_gvn.transform(new AryEqNode(control(), memory(mtype), arg1, arg2, ae)));
|
||||
clear_upper_avx();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1227,6 +1237,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex() {
|
|||
result = _gvn.transform(result);
|
||||
set_result(result);
|
||||
replace_in_map(index, result);
|
||||
clear_upper_avx();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1325,6 +1336,7 @@ bool LibraryCallKit::inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae) {
|
|||
set_control(_gvn.transform(region));
|
||||
record_for_igvn(region);
|
||||
set_result(_gvn.transform(phi));
|
||||
clear_upper_avx();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -1488,6 +1500,8 @@ bool LibraryCallKit::inline_string_copy(bool compress) {
|
|||
if (compress) {
|
||||
set_result(_gvn.transform(count));
|
||||
}
|
||||
clear_upper_avx();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1585,6 +1599,8 @@ bool LibraryCallKit::inline_string_toBytesU() {
|
|||
if (!stopped()) {
|
||||
set_result(newcopy);
|
||||
}
|
||||
clear_upper_avx();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -5286,6 +5302,8 @@ bool LibraryCallKit::inline_arraycopy() {
|
|||
assert(validated, "shouldn't transform if all arguments not validated");
|
||||
set_all_memory(n);
|
||||
}
|
||||
clear_upper_avx();
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -5406,6 +5424,8 @@ bool LibraryCallKit::inline_encodeISOArray() {
|
|||
Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
|
||||
set_memory(res_mem, mtype);
|
||||
set_result(enc);
|
||||
clear_upper_avx();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue