8074981: Integer/FP scalar reduction optimization

Add scalar reduction optimization to C2 to take advantage of vector instructions in modern x86 CPUs.

Reviewed-by: kvn, twisti
This commit is contained in:
Michael C Berg 2015-04-01 18:07:50 -07:00 committed by Vladimir Kozlov
parent 7c5d30b0e3
commit 9e55e44c85
22 changed files with 1599 additions and 20 deletions

View file

@@ -1777,6 +1777,12 @@ private:
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
// Add horizontal packed integers
// The v-prefixed forms take an extra nds register operand plus a vector256
// flag; the unprefixed forms take only dst and src.
// NOTE(review): the w/d suffixes presumably select 16-bit word vs 32-bit
// doubleword elements — confirm against the instruction reference.
void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
void phaddw(XMMRegister dst, XMMRegister src);
void phaddd(XMMRegister dst, XMMRegister src);
// Add packed integers
void paddb(XMMRegister dst, XMMRegister src);
void paddw(XMMRegister dst, XMMRegister src);
@@ -1869,6 +1875,7 @@ private:
// Copy low 128bit into high 128bit of YMM registers.
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
// NOTE(review): by its name this is an extract rather than an insert, so the
// group comment above likely does not describe it — presumably it moves the
// high 128 bits of src into dst; confirm against the definition.
void vextractf128h(XMMRegister dst, XMMRegister src);
// Load/store the high 128 bits of a YMM register without destroying the other half.
void vinsertf128h(XMMRegister dst, Address src);