Support AVX-512 builds on Windows

"Since limited support for `/arch:AVX512` was added in Visual Studio
2017, and expanded in Visual Studio 2019"[1], we can safely offer this
option, since PHP 8.4 is supposed to build with Visual Studio 2022, and
it is unlikely that someone tries to build PHP 8.4 with Visual Studio
2017 while requesting AVX-512 support.

[1] <https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170>
This commit is contained in:
Christoph M. Becker 2024-07-29 21:04:38 +02:00
parent 9083ce4d27
commit 2f17f157ca
No known key found for this signature in database
GPG key ID: D66C9593118BCCB6
2 changed files with 10 additions and 9 deletions

View file

@ -393,7 +393,7 @@ ARG_WITH("test-ini-ext-exclude", "Comma separated list of shared extensions to \
be excluded from the test.ini", "no"); be excluded from the test.ini", "no");
ARG_ENABLE("native-intrinsics", "Comma separated list of intrinsic optimizations to enable. \ ARG_ENABLE("native-intrinsics", "Comma separated list of intrinsic optimizations to enable. \
Available instruction set names are sse, sse2, sse3, ssse3, sse4.1, sse4.2, avx, avx2. \ Available instruction set names are sse, sse2, sse3, ssse3, sse4.1, sse4.2, avx, avx2, avx512. \
SSE and SSE2 are enabled by default. The best instruction set specified will \ SSE and SSE2 are enabled by default. The best instruction set specified will \
automatically enable all the older instruction sets. Note, that the produced binary \ automatically enable all the older instruction sets. Note, that the produced binary \
might not work properly, if the chosen instruction sets are not available on the target \ might not work properly, if the chosen instruction sets are not available on the target \

View file

@ -3331,8 +3331,6 @@ function toolset_setup_common_cflags()
function toolset_setup_intrinsic_cflags() function toolset_setup_intrinsic_cflags()
{ {
var default_enabled = "sse2"; var default_enabled = "sse2";
/* XXX AVX and above needs to be reflected in /arch, for now SSE4.2 is
the best possible optimization.*/
var avail = WScript.CreateObject("Scripting.Dictionary"); var avail = WScript.CreateObject("Scripting.Dictionary");
avail.Add("sse", "__SSE__"); avail.Add("sse", "__SSE__");
avail.Add("sse2", "__SSE2__"); avail.Add("sse2", "__SSE2__");
@ -3341,7 +3339,7 @@ function toolset_setup_intrinsic_cflags()
avail.Add("sse4.1", "__SSE4_1__"); avail.Add("sse4.1", "__SSE4_1__");
avail.Add("sse4.2", "__SSE4_2__"); avail.Add("sse4.2", "__SSE4_2__");
/* From oldest to newest. */ /* From oldest to newest. */
var scale = new Array("sse", "sse2", "sse3", "ssse3", "sse4.1", "sse4.2", "avx", "avx2"); var scale = new Array("sse", "sse2", "sse3", "ssse3", "sse4.1", "sse4.2", "avx", "avx2", "avx512");
if (VS_TOOLSET) { if (VS_TOOLSET) {
if ("disabled" == PHP_NATIVE_INTRINSICS) { if ("disabled" == PHP_NATIVE_INTRINSICS) {
@ -3367,9 +3365,9 @@ function toolset_setup_intrinsic_cflags()
AC_DEFINE(avail.Item(list[i]), 1); AC_DEFINE(avail.Item(list[i]), 1);
} }
/* All means all. __AVX__ and __AVX2__ are defined by compiler. */ /* All means all. __AVX__, __AVX2__, and __AVX512*__ are defined by compiler. */
ADD_FLAG("CFLAGS","/arch:AVX2"); ADD_FLAG("CFLAGS","/arch:AVX512");
configure_subst.Add("PHP_SIMD_SCALE", "AVX2"); configure_subst.Add("PHP_SIMD_SCALE", "AVX512");
} else { } else {
var list = PHP_NATIVE_INTRINSICS.split(","); var list = PHP_NATIVE_INTRINSICS.split(",");
var j = 0; var j = 0;
@ -3378,7 +3376,7 @@ function toolset_setup_intrinsic_cflags()
var it = list[i].toLowerCase(); var it = list[i].toLowerCase();
if (scale[k] == it) { if (scale[k] == it) {
j = k > j ? k : j; j = k > j ? k : j;
} else if (!avail.Exists(it) && "avx2" != it && "avx" != it) { } else if (!avail.Exists(it) && "avx512" != it && "avx2" != it && "avx" != it) {
WARNING("Unknown intrinsic name '" + it + "' ignored"); WARNING("Unknown intrinsic name '" + it + "' ignored");
} }
} }
@ -3395,7 +3393,10 @@ function toolset_setup_intrinsic_cflags()
/* There is no explicit way to enable intrinsics between SSE3 and SSE4.2. /* There is no explicit way to enable intrinsics between SSE3 and SSE4.2.
The declared macros therefore won't affect the code generation, The declared macros therefore won't affect the code generation,
but will enable the guarded code parts. */ but will enable the guarded code parts. */
if ("avx2" == scale[j]) { if ("avx512" == scale[j]) {
ADD_FLAG("CFLAGS","/arch:AVX512");
j -= 3;
} else if ("avx2" == scale[j]) {
ADD_FLAG("CFLAGS","/arch:AVX2"); ADD_FLAG("CFLAGS","/arch:AVX2");
j -= 2; j -= 2;
} else if ("avx" == scale[j]) { } else if ("avx" == scale[j]) {