[zend_hash]: Use AVX2 instructions for better code efficiency (#10858)

We prefer AVX2 instructions because they improve code efficiency:
1) Shorter instruction path length (generic x86: 16 instructions, SSE2: 6, AVX2: 4).
2) Better ICache locality and density.

To enable AVX2 instructions, compile with the '-mavx2' option, passed via the CFLAGS environment variable or as a command-line argument.
Note: the '-mavx' option alone still results in SSE2 instructions being used, because _mm256_cmpeq_epi64() requires AVX2 (-mavx2).

Testing: builds with and without '-mavx2' produced the same test report when running 'make TEST_PHP_ARGS=-j8 test'.

Signed-off-by: Tony Su <tao.su@intel.com>
2025-08-15 13:38:49 +02:00 · 2023-03-17 17:54:13 +08:00 · 2023-03-17 17:54:13 +08:00 · d835de1993
commit d835de1993
parent cd0c6bc74b
1 changed files with 12 additions and 2 deletions
--- a/Zend/zend_hash.c
+++ b/Zend/zend_hash.c
@@ -26,7 +26,10 @@
 # include <arm_neon.h>
 #endif

-#ifdef __SSE2__
+/* Prefer to use AVX2 instructions for better latency and throughput */
+#if defined(__AVX2__)
+# include <immintrin.h>
+#elif defined( __SSE2__)
 # include <mmintrin.h>
 # include <emmintrin.h>
 #endif
@@ -176,7 +179,14 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
 		HT_SET_DATA_ADDR(ht, data);
 		/* Don't overwrite iterator count. */
 		ht->u.v.flags = HASH_FLAG_STATIC_KEYS;
-#ifdef __SSE2__
+#if defined(__AVX2__)
+		do {
+			__m256i ymm0 = _mm256_setzero_si256();
+			ymm0 = _mm256_cmpeq_epi64(ymm0, ymm0);
+			_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data,  0), ymm0);
+			_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data,  8), ymm0);
+		} while(0);
+#elif defined (__SSE2__)
 		do {
 			__m128i xmm0 = _mm_setzero_si128();
 			xmm0 = _mm_cmpeq_epi8(xmm0, xmm0);