[zend_hash]: Use AVX2 instructions for better code efficiency (#10858)

Prefer AVX2 instructions, when available, to improve code efficiency:
1) Reduce instruction path length
   Generic x86 Instr: 16, SSE2: 6, AVX2: 4
2) Better ICache locality and density

To enable AVX2 instructions, compile with '-mavx2' option via CFLAGS
environment variable or command line argument.

Note: the '-mavx' option alone still results in the SSE2 code path being
      used, because _mm256_cmpeq_epi64() requires AVX2 (-mavx2).

Testing:
    Built with and without '-mavx2'; 'make TEST_PHP_ARGS=-j8 test'
    produced the same test report in both configurations.

Signed-off-by: Tony Su <tao.su@intel.com>
This commit is contained in:
Tony Su 2023-03-17 17:54:13 +08:00 committed by GitHub
parent cd0c6bc74b
commit d835de1993
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -26,7 +26,10 @@
# include <arm_neon.h>
#endif
#ifdef __SSE2__
/* Prefer to use AVX2 instructions for better latency and throughput */
#if defined(__AVX2__)
# include <immintrin.h>
#elif defined( __SSE2__)
# include <mmintrin.h>
# include <emmintrin.h>
#endif
@ -176,7 +179,14 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
HT_SET_DATA_ADDR(ht, data);
/* Don't overwrite iterator count. */
ht->u.v.flags = HASH_FLAG_STATIC_KEYS;
#ifdef __SSE2__
#if defined(__AVX2__)
do {
__m256i ymm0 = _mm256_setzero_si256();
ymm0 = _mm256_cmpeq_epi64(ymm0, ymm0);
_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data, 0), ymm0);
_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data, 8), ymm0);
} while(0);
#elif defined (__SSE2__)
do {
__m128i xmm0 = _mm_setzero_si128();
xmm0 = _mm_cmpeq_epi8(xmm0, xmm0);