From a0476fd32fcad28a85259eab0b19e38188695b15 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 14 Apr 2023 18:22:42 +0200 Subject: [PATCH] Micro-optimize double comparison (#11061) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using ZEND_NORMALIZE_BOOL(a - b), where a and b are doubles, generates the following instruction sequence on x64: subsd xmm0, xmm1 pxor xmm1, xmm1 comisd xmm0, xmm1 ... whereas if we use ZEND_THREEWAY_COMPARE we get two fewer instructions: ucomisd xmm0, xmm1 The only other difference is that the three-way compare uses *u*comisd instead of comisd: ucomisd raises a floating-point invalid-operation exception only when an operand is a signaling NaN, whereas comisd also raises it for quiet NaNs. As far as I'm aware this doesn't matter for our use case. Similarly, the number of instructions on AArch64 is also quite a bit lower for this code compared to the old code. ** Results ** Using the benchmark https://gist.github.com/nielsdos/b36517d81a1af74d96baa3576c2b70df I used hyperfine: hyperfine --runs 25 --warmup 3 './sapi/cli/php sort_double.php' No extensions such as opcache were used during benchmarking. 
BEFORE THIS PATCH ----------------- Time (mean ± σ): 255.5 ms ± 2.2 ms [User: 251.0 ms, System: 2.5 ms] Range (min … max): 251.5 ms … 260.7 ms 25 runs AFTER THIS PATCH ---------------- Time (mean ± σ): 236.2 ms ± 2.8 ms [User: 228.9 ms, System: 5.0 ms] Range (min … max): 231.5 ms … 242.7 ms 25 runs --- Zend/zend_operators.c | 23 +++++++---------------- ext/spl/spl_heap.c | 2 +- ext/standard/array.c | 2 +- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 312ba833987..00286cac388 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -2109,7 +2109,7 @@ ZEND_API int ZEND_FASTCALL numeric_compare_function(zval *op1, zval *op2) /* {{{ d1 = zval_get_double(op1); d2 = zval_get_double(op2); - return ZEND_NORMALIZE_BOOL(d1 - d2); + return ZEND_THREEWAY_COMPARE(d1, d2); } /* }}} */ @@ -2131,8 +2131,7 @@ static int compare_long_to_string(zend_long lval, zend_string *str) /* {{{ */ } if (type == IS_DOUBLE) { - double diff = (double) lval - str_dval; - return ZEND_NORMALIZE_BOOL(diff); + return ZEND_THREEWAY_COMPARE((double) lval, str_dval); } zend_string *lval_as_str = zend_long_to_str(lval); @@ -2150,15 +2149,11 @@ static int compare_double_to_string(double dval, zend_string *str) /* {{{ */ uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0); if (type == IS_LONG) { - double diff = dval - (double) str_lval; - return ZEND_NORMALIZE_BOOL(diff); + return ZEND_THREEWAY_COMPARE(dval, (double) str_lval); } if (type == IS_DOUBLE) { - if (dval == str_dval) { - return 0; - } - return ZEND_NORMALIZE_BOOL(dval - str_dval); + return ZEND_THREEWAY_COMPARE(dval, str_dval); } zend_string *dval_as_str = zend_double_to_str(dval); @@ -2180,17 +2175,13 @@ ZEND_API int ZEND_FASTCALL zend_compare(zval *op1, zval *op2) /* {{{ */ return Z_LVAL_P(op1)>Z_LVAL_P(op2)?1:(Z_LVAL_P(op1)priority); double b = Z_DVAL(((spl_pqueue_elem*) y)->priority); - return ZEND_NORMALIZE_BOOL(a - b); + return 
ZEND_THREEWAY_COMPARE(a, b); } static spl_ptr_heap *spl_ptr_heap_init(spl_ptr_heap_cmp_func cmp, spl_ptr_heap_ctor_func ctor, spl_ptr_heap_dtor_func dtor, size_t elem_size) /* {{{ */ diff --git a/ext/standard/array.c b/ext/standard/array.c index dc0d434d188..b7ab04e5dee 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -164,7 +164,7 @@ static zend_always_inline int php_array_key_compare_numeric_unstable_i(Bucket *f } else { d2 = (double)(zend_long)s->h; } - return ZEND_NORMALIZE_BOOL(d1 - d2); + return ZEND_THREEWAY_COMPARE(d1, d2); } } /* }}} */