From 2eaf319b456a7f3fc9452ad479dab647c759dc63 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 26 Apr 2025 14:34:33 +0200 Subject: [PATCH] Implement php_url_encode_to_smart_str() and use it in http_build_query() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids temporary allocations and some copies. For this benchmark: ```php for ($i=0;$i<2000000;$i++) { http_build_query([999999 => 'foo', 'aaab' => 'def', 'aaaaa'=>1, 'aaaaaaaa' => 'a']); } ``` On an i7-4790: ``` Benchmark 1: ./sapi/cli/php ../buildquery.php Time (mean ± σ): 298.9 ms ± 7.3 ms [User: 295.6 ms, System: 2.3 ms] Range (min … max): 293.6 ms … 314.0 ms 10 runs Benchmark 2: ./sapi/cli/php_old ../buildquery.php Time (mean ± σ): 594.8 ms ± 8.6 ms [User: 590.8 ms, System: 2.4 ms] Range (min … max): 586.3 ms … 616.1 ms 10 runs Summary ./sapi/cli/php ../buildquery.php ran 1.99 ± 0.06 times faster than ./sapi/cli/php_old ../buildquery.php ``` For this benchmark: ```php for ($i=0;$i<2000000;$i++) { http_build_query(['test' => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa']); } ``` On an i7-4790: ``` Benchmark 1: ./sapi/cli/php ../buildquery.php Time (mean ± σ): 188.4 ms ± 6.7 ms [User: 184.6 ms, System: 2.9 ms] Range (min … max): 182.0 ms … 205.4 ms 14 runs Benchmark 2: ./sapi/cli/php_old ../buildquery.php Time (mean ± σ): 323.9 ms ± 8.7 ms [User: 319.8 ms, System: 2.7 ms] Range (min … max): 318.0 ms … 341.2 ms 10 runs Summary ./sapi/cli/php ../buildquery.php ran 1.72 ± 0.08 times faster than ./sapi/cli/php_old ../buildquery.php ``` --- UPGRADING.INTERNALS | 1 + ext/standard/http.c | 30 ++++-------------------------- ext/standard/url.c | 36 ++++++++++++++++++++++++------------ ext/standard/url.h | 1 + 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 4516cb2c688..13f38bbc370 100644 --- a/UPGRADING.INTERNALS +++ 
b/UPGRADING.INTERNALS @@ -70,6 +70,7 @@ PHP 8.5 INTERNALS UPGRADE NOTES non-ex counterparts do not work in-place. . The php_std_date() function has been removed. Use php_format_date() with the "D, d M Y H:i:s \\G\\M\\T" format instead. + . Added php_url_encode_to_smart_str() to URL-encode a string and append the result to a smart_str buffer. ======================== 4. OpCode changes diff --git a/ext/standard/http.c b/ext/standard/http.c index dcb999da9ec..ae6f668e0cb 100644 --- a/ext/standard/http.c +++ b/ext/standard/http.c @@ -37,14 +37,7 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str, smart_str_append(form_str, key_prefix); } if (index_string) { - zend_string *encoded_key; - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_key = php_raw_url_encode(index_string, index_string_len); - } else { - encoded_key = php_url_encode(index_string, index_string_len); - } - smart_str_append(form_str, encoded_key); - zend_string_free(encoded_key); + php_url_encode_to_smart_str(form_str, index_string, index_string_len, encoding_type == PHP_QUERY_RFC3986); } else { /* Numeric key */ if (num_prefix) { @@ -59,31 +52,16 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str, try_again: switch (Z_TYPE_P(scalar)) { - case IS_STRING: { - zend_string *encoded_data; - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_data = php_raw_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar)); - } else { - encoded_data = php_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar)); - } - smart_str_append(form_str, encoded_data); - zend_string_free(encoded_data); + case IS_STRING: + php_url_encode_to_smart_str(form_str, Z_STRVAL_P(scalar), Z_STRLEN_P(scalar), encoding_type == PHP_QUERY_RFC3986); break; - } case IS_LONG: smart_str_append_long(form_str, Z_LVAL_P(scalar)); break; case IS_DOUBLE: { - zend_string *encoded_data; zend_string *tmp = zend_double_to_str(Z_DVAL_P(scalar)); - if (encoding_type == PHP_QUERY_RFC3986) { - encoded_data = php_raw_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); 
- } else { - encoded_data = php_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); - } - smart_str_append(form_str, encoded_data); + php_url_encode_to_smart_str(form_str, ZSTR_VAL(tmp), ZSTR_LEN(tmp), encoding_type == PHP_QUERY_RFC3986); zend_string_free(tmp); - zend_string_free(encoded_data); break; } case IS_FALSE: diff --git a/ext/standard/url.c b/ext/standard/url.c index 3c79fd22500..504805484ef 100644 --- a/ext/standard/url.c +++ b/ext/standard/url.c @@ -24,6 +24,7 @@ #include "url.h" #include "file.h" #include "zend_simd.h" +#include "Zend/zend_smart_str.h" /* {{{ free_url */ PHPAPI void php_url_free(php_url *theurl) @@ -446,16 +447,13 @@ static int php_htoi(const char *s) static const unsigned char hexchars[] = "0123456789ABCDEF"; -static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, bool raw) /* {{{ */ { +static zend_always_inline size_t php_url_encode_impl(unsigned char *to, const char *s, size_t len, bool raw) /* {{{ */ { unsigned char c; - unsigned char *to; unsigned char const *from, *end; - zend_string *start; + const unsigned char *to_init = to; from = (unsigned char *)s; end = (unsigned char *)s + len; - start = zend_string_safe_alloc(3, len, 0, 0); - to = (unsigned char*)ZSTR_VAL(start); #ifdef XSSE2 while (from + 16 < end) { @@ -534,19 +532,24 @@ static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t *to++ = c; } } - *to = '\0'; - ZEND_ASSERT(!ZSTR_IS_INTERNED(start) && GC_REFCOUNT(start) == 1); - start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0); - - return start; + return to - to_init; } /* }}} */ +static zend_always_inline zend_string *php_url_encode_helper(char const *s, size_t len, bool raw) +{ + zend_string *result = zend_string_safe_alloc(3, len, 0, false); + size_t length = php_url_encode_impl((unsigned char *) ZSTR_VAL(result), s, len, raw); + ZSTR_VAL(result)[length] = '\0'; + ZEND_ASSERT(!ZSTR_IS_INTERNED(result) && GC_REFCOUNT(result) == 1); + return 
zend_string_truncate(result, length, false); +} + /* {{{ php_url_encode */ PHPAPI zend_string *php_url_encode(char const *s, size_t len) { - return php_url_encode_impl(s, len, 0); + return php_url_encode_helper(s, len, false); } /* }}} */ @@ -613,10 +616,19 @@ PHPAPI size_t php_url_decode(char *str, size_t len) /* {{{ php_raw_url_encode */ PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len) { - return php_url_encode_impl(s, len, 1); + return php_url_encode_helper(s, len, true); } /* }}} */ +PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw) +{ + size_t start_length = smart_str_get_len(buf); + size_t extend = zend_safe_address_guarded(3, len, 0); + char *dest = smart_str_extend(buf, extend); + size_t length = php_url_encode_impl((unsigned char *) dest, s, len, raw); + ZSTR_LEN(buf->s) = start_length + length; +} + /* {{{ URL-encodes string */ PHP_FUNCTION(rawurlencode) { diff --git a/ext/standard/url.h b/ext/standard/url.h index 5c531c0086a..3885ecece57 100644 --- a/ext/standard/url.h +++ b/ext/standard/url.h @@ -38,6 +38,7 @@ PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len); PHPAPI zend_string *php_url_encode(char const *s, size_t len); PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len); +PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw); #define PHP_URL_SCHEME 0 #define PHP_URL_HOST 1