Implement php_url_encode_to_smart_str() and use it in http_build_query()

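Encoding directly into the destination smart_str avoids allocating a temporary zend_string for every key and value, and avoids copying the encoded bytes a second time.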

For this benchmark:
```php
for ($i=0;$i<2000000;$i++) {
  http_build_query([999999 => 'foo', 'aaab' => 'def', 'aaaaa'=>1, 'aaaaaaaa' => 'a']);
}
```

On an i7-4790:
```
Benchmark 1: ./sapi/cli/php ../buildquery.php
  Time (mean ± σ):     298.9 ms ±   7.3 ms    [User: 295.6 ms, System: 2.3 ms]
  Range (min … max):   293.6 ms … 314.0 ms    10 runs

Benchmark 2: ./sapi/cli/php_old ../buildquery.php
  Time (mean ± σ):     594.8 ms ±   8.6 ms    [User: 590.8 ms, System: 2.4 ms]
  Range (min … max):   586.3 ms … 616.1 ms    10 runs

Summary
  ./sapi/cli/php ../buildquery.php ran
    1.99 ± 0.06 times faster than ./sapi/cli/php_old ../buildquery.php
```

For this benchmark:
```php
for ($i=0;$i<2000000;$i++) {
  http_build_query(['test' => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa']);
}
```

On an i7-4790:
```
Benchmark 1: ./sapi/cli/php ../buildquery.php
  Time (mean ± σ):     188.4 ms ±   6.7 ms    [User: 184.6 ms, System: 2.9 ms]
  Range (min … max):   182.0 ms … 205.4 ms    14 runs

Benchmark 2: ./sapi/cli/php_old ../buildquery.php
  Time (mean ± σ):     323.9 ms ±   8.7 ms    [User: 319.8 ms, System: 2.7 ms]
  Range (min … max):   318.0 ms … 341.2 ms    10 runs

Summary
  ./sapi/cli/php ../buildquery.php ran
    1.72 ± 0.08 times faster than ./sapi/cli/php_old ../buildquery.php
```
This commit is contained in:
Niels Dossche 2025-04-26 14:34:33 +02:00
parent 1a23a411a1
commit 2eaf319b45
4 changed files with 30 additions and 38 deletions

View file

@ -70,6 +70,7 @@ PHP 8.5 INTERNALS UPGRADE NOTES
non-ex counterparts do not work in-place.
. The php_std_date() function has been removed. Use php_format_date() with
the "D, d M Y H:i:s \\G\\M\\T" format instead.
. Added php_url_encode_to_smart_str() to URL-encode a string directly into a smart_str buffer.
========================
4. OpCode changes

View file

@ -37,14 +37,7 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str,
smart_str_append(form_str, key_prefix);
}
if (index_string) {
zend_string *encoded_key;
if (encoding_type == PHP_QUERY_RFC3986) {
encoded_key = php_raw_url_encode(index_string, index_string_len);
} else {
encoded_key = php_url_encode(index_string, index_string_len);
}
smart_str_append(form_str, encoded_key);
zend_string_free(encoded_key);
php_url_encode_to_smart_str(form_str, index_string, index_string_len, encoding_type == PHP_QUERY_RFC3986);
} else {
/* Numeric key */
if (num_prefix) {
@ -59,31 +52,16 @@ static void php_url_encode_scalar(zval *scalar, smart_str *form_str,
try_again:
switch (Z_TYPE_P(scalar)) {
case IS_STRING: {
zend_string *encoded_data;
if (encoding_type == PHP_QUERY_RFC3986) {
encoded_data = php_raw_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar));
} else {
encoded_data = php_url_encode(Z_STRVAL_P(scalar), Z_STRLEN_P(scalar));
}
smart_str_append(form_str, encoded_data);
zend_string_free(encoded_data);
case IS_STRING:
php_url_encode_to_smart_str(form_str, Z_STRVAL_P(scalar), Z_STRLEN_P(scalar), encoding_type == PHP_QUERY_RFC3986);
break;
}
case IS_LONG:
smart_str_append_long(form_str, Z_LVAL_P(scalar));
break;
case IS_DOUBLE: {
zend_string *encoded_data;
zend_string *tmp = zend_double_to_str(Z_DVAL_P(scalar));
if (encoding_type == PHP_QUERY_RFC3986) {
encoded_data = php_raw_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp));
} else {
encoded_data = php_url_encode(ZSTR_VAL(tmp), ZSTR_LEN(tmp));
}
smart_str_append(form_str, encoded_data);
php_url_encode_to_smart_str(form_str, ZSTR_VAL(tmp), ZSTR_LEN(tmp), encoding_type == PHP_QUERY_RFC3986);
zend_string_free(tmp);
zend_string_free(encoded_data);
break;
}
case IS_FALSE:

View file

@ -24,6 +24,7 @@
#include "url.h"
#include "file.h"
#include "zend_simd.h"
#include "Zend/zend_smart_str.h"
/* {{{ free_url */
PHPAPI void php_url_free(php_url *theurl)
@ -446,16 +447,13 @@ static int php_htoi(const char *s)
static const unsigned char hexchars[] = "0123456789ABCDEF";
static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, bool raw) /* {{{ */ {
static zend_always_inline size_t php_url_encode_impl(unsigned char *to, const char *s, size_t len, bool raw) /* {{{ */ {
unsigned char c;
unsigned char *to;
unsigned char const *from, *end;
zend_string *start;
const unsigned char *to_init = to;
from = (unsigned char *)s;
end = (unsigned char *)s + len;
start = zend_string_safe_alloc(3, len, 0, 0);
to = (unsigned char*)ZSTR_VAL(start);
#ifdef XSSE2
while (from + 16 < end) {
@ -534,19 +532,24 @@ static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t
*to++ = c;
}
}
*to = '\0';
ZEND_ASSERT(!ZSTR_IS_INTERNED(start) && GC_REFCOUNT(start) == 1);
start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0);
return start;
return to - to_init;
}
/* }}} */
/* Shared backend for php_url_encode() and php_raw_url_encode(): encodes
 * s[0..len) into a freshly allocated zend_string and returns it.
 * `raw` is forwarded unchanged to php_url_encode_impl(). */
static zend_always_inline zend_string *php_url_encode_helper(char const *s, size_t len, bool raw)
{
	/* Reserve 3 * len bytes of payload up front; the string is shrunk to
	 * the number of bytes actually produced before it is returned. */
	zend_string *encoded = zend_string_safe_alloc(3, len, 0, false);
	unsigned char *out = (unsigned char *) ZSTR_VAL(encoded);
	size_t written = php_url_encode_impl(out, s, len, raw);

	/* The implementation does not terminate its output, so do it here. */
	out[written] = '\0';

	ZEND_ASSERT(!ZSTR_IS_INTERNED(encoded) && GC_REFCOUNT(encoded) == 1);
	encoded = zend_string_truncate(encoded, written, false);
	return encoded;
}
/* {{{ php_url_encode */
PHPAPI zend_string *php_url_encode(char const *s, size_t len)
{
return php_url_encode_impl(s, len, 0);
return php_url_encode_helper(s, len, false);
}
/* }}} */
@ -613,10 +616,19 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
/* {{{ php_raw_url_encode */
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len)
{
return php_url_encode_impl(s, len, 1);
return php_url_encode_helper(s, len, true);
}
/* }}} */
/* Appends the URL-encoded form of s[0..len) to buf without going through an
 * intermediate zend_string. `raw` is forwarded unchanged to
 * php_url_encode_impl(). */
PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw)
{
	/* Remember how long buf was before anything is appended. */
	const size_t prior_len = smart_str_get_len(buf);

	/* Request 3 * len additional bytes, with the size computed through
	 * zend_safe_address_guarded(). */
	const size_t reserve = zend_safe_address_guarded(3, len, 0);
	unsigned char *out = (unsigned char *) smart_str_extend(buf, reserve);

	const size_t written = php_url_encode_impl(out, s, len, raw);

	/* Set the length to the previous length plus the bytes actually written. */
	ZSTR_LEN(buf->s) = prior_len + written;
}
/* {{{ URL-encodes string */
PHP_FUNCTION(rawurlencode)
{

View file

@ -38,6 +38,7 @@ PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len);
PHPAPI zend_string *php_url_encode(char const *s, size_t len);
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len);
PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw);
#define PHP_URL_SCHEME 0
#define PHP_URL_HOST 1