Avoid temporary string allocations in php_mb_parse_encoding_list() (#12714)

This brings execution time down from 0.91s to 0.86s on the reference
benchmark [1].

[1] https://github.com/php/php-src/issues/12684#issuecomment-1813799924
This commit is contained in:
Niels Dossche 2023-11-18 11:08:59 +00:00 committed by GitHub
parent a35a69ff0f
commit 3ad422ebd0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 16 deletions

View file

@ -318,6 +318,11 @@ static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
#define NAME_HASH_MAX_NAME_LENGTH 23
/* Look up an encoding by a NUL-terminated name: measure the name once and
 * hand it to the length-aware mbfl_name2encoding_ex(). */
const mbfl_encoding *mbfl_name2encoding(const char *name)
{
	const size_t measured_len = strlen(name);

	return mbfl_name2encoding_ex(name, measured_len);
}
const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
{
const mbfl_encoding *const *encoding;
@ -339,14 +344,13 @@ const mbfl_encoding *mbfl_name2encoding(const char *name)
#endif
/* Use perfect hash lookup for name */
size_t name_len = strlen(name);
if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
if (key <= 186) {
int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
if (offset >= 0) {
encoding = mbfl_encoding_ptr_list + offset;
if (strcasecmp((*encoding)->name, name) == 0) {
if (strncasecmp((*encoding)->name, name, name_len) == 0) {
return *encoding;
}
}

View file

@ -285,6 +285,7 @@ static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
}
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);

View file

@ -276,6 +276,14 @@ static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_nam
return mbfl_name2encoding(encoding_name);
}
/* Resolve an encoding name given as a (pointer, length) pair; the name does
 * not need to be NUL-terminated.
 *
 * encoding_name     - start of the name
 * encoding_name_len - number of bytes that make up the name
 *
 * Returns &mbfl_encoding_pass when the name is exactly "pass" (case-sensitive);
 * otherwise returns the result of mbfl_name2encoding_ex() for the same name. */
static const mbfl_encoding *php_mb_get_encoding_or_pass_ex(const char *encoding_name, size_t encoding_name_len) {
	const size_t pass_len = sizeof("pass") - 1;

	/* The length must match as well: strncmp() bounded only by
	 * encoding_name_len would also accept the prefixes "", "p", "pa" and
	 * "pas" as the pass encoding. */
	if (encoding_name_len == pass_len && strncmp(encoding_name, "pass", pass_len) == 0) {
		return &mbfl_encoding_pass;
	}
	return mbfl_name2encoding_ex(encoding_name, encoding_name_len);
}
static size_t count_commas(const char *p, const char *end) {
size_t count = 0;
while ((p = memchr(p, ',', end - p))) {
@ -300,15 +308,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
} else {
bool included_auto;
size_t n, size;
char *p1, *endp, *tmpstr;
const char *p1, *endp, *tmpstr;
const mbfl_encoding **entry, **list;
/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
tmpstr = value + 1;
value_length -= 2;
} else {
tmpstr = (char *)estrndup(value, value_length);
tmpstr = value;
}
endp = tmpstr + value_length;
@ -319,20 +326,19 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
included_auto = 0;
p1 = tmpstr;
while (1) {
char *comma = memchr(p1, ',', endp - p1);
char *p = comma ? comma : endp;
*p = '\0';
const char *comma = memchr(p1, ',', endp - p1);
const char *p = comma ? comma : endp;
/* trim spaces */
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
p1++;
}
p--;
while (p > p1 && (*p == ' ' || *p == '\t')) {
*p = '\0';
p--;
}
size_t p1_length = p - p1 + 1;
/* convert to the encoding number and check encoding */
if (strcasecmp(p1, "auto") == 0) {
if (strncasecmp(p1, "auto", p1_length) == 0) {
if (!included_auto) {
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
@ -345,15 +351,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
}
} else {
const mbfl_encoding *encoding =
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
allow_pass_encoding ? php_mb_get_encoding_or_pass_ex(p1, p1_length) : mbfl_name2encoding_ex(p1, p1_length);
if (!encoding) {
/* Called from an INI setting modification */
if (arg_num == 0) {
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%.*s\"", (int) p1_length, p1);
} else {
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
zend_argument_value_error(arg_num, "contains invalid encoding \"%.*s\"", (int) p1_length, p1);
}
efree(tmpstr);
pefree(ZEND_VOIDP(list), persistent);
return FAILURE;
}
@ -368,7 +373,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
}
*return_list = list;
*return_size = n;
efree(tmpstr);
}
return SUCCESS;