mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Avoid temporary string allocations in php_mb_parse_encoding_list() (#12714)
This brings execution time down from 0.91s to 0.86s on the reference benchmark [1]. [1] https://github.com/php/php-src/issues/12684#issuecomment-1813799924
This commit is contained in:
parent
a35a69ff0f
commit
3ad422ebd0
3 changed files with 25 additions and 16 deletions
|
@ -318,6 +318,11 @@ static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
|
|||
#define NAME_HASH_MAX_NAME_LENGTH 23
|
||||
|
||||
const mbfl_encoding *mbfl_name2encoding(const char *name)
|
||||
{
|
||||
return mbfl_name2encoding_ex(name, strlen(name));
|
||||
}
|
||||
|
||||
const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
|
||||
{
|
||||
const mbfl_encoding *const *encoding;
|
||||
|
||||
|
@ -339,14 +344,13 @@ const mbfl_encoding *mbfl_name2encoding(const char *name)
|
|||
#endif
|
||||
|
||||
/* Use perfect hash lookup for name */
|
||||
size_t name_len = strlen(name);
|
||||
if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
|
||||
unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
|
||||
if (key <= 186) {
|
||||
int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
|
||||
if (offset >= 0) {
|
||||
encoding = mbfl_encoding_ptr_list + offset;
|
||||
if (strcasecmp((*encoding)->name, name) == 0) {
|
||||
if (strncasecmp((*encoding)->name, name, name_len) == 0) {
|
||||
return *encoding;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -285,6 +285,7 @@ static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
|
|||
}
|
||||
|
||||
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
|
||||
MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
|
||||
MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
|
||||
MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
|
||||
MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
|
||||
|
|
|
@ -276,6 +276,14 @@ static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_nam
|
|||
return mbfl_name2encoding(encoding_name);
|
||||
}
|
||||
|
||||
static const mbfl_encoding *php_mb_get_encoding_or_pass_ex(const char *encoding_name, size_t encoding_name_len) {
|
||||
if (strncmp(encoding_name, "pass", encoding_name_len) == 0) {
|
||||
return &mbfl_encoding_pass;
|
||||
}
|
||||
|
||||
return mbfl_name2encoding_ex(encoding_name, encoding_name_len);
|
||||
}
|
||||
|
||||
static size_t count_commas(const char *p, const char *end) {
|
||||
size_t count = 0;
|
||||
while ((p = memchr(p, ',', end - p))) {
|
||||
|
@ -300,15 +308,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
|
|||
} else {
|
||||
bool included_auto;
|
||||
size_t n, size;
|
||||
char *p1, *endp, *tmpstr;
|
||||
const char *p1, *endp, *tmpstr;
|
||||
const mbfl_encoding **entry, **list;
|
||||
|
||||
/* copy the value string for work */
|
||||
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
|
||||
tmpstr = (char *)estrndup(value+1, value_length-2);
|
||||
tmpstr = value + 1;
|
||||
value_length -= 2;
|
||||
} else {
|
||||
tmpstr = (char *)estrndup(value, value_length);
|
||||
tmpstr = value;
|
||||
}
|
||||
|
||||
endp = tmpstr + value_length;
|
||||
|
@ -319,20 +326,19 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
|
|||
included_auto = 0;
|
||||
p1 = tmpstr;
|
||||
while (1) {
|
||||
char *comma = memchr(p1, ',', endp - p1);
|
||||
char *p = comma ? comma : endp;
|
||||
*p = '\0';
|
||||
const char *comma = memchr(p1, ',', endp - p1);
|
||||
const char *p = comma ? comma : endp;
|
||||
/* trim spaces */
|
||||
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
|
||||
p1++;
|
||||
}
|
||||
p--;
|
||||
while (p > p1 && (*p == ' ' || *p == '\t')) {
|
||||
*p = '\0';
|
||||
p--;
|
||||
}
|
||||
size_t p1_length = p - p1 + 1;
|
||||
/* convert to the encoding number and check encoding */
|
||||
if (strcasecmp(p1, "auto") == 0) {
|
||||
if (strncasecmp(p1, "auto", p1_length) == 0) {
|
||||
if (!included_auto) {
|
||||
const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
|
||||
const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
|
||||
|
@ -345,15 +351,14 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
|
|||
}
|
||||
} else {
|
||||
const mbfl_encoding *encoding =
|
||||
allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
|
||||
allow_pass_encoding ? php_mb_get_encoding_or_pass_ex(p1, p1_length) : mbfl_name2encoding_ex(p1, p1_length);
|
||||
if (!encoding) {
|
||||
/* Called from an INI setting modification */
|
||||
if (arg_num == 0) {
|
||||
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
|
||||
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%.*s\"", (int) p1_length, p1);
|
||||
} else {
|
||||
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
|
||||
zend_argument_value_error(arg_num, "contains invalid encoding \"%.*s\"", (int) p1_length, p1);
|
||||
}
|
||||
efree(tmpstr);
|
||||
pefree(ZEND_VOIDP(list), persistent);
|
||||
return FAILURE;
|
||||
}
|
||||
|
@ -368,7 +373,6 @@ static zend_result php_mb_parse_encoding_list(const char *value, size_t value_le
|
|||
}
|
||||
*return_list = list;
|
||||
*return_size = n;
|
||||
efree(tmpstr);
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue