Optimize unpack() for named fields (#6958)

Create the name using either zend_string_init_fast (no repetitions) or by concatenating the name with the digits produced by zend_print_ulong_to_buf. This is much more efficient than using snprintf. We also avoid repeated strlen() calculations.
This commit is contained in:
K 2021-05-14 11:32:46 +02:00 committed by GitHub
parent 4522dcb173
commit 21422e8536
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 39 deletions

View file

@ -739,24 +739,24 @@ PHP_FUNCTION(unpack)
while (formatlen-- > 0) { while (formatlen-- > 0) {
char type = *(format++); char type = *(format++);
char c; char c;
int arg = 1, argb; int repetitions = 1, argb;
char *name; char *name;
int namelen; int namelen;
int size=0; int size = 0;
/* Handle format arguments if any */ /* Handle format arguments if any */
if (formatlen > 0) { if (formatlen > 0) {
c = *format; c = *format;
if (c >= '0' && c <= '9') { if (c >= '0' && c <= '9') {
arg = atoi(format); repetitions = atoi(format);
while (formatlen > 0 && *format >= '0' && *format <= '9') { while (formatlen > 0 && *format >= '0' && *format <= '9') {
format++; format++;
formatlen--; formatlen--;
} }
} else if (c == '*') { } else if (c == '*') {
arg = -1; repetitions = -1;
format++; format++;
formatlen--; formatlen--;
} }
@ -764,7 +764,7 @@ PHP_FUNCTION(unpack)
/* Get of new value in array */ /* Get of new value in array */
name = format; name = format;
argb = arg; argb = repetitions;
while (formatlen > 0 && *format != '/') { while (formatlen > 0 && *format != '/') {
formatlen--; formatlen--;
@ -780,9 +780,9 @@ PHP_FUNCTION(unpack)
/* Never use any input */ /* Never use any input */
case 'X': case 'X':
size = -1; size = -1;
if (arg < 0) { if (repetitions < 0) {
php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type); php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
arg = 1; repetitions = 1;
} }
break; break;
@ -793,14 +793,14 @@ PHP_FUNCTION(unpack)
case 'a': case 'a':
case 'A': case 'A':
case 'Z': case 'Z':
size = arg; size = repetitions;
arg = 1; repetitions = 1;
break; break;
case 'h': case 'h':
case 'H': case 'H':
size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg; size = (repetitions > 0) ? (repetitions + (repetitions % 2)) / 2 : repetitions;
arg = 1; repetitions = 1;
break; break;
/* Use 1 byte of input */ /* Use 1 byte of input */
@ -870,18 +870,9 @@ PHP_FUNCTION(unpack)
RETURN_FALSE; RETURN_FALSE;
} }
/* Do actual unpacking */
for (i = 0; i != arg; i++ ) {
/* Space for name + number, safe as namelen is ensured <= 200 */
char n[256];
if (arg != 1 || namelen == 0) { /* Do actual unpacking */
/* Need to add element number to name */ for (i = 0; i != repetitions; i++ ) {
snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
} else {
/* Truncate name to next format code or end of string */
snprintf(n, sizeof(n), "%.*s", namelen, name);
}
if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) { if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type); php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
@ -890,6 +881,22 @@ PHP_FUNCTION(unpack)
} }
if ((inputpos + size) <= inputlen) { if ((inputpos + size) <= inputlen) {
zend_string* real_name;
zval val;
if (repetitions == 1 && namelen > 0) {
/* Use a part of the formatarg argument directly as the name. */
real_name = zend_string_init_fast(name, namelen);
} else {
/* Need to add the 1-based element number to the name */
char buf[MAX_LENGTH_OF_LONG + 1];
char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
size_t digits = buf + sizeof(buf) - 1 - res;
real_name = zend_string_concat2(name, namelen, res, digits);
}
switch ((int) type) { switch ((int) type) {
case 'a': { case 'a': {
/* a will not strip any trailing whitespace or null padding */ /* a will not strip any trailing whitespace or null padding */
@ -902,7 +909,8 @@ PHP_FUNCTION(unpack)
size = len; size = len;
add_assoc_stringl(return_value, n, &input[inputpos], len); ZVAL_STRINGL(&val, &input[inputpos], len);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
case 'A': { case 'A': {
@ -928,7 +936,8 @@ PHP_FUNCTION(unpack)
break; break;
} }
add_assoc_stringl(return_value, n, &input[inputpos], len + 1); ZVAL_STRINGL(&val, &input[inputpos], len + 1);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
/* New option added for Z to remain in-line with the Perl implementation */ /* New option added for Z to remain in-line with the Perl implementation */
@ -952,7 +961,8 @@ PHP_FUNCTION(unpack)
} }
len = s; len = s;
add_assoc_stringl(return_value, n, &input[inputpos], len); ZVAL_STRINGL(&val, &input[inputpos], len);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -995,7 +1005,9 @@ PHP_FUNCTION(unpack)
} }
ZSTR_VAL(buf)[len] = '\0'; ZSTR_VAL(buf)[len] = '\0';
add_assoc_str(return_value, n, buf);
ZVAL_STR(&val, buf);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1003,7 +1015,9 @@ PHP_FUNCTION(unpack)
case 'C': { /* unsigned */ case 'C': { /* unsigned */
uint8_t x = input[inputpos]; uint8_t x = input[inputpos];
zend_long v = (type == 'c') ? (int8_t) x : x; zend_long v = (type == 'c') ? (int8_t) x : x;
add_assoc_long(return_value, n, v);
ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1022,7 +1036,8 @@ PHP_FUNCTION(unpack)
v = x; v = x;
} }
add_assoc_long(return_value, n, v); ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1030,7 +1045,9 @@ PHP_FUNCTION(unpack)
case 'I': { /* unsigned integer, machine size, machine endian */ case 'I': { /* unsigned integer, machine size, machine endian */
unsigned int x = *((unaligned_uint*) &input[inputpos]); unsigned int x = *((unaligned_uint*) &input[inputpos]);
zend_long v = (type == 'i') ? (int) x : x; zend_long v = (type == 'i') ? (int) x : x;
add_assoc_long(return_value, n, v);
ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1049,7 +1066,9 @@ PHP_FUNCTION(unpack)
v = x; v = x;
} }
add_assoc_long(return_value, n, v); ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1069,7 +1088,8 @@ PHP_FUNCTION(unpack)
v = x; v = x;
} }
add_assoc_long(return_value, n, v); ZVAL_LONG(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
#endif #endif
@ -1088,7 +1108,8 @@ PHP_FUNCTION(unpack)
memcpy(&v, &input[inputpos], sizeof(float)); memcpy(&v, &input[inputpos], sizeof(float));
} }
add_assoc_double(return_value, n, (double)v); ZVAL_DOUBLE(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1105,7 +1126,9 @@ PHP_FUNCTION(unpack)
} else { } else {
memcpy(&v, &input[inputpos], sizeof(double)); memcpy(&v, &input[inputpos], sizeof(double));
} }
add_assoc_double(return_value, n, v);
ZVAL_DOUBLE(&val, v);
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
break; break;
} }
@ -1116,25 +1139,27 @@ PHP_FUNCTION(unpack)
case 'X': case 'X':
if (inputpos < size) { if (inputpos < size) {
inputpos = -size; inputpos = -size;
i = arg - 1; /* Break out of for loop */ i = repetitions - 1; /* Break out of for loop */
if (arg >= 0) { if (repetitions >= 0) {
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type); php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
} }
} }
break; break;
case '@': case '@':
if (arg <= inputlen) { if (repetitions <= inputlen) {
inputpos = arg; inputpos = repetitions;
} else { } else {
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type); php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
} }
i = arg - 1; /* Done, break out of for loop */ i = repetitions - 1; /* Done, break out of for loop */
break; break;
} }
zend_string_release(real_name);
inputpos += size; inputpos += size;
if (inputpos < 0) { if (inputpos < 0) {
if (size != -1) { /* only print warning if not working with * */ if (size != -1) { /* only print warning if not working with * */
@ -1142,7 +1167,7 @@ PHP_FUNCTION(unpack)
} }
inputpos = 0; inputpos = 0;
} }
} else if (arg < 0) { } else if (repetitions < 0) {
/* Reached end of input for '*' repeater */ /* Reached end of input for '*' repeater */
break; break;
} else { } else {

View file

@ -0,0 +1,42 @@
--TEST--
test unpack() to array with named keys
--FILE--
<?php
// Pack three integers with the 'V' format code; every unpack() call below decodes this same string.
$str = pack('VVV', 0x00010203, 0x04050607, 0x08090a0b);
// One value per name, no repetition count: keys are the bare names (aa, bb, cc).
print_r(unpack('Vaa/Vbb/Vcc', $str));
// Repetition count of 2: the 1-based element number is appended (aa1, aa2); the single 'cc' stays bare.
print_r(unpack('V2aa/Vcc', $str));
// Repetition count of 3: keys are aa1, aa2, aa3.
print_r(unpack('V3aa', $str));
// '*' repeater with a name: consumes all remaining input, keys are numbered aa1..aa3.
print_r(unpack('V*aa', $str));
// '*' repeater without a name: keys are just the element numbers 1..3.
print_r(unpack('V*', $str));
?>
--EXPECT--
Array
(
[aa] => 66051
[bb] => 67438087
[cc] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[cc] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[aa3] => 134810123
)
Array
(
[aa1] => 66051
[aa2] => 67438087
[aa3] => 134810123
)
Array
(
[1] => 66051
[2] => 67438087
[3] => 134810123
)