php_json_escape_string() optimization

This commit is contained in:
Dmitry Stogov 2018-06-13 18:26:48 +03:00
parent 52f92b51aa
commit c1ce43d1d2
2 changed files with 69 additions and 35 deletions

View file

@ -27,6 +27,8 @@
smart_str_appendl_ex((dest), (src), strlen(src), (what))
/* Append the NUL-terminated C string `src` to `dest`; the length is obtained
 * with strlen(). Note that `src` appears twice in the expansion, so an
 * argument with side effects would be evaluated twice. */
#define smart_str_appends(dest, src) \
smart_str_appendl((dest), (src), strlen(src))
/* Extend `dest` by `len` bytes through smart_str_extend_ex(), passing 0 as
 * its `persistent` argument. Evaluates to the char* that
 * smart_str_extend_ex() returns. */
#define smart_str_extend(dest, len) \
smart_str_extend_ex((dest), (len), 0)
/* Append the single character `c` through smart_str_appendc_ex(), passing 0
 * as the last argument (presumably the `persistent` flag -- confirm against
 * the smart_str_appendc_ex() definition). */
#define smart_str_appendc(dest, c) \
smart_str_appendc_ex((dest), (c), 0)
#define smart_str_appendl(dest, src, len) \
@ -71,6 +73,13 @@ do_smart_str_realloc:
return len;
}
/* Grow `dest` by `len` bytes and hand back a pointer to the newly added tail.
 *
 * smart_str_alloc() is called first (it is expected to make dest->s usable
 * and large enough -- see its definition); the value it returns becomes the
 * string's new length. The returned pointer addresses the byte just past the
 * previous end of the string, i.e. the start of the region the caller is
 * expected to fill. `persistent` is forwarded unchanged to smart_str_alloc().
 */
static zend_always_inline char* smart_str_extend_ex(smart_str *dest, size_t len, zend_bool persistent) {
	const size_t total_len = smart_str_alloc(dest, len, persistent);
	/* dest->s must only be inspected after the allocation call above. */
	const size_t used_len = ZSTR_LEN(dest->s);

	ZSTR_LEN(dest->s) = total_len;
	return ZSTR_VAL(dest->s) + used_len;
}
static zend_always_inline void smart_str_free_ex(smart_str *str, zend_bool persistent) {
if (str->s) {
zend_string_release_ex(str->s, persistent);

View file

@ -33,7 +33,7 @@
static const char digits[] = "0123456789abcdef";
static int php_json_escape_string(
smart_str *buf, char *s, size_t len,
smart_str *buf, const char *s, size_t len,
int options, php_json_encoder *encoder);
static int php_json_determine_array_type(zval *val) /* {{{ */
@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
/* }}} */
static int php_json_escape_string(
smart_str *buf, char *s, size_t len,
smart_str *buf, const char *s, size_t len,
int options, php_json_encoder *encoder) /* {{{ */
{
int status;
unsigned int us;
size_t pos, checkpoint;
char *dst;
if (len == 0) {
smart_str_appendl(buf, "\"\"", 2);
@ -287,72 +288,89 @@ static int php_json_escape_string(
do {
us = (unsigned char)s[pos];
if (us >= 0x80) {
int utf8_sub = 0;
size_t prev_pos = pos;
if (UNEXPECTED(us >= 0x80)) {
if (pos) {
smart_str_appendl(buf, s, pos);
s += pos;
pos = 0;
}
us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
len -= pos;
/* check whether UTF8 character is correct */
if (status != SUCCESS) {
if (UNEXPECTED(status != SUCCESS)) {
s += pos;
pos = 0;
if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
/* ignore invalid UTF8 character */
continue;
} else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
/* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
us = 0xfffd;
utf8_sub = 1;
} else {
if (buf->s) {
ZSTR_LEN(buf->s) = checkpoint;
if (options & PHP_JSON_UNESCAPED_UNICODE) {
smart_str_appendl(buf, "\xef\xbf\xbd", 3);
} else {
smart_str_appendl(buf, "\\ufffd", 6);
}
continue;
} else {
ZSTR_LEN(buf->s) = checkpoint;
encoder->error_code = PHP_JSON_ERROR_UTF8;
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
smart_str_appendl(buf, "null", 4);
}
return FAILURE;
}
}
/* Escape U+2028/U+2029 line terminators, UNLESS both
JSON_UNESCAPED_UNICODE and
JSON_UNESCAPED_LINE_TERMINATORS were provided */
if ((options & PHP_JSON_UNESCAPED_UNICODE)
} else if ((options & PHP_JSON_UNESCAPED_UNICODE)
&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
|| us < 0x2028 || us > 0x2029)) {
if (utf8_sub) {
smart_str_appendl(buf, "\xef\xbf\xbd", 3);
} else {
smart_str_appendl(buf, s + prev_pos, pos - prev_pos);
}
smart_str_appendl(buf, s, pos);
s += pos;
pos = 0;
continue;
}
/* From http://en.wikipedia.org/wiki/UTF16 */
if (us >= 0x10000) {
unsigned int next_us;
us -= 0x10000;
next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
us = (unsigned short)((us >> 10) | 0xd800);
smart_str_appendl(buf, "\\u", 2);
smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
smart_str_appendc(buf, digits[(us & 0xf)]);
dst = smart_str_extend(buf, 6);
dst[0] = '\\';
dst[1] = 'u';
dst[2] = digits[(us >> 12) & 0xf];
dst[3] = digits[(us >> 8) & 0xf];
dst[4] = digits[(us >> 4) & 0xf];
dst[5] = digits[us & 0xf];
us = next_us;
}
smart_str_appendl(buf, "\\u", 2);
smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
smart_str_appendc(buf, digits[(us & 0xf)]);
dst = smart_str_extend(buf, 6);
dst[0] = '\\';
dst[1] = 'u';
dst[2] = digits[(us >> 12) & 0xf];
dst[3] = digits[(us >> 8) & 0xf];
dst[4] = digits[(us >> 4) & 0xf];
dst[5] = digits[us & 0xf];
s += pos;
pos = 0;
} else {
static const uint32_t charmap[4] = {
0xffffffff, 0x500080c4, 0x10000000, 0x00000000};
pos++;
len--;
if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
smart_str_appendc(buf, (unsigned char) us);
pos++;
} else {
if (pos) {
smart_str_appendl(buf, s, pos);
s += pos;
pos = 0;
}
s++;
switch (us) {
case '"':
if (options & PHP_JSON_HEX_QUOT) {
@ -428,15 +446,22 @@ static int php_json_escape_string(
default:
ZEND_ASSERT(us < ' ');
smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1);
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
smart_str_appendc(buf, digits[(us & 0xf)]);
dst = smart_str_extend(buf, 6);
dst[0] = '\\';
dst[1] = 'u';
dst[2] = '0';
dst[3] = '0';
dst[4] = digits[(us >> 4) & 0xf];
dst[5] = digits[us & 0xf];
break;
}
}
}
} while (pos < len);
} while (len);
if (EXPECTED(pos)) {
smart_str_appendl(buf, s, pos);
}
smart_str_appendc(buf, '"');
return SUCCESS;