From 64127b66c64457d6356d06bb517c35f72fa74852 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sun, 29 Jan 2023 16:39:19 +0000 Subject: [PATCH] Concatenating two valid UTF-8 strings produces a valid UTF-8 string The UTF-8 valid flag needs to be copied upon interning, otherwise strings that are concatenated at compile time lose this information. However, if previously this string was interned without the flag it is not added E.g. in the case the string is an existing class name. Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- Zend/zend_operators.c | 8 +- Zend/zend_string.c | 23 ++- Zend/zend_vm_def.h | 18 ++ Zend/zend_vm_execute.h | 155 ++++++++++++++++++ ext/opcache/ZendAccelerator.c | 2 +- ext/opcache/jit/zend_jit_helpers.c | 18 ++ ext/opcache/zend_persist.c | 6 +- .../tests/strings_marked_as_utf8.phpt | 23 ++- 8 files changed, 235 insertions(+), 18 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 8bfc40c6c5a..5a4c30168ad 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1887,7 +1887,7 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { - zval *orig_op1 = op1; + zval *orig_op1 = op1; zval op1_copy, op2_copy; ZVAL_UNDEF(&op1_copy); @@ -1955,6 +1955,11 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1976,6 +1981,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval i_zval_ptr_dtor(result); } } + GC_ADD_FLAGS(result_str, flags); /* This has to happen first to account for the cases where result == op1 == op2 and * the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 68e6084fdf6..2d6a30d37cb 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { + uint32_t flags = 0; + if (ZSTR_IS_VALID_UTF8(str)) { + flags = IS_STR_VALID_UTF8; + } + zend_ulong h = ZSTR_H(str); + zend_string_delref(str); + str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); + GC_ADD_FLAGS(str, flags); + ZSTR_H(str) = h; + return str; +} + static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str) { zend_string *ret; @@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT); if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, true); } return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT); @@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string * } #endif if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, false); } ret = zend_add_interned_string(str, &CG(interned_strings), 0); diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index fc7cd813239..efde56931d6 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,6 +384,11 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -412,6 +417,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -420,6 +426,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3140,6 +3147,11 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3165,6 +3177,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -3173,6 +3186,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3233,6 +3247,10 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index a1eefaa72e3..f4e1b67cbd4 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,6 +6624,11 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6649,6 +6654,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -6657,6 +6663,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -6717,6 +6724,10 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8691,6 +8702,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -8719,6 +8735,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -8727,6 +8744,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9116,6 +9134,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9141,6 +9164,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -9149,6 +9173,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9209,6 +9234,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11057,6 +11086,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11085,6 +11119,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11093,6 +11128,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11482,6 +11518,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11507,6 +11548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11515,6 +11557,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11575,6 +11618,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15088,6 +15135,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15116,6 +15168,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15124,6 +15177,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15827,6 +15881,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15852,6 +15911,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15860,6 +15920,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15920,6 +15981,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16525,6 +16590,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -16553,6 +16623,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -16561,6 +16632,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17264,6 +17336,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17289,6 +17366,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -17297,6 +17375,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17357,6 +17436,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18213,6 +18296,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18241,6 +18329,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18249,6 +18338,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18590,6 +18680,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18615,6 +18710,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18623,6 +18719,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18683,6 +18780,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40255,6 +40356,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -40283,6 +40389,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -40291,6 +40398,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42751,6 +42859,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42776,6 +42889,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -42784,6 +42898,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42844,6 +42959,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -44067,6 +44186,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -44095,6 +44219,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -44103,6 +44228,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46492,6 +46618,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46517,6 +46648,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -46525,6 +46657,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46585,6 +46718,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49361,6 +49498,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -49389,6 +49531,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -49397,6 +49540,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51885,6 +52029,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -51910,6 +52059,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -51918,6 +52068,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51978,6 +52129,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 0d589c7c61d..4d68bc3191c 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -549,7 +549,7 @@ zend_string* ZEND_FASTCALL accel_new_interned_string(zend_string *str) STRTAB_COLLISION(s) = *hash_slot; *hash_slot = STRTAB_STR_TO_POS(&ZCSG(interned_strings), s); GC_SET_REFCOUNT(s, 2); - GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); + GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT)| (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); ZSTR_H(s) = h; ZSTR_LEN(s) = ZSTR_LEN(str); memcpy(ZSTR_VAL(s), ZSTR_VAL(str), ZSTR_LEN(s) + 1); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index c594ade575b..261b456a4fc 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,6 +1633,11 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1656,6 +1661,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); } while(0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(op1, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); ZSTR_VAL(result_str)[result_len] = '\0'; @@ -1667,6 +1673,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1674,6 +1685,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z } result_str = zend_string_alloc(result_len, 0); + GC_ADD_FLAGS(result_str, flags); memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); ZVAL_NEW_STR(result, result_str); @@ -1689,6 +1701,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1710,6 +1727,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_str), op1_len); } while (0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(result, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index dcff7a78832..7bc95b71110 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -37,11 +37,13 @@ #define zend_set_str_gc_flags(str) do { \ GC_SET_REFCOUNT(str, 2); \ + uint32_t flags = GC_STRING | (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); \ if (file_cache_only) { \ - GC_TYPE_INFO(str) = GC_STRING | (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ + flags |= (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ } else { \ - GC_TYPE_INFO(str) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ + flags |= ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ } \ + GC_TYPE_INFO(str) = flags; \ } while (0) #define zend_accel_store_string(str) do { \ diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index ff84cad62aa..5b6dfb6a076 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -47,10 +47,19 @@ $s = "f" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +// The "foo" string matches with a "Foo" class which is registered by the zend_test extension. +// That class name does not have the "valid UTF-8" flag because class names in general +// don't have to be UTF-8. As the "foo" string here goes through the interning logic, +// the string gets replaced by the "foo" string from the class, which does +// not have the "valid UTF-8" flag. We therefore choose a different test case: "fxo". +// The previous "foo" test case works because it is not interned. echo "Multiple concatenation known valid UTF-8 in assignment:\n"; $s = "f" . "o" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +$s = "f" . "x" . "o"; +var_dump($s); +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); echo "Concatenation known valid UTF-8 string with empty string in variables:\n"; $s1 = "f"; @@ -114,16 +123,18 @@ string(8) "2.0E+100" bool(true) Concatenation known valid UTF-8 strings in variables: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 strings in variables: string(3) "foo" -bool(false) +bool(true) Concatenation known valid UTF-8 in assignment: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 in assignment: string(3) "foo" bool(false) +string(3) "fxo" +bool(true) Concatenation known valid UTF-8 string with empty string in variables: bool(true) bool(true) @@ -131,9 +142,9 @@ Concatenation known valid UTF-8 string with empty string in assignment: bool(true) bool(true) Concatenation in loop: -bool(false) +bool(true) Concatenation in loop (compound assignment): -bool(false) +bool(true) Concatenation of objects: string(2) "zz" -bool(false) +bool(true)