diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 8bfc40c6c5a..5a4c30168ad 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1887,7 +1887,7 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { - zval *orig_op1 = op1; + zval *orig_op1 = op1; zval op1_copy, op2_copy; ZVAL_UNDEF(&op1_copy); @@ -1955,6 +1955,11 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1976,6 +1981,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval i_zval_ptr_dtor(result); } } + GC_ADD_FLAGS(result_str, flags); /* This has to happen first to account for the cases where result == op1 == op2 and * the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 68e6084fdf6..2d6a30d37cb 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,6 +191,19 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { + uint32_t flags = 0; + if (ZSTR_IS_VALID_UTF8(str)) { + flags = IS_STR_VALID_UTF8; + } + zend_ulong h = ZSTR_H(str); + zend_string_delref(str); + str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); + GC_ADD_FLAGS(str, flags); + ZSTR_H(str) = h; + return str; +} + static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string *str) { zend_string *ret; @@ -208,10 +221,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_permanent(zend_string ZEND_ASSERT(GC_FLAGS(str) & GC_PERSISTENT); if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 1); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, true); } return zend_add_interned_string(str, &interned_strings_permanent, IS_STR_PERMANENT); @@ -249,10 +259,7 @@ static zend_string* ZEND_FASTCALL zend_new_interned_string_request(zend_string * } #endif if (GC_REFCOUNT(str) > 1) { - zend_ulong h = ZSTR_H(str); - zend_string_delref(str); - str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0); - ZSTR_H(str) = h; + str = zend_init_string_for_interning(str, false); } ret = zend_add_interned_string(str, &CG(interned_strings), 0); diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index fc7cd813239..efde56931d6 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,6 +384,11 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -412,6 +417,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -420,6 +426,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3140,6 +3147,11 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3165,6 +3177,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP2_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -3173,6 +3186,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -3233,6 +3247,10 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index a1eefaa72e3..f4e1b67cbd4 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,6 +6624,11 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6649,6 +6654,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -6657,6 +6663,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -6717,6 +6724,10 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8691,6 +8702,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -8719,6 +8735,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -8727,6 +8744,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9116,6 +9134,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9141,6 +9164,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -9149,6 +9173,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -9209,6 +9234,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11057,6 +11086,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11085,6 +11119,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11093,6 +11128,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11482,6 +11518,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11507,6 +11548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -11515,6 +11557,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -11575,6 +11618,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15088,6 +15135,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15116,6 +15168,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15124,6 +15177,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15827,6 +15881,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15852,6 +15911,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -15860,6 +15920,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -15920,6 +15981,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16525,6 +16590,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -16553,6 +16623,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -16561,6 +16632,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17264,6 +17336,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17289,6 +17366,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -17297,6 +17375,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -17357,6 +17436,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18213,6 +18296,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18241,6 +18329,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18249,6 +18338,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18590,6 +18680,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18615,6 +18710,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -18623,6 +18719,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -18683,6 +18780,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40255,6 +40356,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -40283,6 +40389,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -40291,6 +40398,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42751,6 +42859,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42776,6 +42889,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -42784,6 +42898,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -42844,6 +42959,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -44067,6 +44186,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -44095,6 +44219,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -44103,6 +44228,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46492,6 +46618,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46517,6 +46648,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -46525,6 +46657,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -46585,6 +46718,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49361,6 +49498,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -49389,6 +49531,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND } str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -49397,6 +49540,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51885,6 +52029,11 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + flags = IS_STR_VALID_UTF8; + } if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -51910,6 +52059,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_extend(op1_str, len + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str) + len, ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op2_str, 0); @@ -51918,6 +52068,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + GC_ADD_FLAGS(str, flags); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV & (IS_TMP_VAR|IS_VAR)) { zend_string_release_ex(op1_str, 0); @@ -51978,6 +52129,10 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER str = zend_string_alloc(ZSTR_LEN(op1_str) + ZSTR_LEN(op2_str), 0); memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); + + if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { + GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); + } ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 0d589c7c61d..4d68bc3191c 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -549,7 +549,7 @@ zend_string* ZEND_FASTCALL accel_new_interned_string(zend_string *str) STRTAB_COLLISION(s) = *hash_slot; *hash_slot = STRTAB_STR_TO_POS(&ZCSG(interned_strings), s); GC_SET_REFCOUNT(s, 2); - GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); + GC_TYPE_INFO(s) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT)| (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); ZSTR_H(s) = h; ZSTR_LEN(s) = ZSTR_LEN(str); memcpy(ZSTR_VAL(s), ZSTR_VAL(str), ZSTR_LEN(s) + 1); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index c594ade575b..261b456a4fc 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,6 +1633,11 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1656,6 +1661,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); } while(0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(op1, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); ZSTR_VAL(result_str)[result_len] = '\0'; @@ -1667,6 +1673,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1674,6 +1685,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z } result_str = zend_string_alloc(result_len, 0); + GC_ADD_FLAGS(result_str, flags); memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); ZVAL_NEW_STR(result, result_str); @@ -1689,6 +1701,11 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; + uint32_t flags = 0; + + if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { + flags = IS_STR_VALID_UTF8; + } if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1710,6 +1727,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_str), op1_len); } while (0); + GC_ADD_FLAGS(result_str, flags); ZVAL_NEW_STR(result, result_str); memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index dcff7a78832..7bc95b71110 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -37,11 +37,13 @@ #define zend_set_str_gc_flags(str) do { \ GC_SET_REFCOUNT(str, 2); \ + uint32_t flags = GC_STRING | (ZSTR_IS_VALID_UTF8(str) ? IS_STR_VALID_UTF8 : 0); \ if (file_cache_only) { \ - GC_TYPE_INFO(str) = GC_STRING | (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ + flags |= (IS_STR_INTERNED << GC_FLAGS_SHIFT); \ } else { \ - GC_TYPE_INFO(str) = GC_STRING | ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ + flags |= ((IS_STR_INTERNED | IS_STR_PERMANENT) << GC_FLAGS_SHIFT); \ } \ + GC_TYPE_INFO(str) = flags; \ } while (0) #define zend_accel_store_string(str) do { \ diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index ff84cad62aa..5b6dfb6a076 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -47,10 +47,19 @@ $s = "f" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +// The "foo" string matches with a "Foo" class which is registered by the zend_test extension. +// That class name does not have the "valid UTF-8" flag because class names in general +// don't have to be UTF-8. As the "foo" string here goes through the interning logic, +// the string gets replaced by the "foo" string from the class, which does +// not have the "valid UTF-8" flag. We therefore choose a different test case: "fxo". +// The previous "foo" test case works because it is not interned. echo "Multiple concatenation known valid UTF-8 in assignment:\n"; $s = "f" . "o" . "o"; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +$s = "f" . "x" . "o"; +var_dump($s); +var_dump(zend_test_is_string_marked_as_valid_utf8($s)); echo "Concatenation known valid UTF-8 string with empty string in variables:\n"; $s1 = "f"; @@ -114,16 +123,18 @@ string(8) "2.0E+100" bool(true) Concatenation known valid UTF-8 strings in variables: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 strings in variables: string(3) "foo" -bool(false) +bool(true) Concatenation known valid UTF-8 in assignment: string(2) "fo" -bool(false) +bool(true) Multiple concatenation known valid UTF-8 in assignment: string(3) "foo" bool(false) +string(3) "fxo" +bool(true) Concatenation known valid UTF-8 string with empty string in variables: bool(true) bool(true) @@ -131,9 +142,9 @@ Concatenation known valid UTF-8 string with empty string in assignment: bool(true) bool(true) Concatenation in loop: -bool(false) +bool(true) Concatenation in loop (compound assignment): -bool(false) +bool(true) Concatenation of objects: string(2) "zz" -bool(false) +bool(true)