mirror of
https://github.com/php/php-src.git
synced 2025-08-15 13:38:49 +02:00
Avoid string copies in ext/intl after string conversion (#18636)
Introduces intl_convert_utf8_to_utf16_zstr() to convert a UTF-8 string to a UTF-16 string zend_string* instance. This way we avoid a double copy later from a UChar* into a zend_string*.
This commit is contained in:
parent
16e154a2c4
commit
b8fa1657ff
4 changed files with 66 additions and 27 deletions
|
@ -526,6 +526,10 @@ PHP 8.5 UPGRADE NOTES
|
|||
. Add OPcode specialization for `=== []` and `!== []` comparisons.
|
||||
. Creating exception objects is now much faster.
|
||||
|
||||
- Intl:
|
||||
. Now avoids creating extra string copies when converting strings
|
||||
for use in the collator.
|
||||
|
||||
- ReflectionProperty:
|
||||
. Improved performance of the following methods: getValue(), getRawValue(),
|
||||
isInitialized(), setValue(), setRawValue().
|
||||
|
|
|
@ -38,8 +38,6 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
|
|||
{
|
||||
const char* old_val;
|
||||
size_t old_val_len;
|
||||
UChar* new_val = NULL;
|
||||
int32_t new_val_len = 0;
|
||||
zval znew_val;
|
||||
|
||||
/* Process string values only. */
|
||||
|
@ -49,17 +47,13 @@ static void collator_convert_hash_item_from_utf8_to_utf16(
|
|||
old_val = Z_STRVAL_P( hashData );
|
||||
old_val_len = Z_STRLEN_P( hashData );
|
||||
|
||||
/* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
|
||||
intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
|
||||
/* Convert it from UTF-8 to UTF-16LE. */
|
||||
zend_string *zstr = intl_convert_utf8_to_utf16_zstr( old_val, old_val_len, status );
|
||||
if( U_FAILURE( *status ) )
|
||||
return;
|
||||
|
||||
/* Update current hash item with the converted value. */
|
||||
ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
|
||||
//???
|
||||
efree(new_val);
|
||||
/* hack to fix use of initialized value */
|
||||
Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
|
||||
ZVAL_NEW_STR( &znew_val, zstr );
|
||||
|
||||
if( hashKey)
|
||||
{
|
||||
|
@ -176,23 +170,19 @@ zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
|
|||
|
||||
zend_string *collator_convert_zstr_utf8_to_utf16(zend_string *utf8_str)
|
||||
{
|
||||
UChar *ustr = NULL;
|
||||
int32_t ustr_len = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
/* Convert the string to UTF-16. */
|
||||
intl_convert_utf8_to_utf16(
|
||||
&ustr, &ustr_len,
|
||||
zend_string *zstr = intl_convert_utf8_to_utf16_zstr(
|
||||
ZSTR_VAL(utf8_str), ZSTR_LEN(utf8_str),
|
||||
&status);
|
||||
// FIXME Or throw error or use intl internal error handler
|
||||
if (U_FAILURE(status)) {
|
||||
php_error(E_WARNING,
|
||||
"Error casting object to string in collator_convert_zstr_utf8_to_utf16()");
|
||||
zstr = ZSTR_EMPTY_ALLOC();
|
||||
}
|
||||
|
||||
zend_string *zstr = zend_string_init((char *) ustr, UBYTES(ustr_len), 0);
|
||||
efree((char *)ustr);
|
||||
return zstr;
|
||||
}
|
||||
|
||||
|
@ -203,8 +193,6 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
|
|||
{
|
||||
zval* zstr = NULL;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UChar* ustr = NULL;
|
||||
int32_t ustr_len = 0;
|
||||
|
||||
/* Bail out if it's not an object. */
|
||||
if( Z_TYPE_P( obj ) != IS_OBJECT )
|
||||
|
@ -229,25 +217,20 @@ zval* collator_convert_object_to_string( zval* obj, zval *rv )
|
|||
}
|
||||
|
||||
/* Convert the string to UTF-16. */
|
||||
intl_convert_utf8_to_utf16(
|
||||
&ustr, &ustr_len,
|
||||
zend_string *converted_str = intl_convert_utf8_to_utf16_zstr(
|
||||
Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
|
||||
&status );
|
||||
// FIXME Or throw error or use intl internal error handler
|
||||
if( U_FAILURE( status ) )
|
||||
if( U_FAILURE( status ) ) {
|
||||
php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
|
||||
converted_str = ZSTR_EMPTY_ALLOC();
|
||||
}
|
||||
|
||||
/* Cleanup zstr to hold utf16 string. */
|
||||
zval_ptr_dtor_str( zstr );
|
||||
|
||||
/* Set string. */
|
||||
ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
|
||||
//???
|
||||
efree((char *)ustr);
|
||||
|
||||
/* Don't free ustr cause it's set in zstr without copy.
|
||||
* efree( ustr );
|
||||
*/
|
||||
ZVAL_STR( zstr, converted_str );
|
||||
|
||||
return zstr;
|
||||
}
|
||||
|
|
|
@ -104,6 +104,54 @@ void intl_convert_utf8_to_utf16(
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* Convert given string from UTF-8 to UTF-16 to a zend_string*
|
||||
*
|
||||
* @param src String to convert.
|
||||
* @param src_len Length of the source string.
|
||||
* @param status Conversion status.
|
||||
*
|
||||
* @return zend_string* on success and NULL on failure
|
||||
*/
|
||||
zend_string *intl_convert_utf8_to_utf16_zstr(
|
||||
const char* src, size_t src_len,
|
||||
UErrorCode* status )
|
||||
{
|
||||
int32_t dst_len = 0;
|
||||
|
||||
*status = U_ZERO_ERROR;
|
||||
|
||||
if(src_len > INT32_MAX) {
|
||||
/* we cannot fit this string */
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Pre-flight */
|
||||
u_strFromUTF8( NULL, 0, &dst_len, src, (int32_t)src_len, status );
|
||||
if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
|
||||
return NULL;
|
||||
|
||||
/* Note: l=sizeof(UChar)-1 because we need sizeof(UChar) bytes for the NUL terminator instead of 1. */
|
||||
zend_string *dst = zend_string_safe_alloc(sizeof(UChar), dst_len, sizeof(UChar) - 1, false);
|
||||
UChar *dst_buf = (UChar *) ZSTR_VAL(dst);
|
||||
ZEND_ASSERT((ZSTR_LEN(dst) - 1) / 2 == dst_len);
|
||||
/* However, the length must not include the NUL terminator that we included previously. */
|
||||
ZSTR_LEN(dst)--;
|
||||
|
||||
/* Convert source string from UTF-8 to UTF-16. */
|
||||
*status = U_ZERO_ERROR;
|
||||
u_strFromUTF8( dst_buf, dst_len + 1, NULL, src, src_len, status );
|
||||
if( U_FAILURE( *status ) )
|
||||
{
|
||||
zend_string_efree( dst );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dst_buf[dst_len] = 0;
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
/* {{{ intl_convert_utf16_to_utf8
|
||||
* Convert given string from UTF-16 to UTF-8.
|
||||
*
|
||||
|
|
|
@ -23,6 +23,10 @@ void intl_convert_utf8_to_utf16(
|
|||
const char* src, size_t src_len,
|
||||
UErrorCode* status );
|
||||
|
||||
zend_string *intl_convert_utf8_to_utf16_zstr(
|
||||
const char* src, size_t src_len,
|
||||
UErrorCode* status );
|
||||
|
||||
zend_string* intl_convert_utf16_to_utf8(
|
||||
const UChar* src, int32_t src_len,
|
||||
UErrorCode* status );
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue