- Unicode impl of substr_replace()

This commit is contained in:
Rolland Santimano 2005-09-08 05:05:36 +00:00
parent f872d3346c
commit a441e78984

View file

@ -2553,6 +2553,118 @@ PHP_FUNCTION(substr)
/* }}} */ /* }}} */
/* {{{ php_unify_string_types
*/
PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
{
if (p == NULL || q == NULL) {
return;
}
if (Z_TYPE_PP(p) == IS_UNICODE) {
if (Z_TYPE_PP(q) == IS_BINARY) {
convert_to_binary_ex(p);
} else {
convert_to_unicode_ex(q);
}
} else if (Z_TYPE_PP(p) == IS_BINARY) {
convert_to_binary_ex(q);
} else {
if (Z_TYPE_PP(q) == IS_BINARY) {
convert_to_binary_ex(p);
} else {
convert_to_string_ex(q);
}
}
}
/* {{{ */
/* {{{ php_adjust_limits
*/
PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
{
int32_t i, str_codepts;
if (Z_TYPE_PP(str) == IS_UNICODE) {
i = 0; str_codepts = 0;
while (i < Z_USTRLEN_PP(str)) {
U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
str_codepts++;
}
} else {
str_codepts = Z_STRLEN_PP(str);
}
/* If "from" position is negative, count start position from the end
* of the string */
if (*f < 0) {
*f = str_codepts + *f;
if (*f < 0) {
*f = 0;
}
} else if (*f > str_codepts) {
*f = str_codepts;
}
/* If "length" position is negative, set it to the length
* needed to stop that many codepts/chars from the end of the string */
if (*l < 0) {
*l = str_codepts - *f + *l;
if (*l < 0) {
*l = 0;
}
}
if (((unsigned)(*f) + (unsigned)(*l)) > str_codepts) {
*l = str_codepts - *f;
}
}
/* }}} */
/* {{{ php_do_substr_replace
*/
PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC)
{
void *buf;
int32_t buf_len, idx;
UChar ch;
if (Z_TYPE_PP(str) == IS_UNICODE) {
buf = emalloc(UBYTES(Z_USTRLEN_PP(str) -l + Z_USTRLEN_PP(repl) + 1));
/* buf_len is codept count here */
buf_len = 0; idx = 0;
while (f-- > 0) {
U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch);
buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + buf_len);
}
if (repl != NULL) {
u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), Z_USTRLEN_PP(repl));
buf_len += Z_USTRLEN_PP(repl);
}
U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l);
u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, Z_USTRLEN_PP(str) - idx);
buf_len += (Z_USTRLEN_PP(str) - idx);
*((UChar *)buf + buf_len) = 0;
buf = erealloc(buf, UBYTES(buf_len + 1));
} else {
/* buf_len is char count here */
buf_len = Z_STRLEN_PP(str) - l + Z_STRLEN_PP(repl);
buf = emalloc(buf_len + 1);
memcpy(buf, Z_STRVAL_PP(str), f);
if (repl != NULL ) {
memcpy((char *)buf + f, Z_STRVAL_PP(repl), Z_STRLEN_PP(repl));
}
memcpy((char *)buf + f + Z_STRLEN_PP(repl), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l);
*((char *)buf + buf_len) = '\0';
}
*result = buf;
return buf_len;
}
/* }}} */
/* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length]) /* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length])
Replaces part of a string with another string */ Replaces part of a string with another string */
PHP_FUNCTION(substr_replace) PHP_FUNCTION(substr_replace)
@ -2561,10 +2673,10 @@ PHP_FUNCTION(substr_replace)
zval **from; zval **from;
zval **len = NULL; zval **len = NULL;
zval **repl; zval **repl;
char *result; void *result;
int result_len; int32_t result_len;
int l = 0; int32_t l = 0;
int f; int32_t f;
int argc = ZEND_NUM_ARGS(); int argc = ZEND_NUM_ARGS();
HashPosition pos_str, pos_from, pos_repl, pos_len; HashPosition pos_str, pos_from, pos_repl, pos_len;
@ -2575,16 +2687,18 @@ PHP_FUNCTION(substr_replace)
WRONG_PARAM_COUNT; WRONG_PARAM_COUNT;
} }
if (Z_TYPE_PP(str) != IS_ARRAY) { if (Z_TYPE_PP(str) != IS_ARRAY && Z_TYPE_PP(str) != IS_UNICODE &&
convert_to_string_ex(str); Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) {
convert_to_text_ex(str);
} }
if (Z_TYPE_PP(repl) != IS_ARRAY) { if (Z_TYPE_PP(repl) != IS_ARRAY && Z_TYPE_PP(repl) != IS_UNICODE &&
convert_to_string_ex(repl); Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
convert_to_text_ex(repl);
} }
if (Z_TYPE_PP(from) != IS_ARRAY) { if (Z_TYPE_PP(from) != IS_ARRAY) {
convert_to_long_ex(from); convert_to_long_ex(from);
f = Z_LVAL_PP(from);
} }
if (argc > 3) { if (argc > 3) {
if (Z_TYPE_PP(len) != IS_ARRAY) { if (Z_TYPE_PP(len) != IS_ARRAY) {
convert_to_long_ex(len); convert_to_long_ex(len);
@ -2592,80 +2706,59 @@ PHP_FUNCTION(substr_replace)
} }
} else { } else {
if (Z_TYPE_PP(str) != IS_ARRAY) { if (Z_TYPE_PP(str) != IS_ARRAY) {
l = Z_STRLEN_PP(str); if (Z_TYPE_PP(str) == IS_UNICODE) {
l = Z_USTRLEN_PP(str);
} else {
l = Z_STRLEN_PP(str);
}
} }
} }
if (Z_TYPE_PP(str) == IS_STRING) { if (Z_TYPE_PP(str) != IS_ARRAY) {
if ( if ( (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) ||
(argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) (argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) ) {
||
(argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len))
) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should be of same type - numerical or array "); php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should be of same type - numerical or array ");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); RETURN_ZVAL(*str, 1, 0);
} }
if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) { if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) {
if (zend_hash_num_elements(Z_ARRVAL_PP(from)) != zend_hash_num_elements(Z_ARRVAL_PP(len))) { if (zend_hash_num_elements(Z_ARRVAL_PP(from)) != zend_hash_num_elements(Z_ARRVAL_PP(len))) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should have the same number of elements"); php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should have the same number of elements");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); RETURN_ZVAL(*str, 1, 0);
} }
} }
} }
if (Z_TYPE_PP(str) != IS_ARRAY) { if (Z_TYPE_PP(str) != IS_ARRAY) {
if (Z_TYPE_PP(from) != IS_ARRAY) { if (Z_TYPE_PP(from) != IS_ARRAY ) {
int repl_len = 0;
f = Z_LVAL_PP(from);
/* if "from" position is negative, count start position from the end
* of the string
*/
if (f < 0) {
f = Z_STRLEN_PP(str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(str)) {
f = Z_STRLEN_PP(str);
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if (l < 0) {
l = (Z_STRLEN_PP(str) - f) + l;
if (l < 0) {
l = 0;
}
}
if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) {
l = Z_STRLEN_PP(str) - f;
}
if (Z_TYPE_PP(repl) == IS_ARRAY) { if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl); zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) {
convert_to_string_ex(tmp_repl); if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
repl_len = Z_STRLEN_PP(tmp_repl); convert_to_text_ex(tmp_repl);
}
} else {
tmp_repl = NULL;
} }
} else { } else {
repl_len = Z_STRLEN_PP(repl); tmp_repl = repl;
} }
result_len = Z_STRLEN_PP(str) - l + repl_len;
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(str), f); if (Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl))
if (repl_len) { php_unify_string_types(str, tmp_repl TSRMLS_CC);
memcpy((result + f), (Z_TYPE_PP(repl) == IS_ARRAY ? Z_STRVAL_PP(tmp_repl) : Z_STRVAL_PP(repl)), repl_len); php_adjust_limits(str, &f, &l);
result_len = php_do_substr_replace(&result, str, tmp_repl, f, l TSRMLS_CC);
if (Z_TYPE_PP(str) == IS_UNICODE) {
RETURN_UNICODEL((UChar *)result, result_len, 0);
} else if (Z_TYPE_PP(str) == IS_BINARY) {
RETURN_BINARYL((char *)result, result_len, 0);
} else {
RETURN_STRINGL((char *)result, result_len, 0);
} }
memcpy((result + f + repl_len), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l);
result[result_len] = '\0';
RETURN_STRINGL(result, result_len, 0);
} else { } else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Functionality of 'from' and 'len' as arrays is not implemented."); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Functionality of 'from' and 'len' as arrays is not implemented.");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); RETURN_ZVAL(*str, 1, 0);
} }
} else { /* str is array of strings */ } else { /* str is array of strings */
array_init(return_value); array_init(return_value);
@ -2673,104 +2766,76 @@ PHP_FUNCTION(substr_replace)
if (Z_TYPE_PP(from) == IS_ARRAY) { if (Z_TYPE_PP(from) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from), &pos_from); zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from), &pos_from);
} }
if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) { if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len), &pos_len); zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len), &pos_len);
} }
if (Z_TYPE_PP(repl) == IS_ARRAY) { if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl); zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
} }
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str); zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str);
while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void **) &tmp_str, &pos_str) == SUCCESS) { while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void **) &tmp_str, &pos_str) == SUCCESS) {
convert_to_string_ex(tmp_str); if (Z_TYPE_PP(tmp_str) != IS_UNICODE && Z_TYPE_PP(tmp_str) != IS_BINARY && Z_TYPE_PP(tmp_str) != IS_STRING) {
convert_to_text_ex(tmp_str);
}
if (Z_TYPE_PP(from) == IS_ARRAY) { if (Z_TYPE_PP(from) == IS_ARRAY) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from, &pos_from)) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from, &pos_from)) {
convert_to_long_ex(tmp_from); convert_to_long_ex(tmp_from);
f = Z_LVAL_PP(tmp_from); f = Z_LVAL_PP(tmp_from);
if (f < 0) {
f = Z_STRLEN_PP(tmp_str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(tmp_str)) {
f = Z_STRLEN_PP(tmp_str);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from); zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from);
} else { } else {
f = 0; f = 0;
} }
} else {
f = Z_LVAL_PP(from);
if (f < 0) {
f = Z_STRLEN_PP(tmp_str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(tmp_str)) {
f = Z_STRLEN_PP(tmp_str);
}
} }
if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) { if (argc > 3 && (Z_TYPE_PP(len) == IS_ARRAY)) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) {
convert_to_long_ex(tmp_len); convert_to_long_ex(tmp_len);
l = Z_LVAL_PP(tmp_len); l = Z_LVAL_PP(tmp_len);
zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len); zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len);
} else {
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
l = Z_USTRLEN_PP(tmp_str);
} else {
l = Z_STRLEN_PP(tmp_str);
}
}
} else if (argc > 3) {
/* 'l' parsed & set at top of funcn */
} else {
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
l = Z_USTRLEN_PP(tmp_str);
} else { } else {
l = Z_STRLEN_PP(tmp_str); l = Z_STRLEN_PP(tmp_str);
} }
} else if (argc > 3) {
l = Z_LVAL_PP(len);
} else {
l = Z_STRLEN_PP(tmp_str);
} }
if (l < 0) {
l = (Z_STRLEN_PP(tmp_str) - f) + l;
if (l < 0) {
l = 0;
}
}
if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(tmp_str)) {
l = Z_STRLEN_PP(tmp_str) - f;
}
result_len = Z_STRLEN_PP(tmp_str) - l;
if (Z_TYPE_PP(repl) == IS_ARRAY) { if (Z_TYPE_PP(repl) == IS_ARRAY) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) { if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) {
convert_to_string_ex(tmp_repl); if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
result_len += Z_STRLEN_PP(tmp_repl); convert_to_text_ex(tmp_repl);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl); zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl);
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(tmp_repl), Z_STRLEN_PP(tmp_repl));
memcpy((result + f + Z_STRLEN_PP(tmp_repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
} else { } else {
result = emalloc(result_len + 1); tmp_repl = NULL;
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
} }
} else { } else {
result_len += Z_STRLEN_PP(repl); tmp_repl = repl;
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(repl), Z_STRLEN_PP(repl));
memcpy((result + f + Z_STRLEN_PP(repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
} }
result[result_len] = '\0'; if (Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl))
add_next_index_stringl(return_value, result, result_len, 0); php_unify_string_types(tmp_str, tmp_repl TSRMLS_CC);
php_adjust_limits(tmp_str, &f, &l);
result_len = php_do_substr_replace(&result, tmp_str, tmp_repl, f, l TSRMLS_CC);
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
add_next_index_unicodel(return_value, (UChar *)result, result_len, 0);
} else if (Z_TYPE_PP(tmp_str) == IS_BINARY) {
add_next_index_binaryl(return_value, (char *)result, result_len, 0);
} else {
add_next_index_stringl(return_value, (char *)result, result_len, 0);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str); zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str);
} /*while*/ } /*while*/