- Unicode impl of substr_replace()

This commit is contained in:
Rolland Santimano 2005-09-08 05:05:36 +00:00
parent f872d3346c
commit a441e78984

View file

@ -2553,6 +2553,118 @@ PHP_FUNCTION(substr)
/* }}} */
/* {{{ php_unify_string_types
*/
PHPAPI void php_unify_string_types(zval **p, zval **q TSRMLS_DC)
{
if (p == NULL || q == NULL) {
return;
}
if (Z_TYPE_PP(p) == IS_UNICODE) {
if (Z_TYPE_PP(q) == IS_BINARY) {
convert_to_binary_ex(p);
} else {
convert_to_unicode_ex(q);
}
} else if (Z_TYPE_PP(p) == IS_BINARY) {
convert_to_binary_ex(q);
} else {
if (Z_TYPE_PP(q) == IS_BINARY) {
convert_to_binary_ex(p);
} else {
convert_to_string_ex(q);
}
}
}
/* {{{ */
/* {{{ php_adjust_limits
*/
PHPAPI void php_adjust_limits(zval **str, int32_t *f, int32_t *l)
{
int32_t i, str_codepts;
if (Z_TYPE_PP(str) == IS_UNICODE) {
i = 0; str_codepts = 0;
while (i < Z_USTRLEN_PP(str)) {
U16_FWD_1(Z_USTRVAL_PP(str), i, Z_USTRLEN_PP(str));
str_codepts++;
}
} else {
str_codepts = Z_STRLEN_PP(str);
}
/* If "from" position is negative, count start position from the end
* of the string */
if (*f < 0) {
*f = str_codepts + *f;
if (*f < 0) {
*f = 0;
}
} else if (*f > str_codepts) {
*f = str_codepts;
}
/* If "length" position is negative, set it to the length
* needed to stop that many codepts/chars from the end of the string */
if (*l < 0) {
*l = str_codepts - *f + *l;
if (*l < 0) {
*l = 0;
}
}
if (((unsigned)(*f) + (unsigned)(*l)) > str_codepts) {
*l = str_codepts - *f;
}
}
/* }}} */
/* {{{ php_do_substr_replace
*/
PHPAPI int32_t php_do_substr_replace(void **result, zval **str, zval **repl, int32_t f, int32_t l TSRMLS_DC)
{
void *buf;
int32_t buf_len, idx;
UChar ch;
if (Z_TYPE_PP(str) == IS_UNICODE) {
buf = emalloc(UBYTES(Z_USTRLEN_PP(str) -l + Z_USTRLEN_PP(repl) + 1));
/* buf_len is codept count here */
buf_len = 0; idx = 0;
while (f-- > 0) {
U16_NEXT(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), ch);
buf_len += zend_codepoint_to_uchar(ch, (UChar *)buf + buf_len);
}
if (repl != NULL) {
u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(repl), Z_USTRLEN_PP(repl));
buf_len += Z_USTRLEN_PP(repl);
}
U16_FWD_N(Z_USTRVAL_PP(str), idx, Z_USTRLEN_PP(str), l);
u_memcpy((UChar *)buf + buf_len, Z_USTRVAL_PP(str) + idx, Z_USTRLEN_PP(str) - idx);
buf_len += (Z_USTRLEN_PP(str) - idx);
*((UChar *)buf + buf_len) = 0;
buf = erealloc(buf, UBYTES(buf_len + 1));
} else {
/* buf_len is char count here */
buf_len = Z_STRLEN_PP(str) - l + Z_STRLEN_PP(repl);
buf = emalloc(buf_len + 1);
memcpy(buf, Z_STRVAL_PP(str), f);
if (repl != NULL ) {
memcpy((char *)buf + f, Z_STRVAL_PP(repl), Z_STRLEN_PP(repl));
}
memcpy((char *)buf + f + Z_STRLEN_PP(repl), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l);
*((char *)buf + buf_len) = '\0';
}
*result = buf;
return buf_len;
}
/* }}} */
/* {{{ proto mixed substr_replace(mixed str, mixed repl, mixed start [, mixed length])
Replaces part of a string with another string */
PHP_FUNCTION(substr_replace)
@ -2561,10 +2673,10 @@ PHP_FUNCTION(substr_replace)
zval **from;
zval **len = NULL;
zval **repl;
char *result;
int result_len;
int l = 0;
int f;
void *result;
int32_t result_len;
int32_t l = 0;
int32_t f;
int argc = ZEND_NUM_ARGS();
HashPosition pos_str, pos_from, pos_repl, pos_len;
@ -2575,16 +2687,18 @@ PHP_FUNCTION(substr_replace)
WRONG_PARAM_COUNT;
}
if (Z_TYPE_PP(str) != IS_ARRAY) {
convert_to_string_ex(str);
if (Z_TYPE_PP(str) != IS_ARRAY && Z_TYPE_PP(str) != IS_UNICODE &&
Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) {
convert_to_text_ex(str);
}
if (Z_TYPE_PP(repl) != IS_ARRAY) {
convert_to_string_ex(repl);
if (Z_TYPE_PP(repl) != IS_ARRAY && Z_TYPE_PP(repl) != IS_UNICODE &&
Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
convert_to_text_ex(repl);
}
if (Z_TYPE_PP(from) != IS_ARRAY) {
convert_to_long_ex(from);
f = Z_LVAL_PP(from);
}
if (argc > 3) {
if (Z_TYPE_PP(len) != IS_ARRAY) {
convert_to_long_ex(len);
@ -2592,80 +2706,59 @@ PHP_FUNCTION(substr_replace)
}
} else {
if (Z_TYPE_PP(str) != IS_ARRAY) {
l = Z_STRLEN_PP(str);
if (Z_TYPE_PP(str) == IS_UNICODE) {
l = Z_USTRLEN_PP(str);
} else {
l = Z_STRLEN_PP(str);
}
}
}
if (Z_TYPE_PP(str) == IS_STRING) {
if (
(argc == 3 && Z_TYPE_PP(from) == IS_ARRAY)
||
(argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len))
) {
if (Z_TYPE_PP(str) != IS_ARRAY) {
if ( (argc == 3 && Z_TYPE_PP(from) == IS_ARRAY) ||
(argc == 4 && Z_TYPE_PP(from) != Z_TYPE_PP(len)) ) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should be of same type - numerical or array ");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
RETURN_ZVAL(*str, 1, 0);
}
if (argc == 4 && Z_TYPE_PP(from) == IS_ARRAY) {
if (zend_hash_num_elements(Z_ARRVAL_PP(from)) != zend_hash_num_elements(Z_ARRVAL_PP(len))) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "'from' and 'len' should have the same number of elements");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
RETURN_ZVAL(*str, 1, 0);
}
}
}
if (Z_TYPE_PP(str) != IS_ARRAY) {
if (Z_TYPE_PP(from) != IS_ARRAY) {
int repl_len = 0;
f = Z_LVAL_PP(from);
/* if "from" position is negative, count start position from the end
* of the string
*/
if (f < 0) {
f = Z_STRLEN_PP(str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(str)) {
f = Z_STRLEN_PP(str);
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if (l < 0) {
l = (Z_STRLEN_PP(str) - f) + l;
if (l < 0) {
l = 0;
}
}
if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) {
l = Z_STRLEN_PP(str) - f;
}
if (Z_TYPE_PP(from) != IS_ARRAY ) {
if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) {
convert_to_string_ex(tmp_repl);
repl_len = Z_STRLEN_PP(tmp_repl);
if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
convert_to_text_ex(tmp_repl);
}
} else {
tmp_repl = NULL;
}
} else {
repl_len = Z_STRLEN_PP(repl);
tmp_repl = repl;
}
result_len = Z_STRLEN_PP(str) - l + repl_len;
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(str), f);
if (repl_len) {
memcpy((result + f), (Z_TYPE_PP(repl) == IS_ARRAY ? Z_STRVAL_PP(tmp_repl) : Z_STRVAL_PP(repl)), repl_len);
if (Z_TYPE_PP(str) != Z_TYPE_PP(tmp_repl))
php_unify_string_types(str, tmp_repl TSRMLS_CC);
php_adjust_limits(str, &f, &l);
result_len = php_do_substr_replace(&result, str, tmp_repl, f, l TSRMLS_CC);
if (Z_TYPE_PP(str) == IS_UNICODE) {
RETURN_UNICODEL((UChar *)result, result_len, 0);
} else if (Z_TYPE_PP(str) == IS_BINARY) {
RETURN_BINARYL((char *)result, result_len, 0);
} else {
RETURN_STRINGL((char *)result, result_len, 0);
}
memcpy((result + f + repl_len), Z_STRVAL_PP(str) + f + l, Z_STRLEN_PP(str) - f - l);
result[result_len] = '\0';
RETURN_STRINGL(result, result_len, 0);
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Functionality of 'from' and 'len' as arrays is not implemented.");
RETURN_STRINGL(Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
RETURN_ZVAL(*str, 1, 0);
}
} else { /* str is array of strings */
array_init(return_value);
@ -2673,104 +2766,76 @@ PHP_FUNCTION(substr_replace)
if (Z_TYPE_PP(from) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(from), &pos_from);
}
if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(len), &pos_len);
}
if (Z_TYPE_PP(repl) == IS_ARRAY) {
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(repl), &pos_repl);
}
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(str), &pos_str);
while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(str), (void **) &tmp_str, &pos_str) == SUCCESS) {
convert_to_string_ex(tmp_str);
if (Z_TYPE_PP(tmp_str) != IS_UNICODE && Z_TYPE_PP(tmp_str) != IS_BINARY && Z_TYPE_PP(tmp_str) != IS_STRING) {
convert_to_text_ex(tmp_str);
}
if (Z_TYPE_PP(from) == IS_ARRAY) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(from), (void **) &tmp_from, &pos_from)) {
convert_to_long_ex(tmp_from);
f = Z_LVAL_PP(tmp_from);
if (f < 0) {
f = Z_STRLEN_PP(tmp_str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(tmp_str)) {
f = Z_STRLEN_PP(tmp_str);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(from), &pos_from);
} else {
f = 0;
}
} else {
f = Z_LVAL_PP(from);
if (f < 0) {
f = Z_STRLEN_PP(tmp_str) + f;
if (f < 0) {
f = 0;
}
} else if (f > Z_STRLEN_PP(tmp_str)) {
f = Z_STRLEN_PP(tmp_str);
}
}
if (argc > 3 && Z_TYPE_PP(len) == IS_ARRAY) {
if (argc > 3 && (Z_TYPE_PP(len) == IS_ARRAY)) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(len), (void **) &tmp_len, &pos_len)) {
convert_to_long_ex(tmp_len);
l = Z_LVAL_PP(tmp_len);
zend_hash_move_forward_ex(Z_ARRVAL_PP(len), &pos_len);
} else {
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
l = Z_USTRLEN_PP(tmp_str);
} else {
l = Z_STRLEN_PP(tmp_str);
}
}
} else if (argc > 3) {
/* 'l' parsed & set at top of funcn */
} else {
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
l = Z_USTRLEN_PP(tmp_str);
} else {
l = Z_STRLEN_PP(tmp_str);
}
} else if (argc > 3) {
l = Z_LVAL_PP(len);
} else {
l = Z_STRLEN_PP(tmp_str);
}
if (l < 0) {
l = (Z_STRLEN_PP(tmp_str) - f) + l;
if (l < 0) {
l = 0;
}
}
if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(tmp_str)) {
l = Z_STRLEN_PP(tmp_str) - f;
}
result_len = Z_STRLEN_PP(tmp_str) - l;
if (Z_TYPE_PP(repl) == IS_ARRAY) {
if (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(repl), (void **) &tmp_repl, &pos_repl)) {
convert_to_string_ex(tmp_repl);
result_len += Z_STRLEN_PP(tmp_repl);
if (Z_TYPE_PP(repl) != IS_UNICODE && Z_TYPE_PP(repl) != IS_BINARY && Z_TYPE_PP(repl) != IS_STRING) {
convert_to_text_ex(tmp_repl);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(repl), &pos_repl);
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(tmp_repl), Z_STRLEN_PP(tmp_repl));
memcpy((result + f + Z_STRLEN_PP(tmp_repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
} else {
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
tmp_repl = NULL;
}
} else {
result_len += Z_STRLEN_PP(repl);
result = emalloc(result_len + 1);
memcpy(result, Z_STRVAL_PP(tmp_str), f);
memcpy((result + f), Z_STRVAL_PP(repl), Z_STRLEN_PP(repl));
memcpy((result + f + Z_STRLEN_PP(repl)), Z_STRVAL_PP(tmp_str) + f + l, Z_STRLEN_PP(tmp_str) - f - l);
tmp_repl = repl;
}
result[result_len] = '\0';
add_next_index_stringl(return_value, result, result_len, 0);
if (Z_TYPE_PP(tmp_str) != Z_TYPE_PP(tmp_repl))
php_unify_string_types(tmp_str, tmp_repl TSRMLS_CC);
php_adjust_limits(tmp_str, &f, &l);
result_len = php_do_substr_replace(&result, tmp_str, tmp_repl, f, l TSRMLS_CC);
if (Z_TYPE_PP(tmp_str) == IS_UNICODE) {
add_next_index_unicodel(return_value, (UChar *)result, result_len, 0);
} else if (Z_TYPE_PP(tmp_str) == IS_BINARY) {
add_next_index_binaryl(return_value, (char *)result, result_len, 0);
} else {
add_next_index_stringl(return_value, (char *)result, result_len, 0);
}
zend_hash_move_forward_ex(Z_ARRVAL_PP(str), &pos_str);
} /*while*/