mirror of
https://github.com/php/php-src.git
synced 2025-08-18 06:58:55 +02:00
- Fixed a bug in the ucfirst() implementation. If tmp_len = 0, it will not
write the uppercased character to the buffer, but only returns the length of the uppercased letter, as per http://icu.sourceforge.net/apiref/icu4c/ustring_8h.html#a50. - Updated is_string(): if Unicode semantics are turned on, return "true" for Unicode strings only; if Unicode semantics are turned off, return "true" for native strings only. - Added the is_binary() function, which returns "true" for native strings only. - Added a first implementation of the upgraded strtr() function. It works except when combining sequences or surrogates are used in the non-array method of calling this function.
This commit is contained in:
parent
34c493ea07
commit
2ffc93140d
4 changed files with 269 additions and 20 deletions
|
@ -3263,8 +3263,8 @@ PHP_FUNCTION(chr)
|
|||
Makes an Unicode string's first character uppercase */
|
||||
static void php_u_ucfirst(zval *ustr, zval *return_value TSRMLS_DC)
|
||||
{
|
||||
UChar tmp[3] = { 0,}; /* UChar32 will be converted to upto 2 UChar units ? */
|
||||
int tmp_len = 0;
|
||||
UChar tmp[3] = { 0, 0, 0 }; /* UChar32 will be converted to upto 2 UChar units ? */
|
||||
int tmp_len = 2;
|
||||
int pos = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -3427,6 +3427,181 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_u_strtr
|
||||
*/
|
||||
PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int str_from_len, UChar *str_to, int str_to_len, int trlen)
|
||||
{
|
||||
int i, j;
|
||||
int can_optimize = 1;
|
||||
|
||||
if ((trlen < 1) || (len < 1)) {
|
||||
return str;
|
||||
}
|
||||
|
||||
/* First loop to see if we can use the optimized version */
|
||||
for (i = 0; i < trlen; i++) {
|
||||
if (str_from[i] > 255 || str_to[i] > 255) {
|
||||
can_optimize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (can_optimize) {
|
||||
for (i = trlen; i < str_from_len; i++) {
|
||||
if (str_from[i] > 255) {
|
||||
can_optimize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (can_optimize) {
|
||||
for (i = trlen; i < str_to_len; i++) {
|
||||
if (str_from[i] > 255) {
|
||||
can_optimize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (can_optimize) {
|
||||
UChar xlat[256];
|
||||
|
||||
for (i = 0; i < 256; xlat[i] = i, i++);
|
||||
|
||||
for (i = 0; i < trlen; i++) {
|
||||
xlat[str_from[i]] = str_to[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
str[i] = xlat[str[i]];
|
||||
}
|
||||
|
||||
return str;
|
||||
} else {
|
||||
/* UTODO: We're quite fucked... this is *extremely* slow, better
|
||||
* algorithm wanted here! It also doesn't handle combining sequences, I
|
||||
* asked the icu-support list for good algorithms. */
|
||||
for (i = 0; i < len; i++) {
|
||||
for (j = 0; j < trlen; j++) {
|
||||
if (str[i] == str_from[j]) {
|
||||
str[i] = str_to[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_u_strtr_array
|
||||
*/
|
||||
static void php_u_strtr_array(zval *return_value, UChar *str, int slen, HashTable *hash)
|
||||
{
|
||||
zval **entry;
|
||||
zstr string_key;
|
||||
uint string_key_len;
|
||||
zval **trans;
|
||||
zval ctmp;
|
||||
ulong num_key;
|
||||
int minlen = 128*1024;
|
||||
int maxlen = 0, pos, len, found;
|
||||
UChar *key;
|
||||
HashPosition hpos;
|
||||
smart_str result = {0};
|
||||
HashTable tmp_hash;
|
||||
|
||||
zend_hash_init(&tmp_hash, 0, NULL, NULL, 0);
|
||||
zend_hash_internal_pointer_reset_ex(hash, &hpos);
|
||||
while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
|
||||
switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
|
||||
case HASH_KEY_IS_UNICODE:
|
||||
len = string_key_len-1;
|
||||
if (len < 1) {
|
||||
zend_hash_destroy(&tmp_hash);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
zend_u_hash_add(&tmp_hash, IS_UNICODE, string_key, string_key_len, entry, sizeof(zval*), NULL);
|
||||
if (len > maxlen) {
|
||||
maxlen = len;
|
||||
}
|
||||
if (len < minlen) {
|
||||
minlen = len;
|
||||
}
|
||||
break;
|
||||
|
||||
case HASH_KEY_IS_LONG:
|
||||
Z_TYPE(ctmp) = IS_LONG;
|
||||
Z_LVAL(ctmp) = num_key;
|
||||
|
||||
convert_to_unicode(&ctmp);
|
||||
len = Z_USTRLEN(ctmp);
|
||||
zend_u_hash_add(&tmp_hash, IS_UNICODE, Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
|
||||
zval_dtor(&ctmp);
|
||||
|
||||
if (len > maxlen) {
|
||||
maxlen = len;
|
||||
}
|
||||
if (len < minlen) {
|
||||
minlen = len;
|
||||
}
|
||||
break;
|
||||
}
|
||||
zend_hash_move_forward_ex(hash, &hpos);
|
||||
}
|
||||
|
||||
key = eumalloc(maxlen+1);
|
||||
pos = 0;
|
||||
|
||||
while (pos < slen) {
|
||||
if ((pos + maxlen) > slen) {
|
||||
maxlen = slen - pos;
|
||||
}
|
||||
|
||||
found = 0;
|
||||
memcpy(key, str+pos, UBYTES(maxlen));
|
||||
|
||||
for (len = maxlen; len >= minlen; len--) {
|
||||
key[len] = 0;
|
||||
|
||||
if (zend_u_hash_find(&tmp_hash, IS_UNICODE, ZSTR(key), len+1, (void**)&trans) == SUCCESS) {
|
||||
UChar *tval;
|
||||
int tlen;
|
||||
zval tmp;
|
||||
|
||||
if (Z_TYPE_PP(trans) != IS_UNICODE) {
|
||||
tmp = **trans;
|
||||
zval_copy_ctor(&tmp);
|
||||
convert_to_string(&tmp);
|
||||
tval = Z_USTRVAL(tmp);
|
||||
tlen = Z_USTRLEN(tmp);
|
||||
} else {
|
||||
tval = Z_USTRVAL_PP(trans);
|
||||
tlen = Z_USTRLEN_PP(trans);
|
||||
}
|
||||
|
||||
smart_str_appendl(&result, tval, UBYTES(tlen));
|
||||
pos += len;
|
||||
found = 1;
|
||||
|
||||
if (Z_TYPE_PP(trans) != IS_UNICODE) {
|
||||
zval_dtor(&tmp);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (! found) {
|
||||
smart_str_append2c(&result, str[pos]);
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
efree(key);
|
||||
zend_hash_destroy(&tmp_hash);
|
||||
smart_str_0(&result);
|
||||
RETVAL_UNICODEL((UChar *) result.c, result.len >> 1, 0);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_strtr_array
|
||||
*/
|
||||
static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash)
|
||||
|
@ -3552,27 +3727,52 @@ PHP_FUNCTION(strtr)
|
|||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
convert_to_string_ex(str);
|
||||
if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_STRING) {
|
||||
convert_to_text_ex(str);
|
||||
}
|
||||
|
||||
/* shortcut for empty string */
|
||||
if (Z_STRLEN_PP(str) == 0) {
|
||||
if (Z_TYPE_PP(str) == IS_UNICODE && !Z_USTRLEN_PP(str)) {
|
||||
RETURN_EMPTY_UNICODE();
|
||||
} else if (!Z_STRLEN_PP(str)) {
|
||||
RETURN_EMPTY_STRING();
|
||||
}
|
||||
|
||||
if (ac == 2) {
|
||||
php_strtr_array(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), HASH_OF(*from));
|
||||
} else {
|
||||
convert_to_string_ex(from);
|
||||
convert_to_string_ex(to);
|
||||
if (Z_TYPE_PP(str) == IS_UNICODE) {
|
||||
if (ac == 2) {
|
||||
php_u_strtr_array(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), HASH_OF(*from));
|
||||
Z_TYPE_P(return_value) = IS_UNICODE;
|
||||
} else {
|
||||
convert_to_unicode_ex(from);
|
||||
convert_to_unicode_ex(to);
|
||||
|
||||
ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
|
||||
|
||||
php_strtr(Z_STRVAL_P(return_value),
|
||||
Z_STRLEN_P(return_value),
|
||||
Z_STRVAL_PP(from),
|
||||
Z_STRVAL_PP(to),
|
||||
MIN(Z_STRLEN_PP(from),
|
||||
Z_STRLEN_PP(to)));
|
||||
ZVAL_UNICODEL(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), 1);
|
||||
|
||||
php_u_strtr(Z_USTRVAL_P(return_value),
|
||||
Z_USTRLEN_P(return_value),
|
||||
Z_USTRVAL_PP(from),
|
||||
Z_USTRLEN_PP(from),
|
||||
Z_USTRVAL_PP(to),
|
||||
Z_USTRLEN_PP(to),
|
||||
MIN(Z_USTRLEN_PP(from), Z_USTRLEN_PP(to)));
|
||||
Z_TYPE_P(return_value) = IS_UNICODE;
|
||||
}
|
||||
} else {
|
||||
if (ac == 2) {
|
||||
php_strtr_array(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), HASH_OF(*from));
|
||||
} else {
|
||||
convert_to_string_ex(from);
|
||||
convert_to_string_ex(to);
|
||||
|
||||
ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
|
||||
|
||||
php_strtr(Z_STRVAL_P(return_value),
|
||||
Z_STRLEN_P(return_value),
|
||||
Z_STRVAL_PP(from),
|
||||
Z_STRVAL_PP(to),
|
||||
MIN(Z_STRLEN_PP(from),
|
||||
Z_STRLEN_PP(to)));
|
||||
}
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
|
|
29
ext/standard/tests/strings/strtr2.phpt
Normal file
29
ext/standard/tests/strings/strtr2.phpt
Normal file
|
@ -0,0 +1,29 @@
|
|||
--TEST--
|
||||
strtr() function (with unicode chars and combining sequences)
|
||||
--FILE--
|
||||
<?php
|
||||
declare(encoding="utf8");
|
||||
$from = "aåаиу";
|
||||
$to = "12356";
|
||||
$string = "Dе åkаt krаpt de krуllen van de trap af";
|
||||
var_dump( strtr( $string, $from, $to ) );
|
||||
|
||||
$from = "aeoui";
|
||||
$to = "12356";
|
||||
$string = "De akat krapt de krullen van de trap af";
|
||||
var_dump( strtr( $string, $from, $to ) );
|
||||
|
||||
$ar = array( "a" => 1, "e" => "2", "o" => 3, "u" => 5, "i" => 6 );
|
||||
$string = "De akat krapt de krullen van de trap af";
|
||||
var_dump( strtr( $string, $ar ) );
|
||||
|
||||
// Test with combining chars
|
||||
$from = "åb";
|
||||
$to = "1";
|
||||
$string = "xyzabc";
|
||||
var_dump( strtr( $string, $from, $to ) );
|
||||
?>
|
||||
--EXPECT--
|
||||
unicode(39) "Dе 2k3t kr3pt de kr6llen v1n de tr1p 1f"
|
||||
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
|
||||
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
|
12
ext/standard/tests/strings/ucfirst.phpt
Normal file
12
ext/standard/tests/strings/ucfirst.phpt
Normal file
|
@ -0,0 +1,12 @@
|
|||
--TEST--
|
||||
ucfirst()
|
||||
--FILE--
|
||||
<?php
|
||||
echo ucfirst("peren"), "\n";
|
||||
echo ucfirst("appelen"), "\n";
|
||||
echo ucfirst("ßen"), "\n";
|
||||
?>
|
||||
--EXPECT--
|
||||
Peren
|
||||
Appelen
|
||||
SSen
|
|
@ -285,14 +285,22 @@ PHP_FUNCTION(is_float)
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto bool is_string(mixed var)
|
||||
Returns true if variable is a native string */
|
||||
PHP_FUNCTION(is_string)
|
||||
/* {{{ proto bool is_binary(mixed var)
|
||||
Returns true if variable is a native (binary) string */
|
||||
PHP_FUNCTION(is_binary)
|
||||
{
|
||||
php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto bool is_string(mixed var)
|
||||
Returns true if variable is a string */
|
||||
PHP_FUNCTION(is_string)
|
||||
{
|
||||
php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, UG(unicode) ? IS_UNICODE : IS_STRING);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto bool is_unicode(mixed var)
|
||||
Returns true if variable is a unicode string */
|
||||
PHP_FUNCTION(is_unicode)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue