Normalize substr() behavior

Make the behavior of substr(), mb_substr(), iconv_substr() and
grapheme_substr() consistent when it comes to the handling of
out-of-bounds offsets. substr() will now always clamp out-of-bounds
offsets to the string boundary. Cases that previously
returned false will now return an empty string. This means that
substr() itself *always* returns a string now (as mb_substr()
already did).

Closes GH-6182.
This commit is contained in:
Nikita Popov 2020-09-22 09:56:08 +02:00
parent 17a789e27c
commit 13b791c79c
14 changed files with 188 additions and 164 deletions

View file

@ -638,27 +638,22 @@ static php_iconv_err_t _php_iconv_substr(smart_str *pretval,
return err;
}
if (len < 0) {
if ((len += (total_len - offset)) < 0) {
return PHP_ICONV_ERR_SUCCESS;
}
}
if (offset < 0) {
if ((offset += total_len) < 0) {
return PHP_ICONV_ERR_SUCCESS;
offset = 0;
}
} else if ((size_t)offset > total_len) {
offset = total_len;
}
if((size_t)len > total_len) {
if (len < 0) {
if ((len += (total_len - offset)) < 0) {
len = 0;
}
} else if ((size_t)len > total_len) {
len = total_len;
}
if ((size_t)offset > total_len) {
return PHP_ICONV_ERR_SUCCESS;
}
if ((size_t)(offset + len) > total_len ) {
/* trying to compute the length */
len = total_len - offset;

View file

@ -45,8 +45,8 @@ var_dump(iconv("ISO-2022-JP", "EUC-JP", iconv_substr(iconv("EUC-JP", "ISO-2022-J
666768696a6b6c
a6a4a8a4aaa4ab
a4aba4ada4afa4b1a4b3a4b5a4b7
bool(false)
bool(false)
string(0) ""
string(0) ""
string(14) "This is a test"
string(14) "This is a test"
string(3) "est"
@ -55,8 +55,8 @@ string(3) "est"
string(3) "est"
string(5) "This "
string(5) "This "
bool(false)
bool(false)
bool(false)
bool(false)
string(0) ""
string(0) ""
string(0) ""
string(0) ""
string(10) "¤Á¤Ï ISO-2"

View file

@ -0,0 +1,48 @@
--TEST--
iconv_substr() with out of bounds offset
--SKIPIF--
<?php extension_loaded('iconv') or die('skip iconv extension is not available'); ?>
--FILE--
<?php
// Each var_dump() below is matched by position against one line of the
// --EXPECT-- section, so the order of these calls must not change.

// Offset only, ASCII input: an offset at or past the end of the string yields
// "", while a negative offset that reaches (or overshoots) the start yields
// the whole string.
var_dump(iconv_substr("foo", 3));
var_dump(iconv_substr("foo", -3));
var_dump(iconv_substr("foo", 4));
var_dump(iconv_substr("foo", -4));
// Same offsets against a three-character string that the expected output
// reports as six bytes long; the results mirror the ASCII cases above.
var_dump(iconv_substr("äöü", 3));
var_dump(iconv_substr("äöü", -3));
var_dump(iconv_substr("äöü", 4));
var_dump(iconv_substr("äöü", -4));
// Offset 0 with a length, ASCII input: a length at or beyond the string
// length yields the whole string; a negative length that removes all (or more
// than all) characters yields "".
var_dump(iconv_substr("foo", 0, 3));
var_dump(iconv_substr("foo", 0, -3));
var_dump(iconv_substr("foo", 0, 4));
var_dump(iconv_substr("foo", 0, -4));
// Same lengths against the six-byte, three-character string.
var_dump(iconv_substr("äöü", 0, 3));
var_dump(iconv_substr("äöü", 0, -3));
var_dump(iconv_substr("äöü", 0, 4));
var_dump(iconv_substr("äöü", 0, -4));
// Out-of-bounds negative offset combined with a length: the expected output
// ("ä" and "äö") shows the length being applied from the start of the string.
var_dump(iconv_substr("äöü", -4, 1));
var_dump(iconv_substr("äöü", -4, -1));
// Negative length that would end before the offset: expected result is "".
var_dump(iconv_substr("äöü", 2, -2));
?>
--EXPECT--
string(0) ""
string(3) "foo"
string(0) ""
string(3) "foo"
string(0) ""
string(6) "äöü"
string(0) ""
string(6) "äöü"
string(3) "foo"
string(0) ""
string(3) "foo"
string(0) ""
string(6) "äöü"
string(0) ""
string(6) "äöü"
string(0) ""
string(2) "ä"
string(4) "äö"
string(0) ""

View file

@ -371,22 +371,20 @@ PHP_FUNCTION(grapheme_substr)
RETURN_THROWS();
}
if ( OUTSIDE_STRING(lstart, str_len)) {
zend_argument_value_error(2, "must be contained in argument #1 ($string)");
if (lstart < INT32_MIN || lstart > INT32_MAX) {
zend_argument_value_error(2, "is too large");
RETURN_THROWS();
}
/* we checked that it will fit: */
start = (int32_t) lstart;
if(no_length) {
if (no_length) {
length = str_len;
}
if(length < INT32_MIN) {
length = INT32_MIN;
} else if(length > INT32_MAX) {
length = INT32_MAX;
if (length < INT32_MIN || length > INT32_MAX) {
zend_argument_value_error(3, "is too large");
RETURN_THROWS();
}
/* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
@ -451,15 +449,17 @@ PHP_FUNCTION(grapheme_substr)
start += iter_val;
}
if ( 0 != start || sub_str_start_pos >= ustr_len ) {
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
if (ustr) {
efree(ustr);
if (0 != start) {
if (start > 0) {
if (ustr) {
efree(ustr);
}
ubrk_close(bi);
RETURN_EMPTY_STRING();
}
ubrk_close(bi);
RETURN_FALSE;
sub_str_start_pos = 0;
ubrk_first(bi);
}
/* OK to convert here since if str_len were big, convert above would fail */
@ -526,20 +526,17 @@ PHP_FUNCTION(grapheme_substr)
ubrk_close(bi);
if ( UBRK_DONE == sub_str_end_pos) {
if(length < 0) {
zend_argument_value_error(3, "must be contained in argument #1 ($string)");
if (length < 0) {
efree(ustr);
RETURN_THROWS();
RETURN_EMPTY_STRING();
} else {
sub_str_end_pos = ustr_len;
}
}
if(sub_str_start_pos > sub_str_end_pos) {
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
if (sub_str_start_pos > sub_str_end_pos) {
efree(ustr);
RETURN_FALSE;
RETURN_EMPTY_STRING();
}
status = U_ZERO_ERROR;

View file

@ -57,20 +57,6 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
return;
}
if ((l < 0 && -l > str_len2)) {
return;
} else if (l > 0 && l > str_len2) {
l = str_len2;
}
if (f > str_len2 || (f < 0 && -f > str_len2)) {
return;
}
if (l < 0 && str_len2 < f - l) {
return;
}
/* if "from" position is negative, count start position from the end
* of the string
*/
@ -79,8 +65,9 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
if (f < 0) {
f = 0;
}
}
} else if (f > str_len2) {
f = str_len2;
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
@ -90,20 +77,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
if (l < 0) {
l = 0;
}
}
if (f >= str_len2) {
return;
}
if ((f + l) > str_len2) {
l = str_len - f;
}
} else if (l > str_len2 - f) {
l = str_len2 - f;
}
*sub_str = str + f;
*sub_str_len = l;
return;
}
/* }}} */

View file

@ -14,11 +14,11 @@ var_dump(intl_get_error_message());
var_dump(grapheme_substr('déjà', -1, 0));
?>
--EXPECT--
bool(false)
string(0) ""
bool(false)
string(61) "grapheme_substr: invalid parameters: U_ILLEGAL_ARGUMENT_ERROR"
string(0) ""
bool(false)
string(65) "grapheme_substr: length is beyond start: U_ILLEGAL_ARGUMENT_ERROR"
string(0) ""
string(12) "U_ZERO_ERROR"
string(0) ""
string(0) ""
string(12) "U_ZERO_ERROR"
string(0) ""

View file

@ -325,23 +325,23 @@ function ut_main()
$tests = array(
array( "abc", 3, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
array( "abc", 3, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
array( "ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 2, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
array( $char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", 2, "a" . $char_A_ring_nfd . "bc" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 5, "O" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, 4, $char_O_diaeresis_nfd ),
array( $char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", 2, $char_a_ring_nfd . "bc" ),
array( "a" . $char_A_ring_nfd . "bc", 1, $char_A_ring_nfd . "bc" ),
array( "Abc", -5, "false" ),
array( $char_a_ring_nfd . "bc", 3, "false" ),
array( "abc", 4, "false" ),
array( "Abc", -5, "Abc" ),
array( $char_a_ring_nfd . "bc", 3, "" ),
array( "abc", 4, "" ),
array( "abC", 2, "C" ),
array( "abc", 1, "bc" ),
array( "Abc", 1, 1, "b" ),
array( "abc", 0, 2, "ab" ),
array( "Abc", -4, 1, "false" ),
array( "Abc", -4, 1, "A" ),
array( "ababc", 1, 2, "ba" ),
array( "ababc", 0, 10, "ababc" ),
@ -350,7 +350,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -1, "Op" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -2, "O" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -3, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@ -361,7 +361,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -6, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -7, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -8, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -7, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
@ -371,7 +371,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -3, "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -2, "pq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -1, "q" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 7, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@ -382,7 +382,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 2, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 1, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 0, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -2, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
@ -392,7 +392,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -6, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -7, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -8, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "" ),
);
@ -973,26 +973,23 @@ find "a%CC%8ABca%CC%8A" in "o%CC%88a%CC%8AaA%CC%8AbCa%CC%8Adef" - grapheme_strri
function grapheme_substr($string, $start, $length = -1) {}
substring of "abc" from "3" - grapheme_substr = false == false
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
substring of "abc" from "3" - grapheme_substr = ==
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
substring of "aoa%CC%8Abco%CC%88O" from "2" - grapheme_substr = a%CC%8Abco%CC%88O == a%CC%8Abco%CC%88O
substring of "o%CC%88a%CC%8AaA%CC%8Abc" from "2" - grapheme_substr = aA%CC%8Abc == aA%CC%8Abc
substring of "aa%CC%8Abco%CC%88O" from "5" - grapheme_substr = O == O
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
substring of "aa%CC%8AbcO%CC%88" from "4" - grapheme_substr = O%CC%88 == O%CC%88
substring of "o%CC%88aa%CC%8Abc" from "2" - grapheme_substr = a%CC%8Abc == a%CC%8Abc
substring of "aA%CC%8Abc" from "1" - grapheme_substr = A%CC%8Abc == A%CC%8Abc
substring of "Abc" from "-5" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
= A%CC%8Abc == false **FAILED**
substring of "a%CC%8Abc" from "3" - grapheme_substr = false == false
substring of "abc" from "4" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
= false == false
substring of "Abc" from "-5" - grapheme_substr = Abc == Abc
substring of "a%CC%8Abc" from "3" - grapheme_substr = ==
substring of "abc" from "4" - grapheme_substr = ==
substring of "abC" from "2" - grapheme_substr = C == C
substring of "abc" from "1" - grapheme_substr = bc == bc
substring of "Abc" from "1" - grapheme_substr with length 1 = b == b
substring of "abc" from "0" - grapheme_substr with length 2 = ab == ab
substring of "Abc" from "-4" - grapheme_substr with length 1: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
= ab == false **FAILED**
substring of "Abc" from "-4" - grapheme_substr with length 1 = A == A
substring of "ababc" from "1" - grapheme_substr with length 2 = ba == ba
substring of "ababc" from "0" - grapheme_substr with length 10 = ababc == ababc
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length 10 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
@ -1000,7 +997,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr = Opq == Opq
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -1 = Op == Op
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -2 = O == O
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -3 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = false == false
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@ -1010,8 +1007,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -5 =
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -7 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -8 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
= == false **FAILED**
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-7" - grapheme_substr = a%CC%8Abco%CC%88Opq == a%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-6" - grapheme_substr = bco%CC%88Opq == bco%CC%88Opq
@ -1020,8 +1016,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-4" - grapheme_substr = o%CC%88Opq ==
substring of "aa%CC%8Abco%CC%88Opq" from "-3" - grapheme_substr = Opq == Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-2" - grapheme_substr = pq == pq
substring of "aa%CC%8Abco%CC%88Opq" from "-1" - grapheme_substr = q == q
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
= q == false **FAILED**
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 8 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 7 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 6 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@ -1031,8 +1026,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 3 =
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 2 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 1 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 0 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
= == false **FAILED**
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -3 = aa%CC%8Abco%CC%88 == aa%CC%8Abco%CC%88
@ -1041,8 +1035,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -5 =
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -7 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -8 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
= == false **FAILED**
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9 = ==
function grapheme_strstr($haystack, $needle, $before_needle = FALSE) {}

View file

@ -325,23 +325,23 @@ function ut_main()
$tests = array(
array( "abc", 3, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
array( "abc", 3, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
array( "ao" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 2, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
array( $char_o_diaeresis_nfd . $char_a_ring_nfd . "a" . $char_A_ring_nfd . "bc", 2, "a" . $char_A_ring_nfd . "bc" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O", 5, "O" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd, 5, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_O_diaeresis_nfd, 4, $char_O_diaeresis_nfd ),
array( $char_o_diaeresis_nfd . "a" . $char_a_ring_nfd . "bc", 2, $char_a_ring_nfd . "bc" ),
array( "a" . $char_A_ring_nfd . "bc", 1, $char_A_ring_nfd . "bc" ),
array( "Abc", -5, "false" ),
array( $char_a_ring_nfd . "bc", 3, "false" ),
array( "abc", 4, "false" ),
array( "Abc", -5, "Abc" ),
array( $char_a_ring_nfd . "bc", 3, "" ),
array( "abc", 4, "" ),
array( "abC", 2, "C" ),
array( "abc", 1, "bc" ),
array( "Abc", 1, 1, "b" ),
array( "abc", 0, 2, "ab" ),
array( "Abc", -4, 1, "false" ),
array( "Abc", -4, 1, "A" ),
array( "ababc", 1, 2, "ba" ),
array( "ababc", 0, 10, "ababc" ),
@ -350,7 +350,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -1, "Op" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -2, "O" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -3, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 5, -4, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@ -361,7 +361,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -6, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -7, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -8, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", 0, -9, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -7, $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
@ -371,7 +371,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -3, "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -2, "pq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -1, "q" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -999, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 8, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 7, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
@ -382,7 +382,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 2, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 1, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, 0, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -999, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -1, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Op" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -2, "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "O" ),
@ -392,7 +392,7 @@ function ut_main()
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -6, "a" . $char_a_ring_nfd ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -7, "a" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -8, "" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "false" ),
array( "a" . $char_a_ring_nfd . "bc" . $char_o_diaeresis_nfd . "Opq", -8, -9, "" ),
);
@ -961,23 +961,23 @@ find "a%CC%8ABca%CC%8A" in "o%CC%88a%CC%8AaA%CC%8AbCa%CC%8Adef" - grapheme_strri
function grapheme_substr($string, $start, $length = -1) {}
substring of "abc" from "3" - grapheme_substr = false == false
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
substring of "abc" from "3" - grapheme_substr = ==
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
substring of "aoa%CC%8Abco%CC%88O" from "2" - grapheme_substr = a%CC%8Abco%CC%88O == a%CC%8Abco%CC%88O
substring of "o%CC%88a%CC%8AaA%CC%8Abc" from "2" - grapheme_substr = aA%CC%8Abc == aA%CC%8Abc
substring of "aa%CC%8Abco%CC%88O" from "5" - grapheme_substr = O == O
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = false == false
substring of "aa%CC%8Abco%CC%88" from "5" - grapheme_substr = ==
substring of "aa%CC%8AbcO%CC%88" from "4" - grapheme_substr = O%CC%88 == O%CC%88
substring of "o%CC%88aa%CC%8Abc" from "2" - grapheme_substr = a%CC%8Abc == a%CC%8Abc
substring of "aA%CC%8Abc" from "1" - grapheme_substr = A%CC%8Abc == A%CC%8Abc
substring of "Abc" from "-5" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
substring of "a%CC%8Abc" from "3" - grapheme_substr = false == false
substring of "abc" from "4" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
substring of "Abc" from "-5" - grapheme_substr = Abc == Abc
substring of "a%CC%8Abc" from "3" - grapheme_substr = ==
substring of "abc" from "4" - grapheme_substr = ==
substring of "abC" from "2" - grapheme_substr = C == C
substring of "abc" from "1" - grapheme_substr = bc == bc
substring of "Abc" from "1" - grapheme_substr with length 1 = b == b
substring of "abc" from "0" - grapheme_substr with length 2 = ab == ab
substring of "Abc" from "-4" - grapheme_substr with length 1: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
substring of "Abc" from "-4" - grapheme_substr with length 1 = A == A
substring of "ababc" from "1" - grapheme_substr with length 2 = ba == ba
substring of "ababc" from "0" - grapheme_substr with length 10 = ababc == ababc
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length 10 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
@ -985,7 +985,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr = Opq == Opq
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -1 = Op == Op
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -2 = O == O
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -3 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = false == false
substring of "aa%CC%8Abco%CC%88Opq" from "5" - grapheme_substr with length -4 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@ -995,7 +995,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -5 =
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -7 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -8 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
substring of "aa%CC%8Abco%CC%88Opq" from "0" - grapheme_substr with length -9 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-7" - grapheme_substr = a%CC%8Abco%CC%88Opq == a%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-6" - grapheme_substr = bco%CC%88Opq == bco%CC%88Opq
@ -1004,7 +1004,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-4" - grapheme_substr = o%CC%88Opq ==
substring of "aa%CC%8Abco%CC%88Opq" from "-3" - grapheme_substr = Opq == Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-2" - grapheme_substr = pq == pq
substring of "aa%CC%8Abco%CC%88Opq" from "-1" - grapheme_substr = q == q
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr: grapheme_substr(): Argument #2 ($start) must be contained in argument #1 ($string)
substring of "aa%CC%8Abco%CC%88Opq" from "-999" - grapheme_substr = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 8 = aa%CC%8Abco%CC%88Opq == aa%CC%8Abco%CC%88Opq
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 7 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 6 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
@ -1014,7 +1014,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 3 =
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 2 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 1 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length 0 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -999 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -1 = aa%CC%8Abco%CC%88Op == aa%CC%8Abco%CC%88Op
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -2 = aa%CC%8Abco%CC%88O == aa%CC%8Abco%CC%88O
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -3 = aa%CC%8Abco%CC%88 == aa%CC%8Abco%CC%88
@ -1023,7 +1023,7 @@ substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -5 =
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -6 = aa%CC%8A == aa%CC%8A
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -7 = a == a
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -8 = ==
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9: grapheme_substr(): Argument #3 ($length) must be contained in argument #1 ($string)
substring of "aa%CC%8Abco%CC%88Opq" from "-8" - grapheme_substr with length -9 = ==
function grapheme_strstr($haystack, $needle, $before_needle = FALSE) {}

View file

@ -118,11 +118,25 @@ try {
}
echo "\n";
// TODO: substr is special.
var_dump(grapheme_substr("foo", 3));
var_dump(grapheme_substr("foo", -3));
//var_dump(grapheme_substr("foo", 4));
//var_dump(grapheme_substr("foo", -4));
var_dump(grapheme_substr("foo", 4));
var_dump(grapheme_substr("foo", -4));
var_dump(grapheme_substr("äöü", 3));
var_dump(grapheme_substr("äöü", -3));
var_dump(grapheme_substr("äöü", 4));
var_dump(grapheme_substr("äöü", -4));
var_dump(grapheme_substr("foo", 0, 3));
var_dump(grapheme_substr("foo", 0, -3));
var_dump(grapheme_substr("foo", 0, 4));
var_dump(grapheme_substr("foo", 0, -4));
var_dump(grapheme_substr("äöü", 0, 3));
var_dump(grapheme_substr("äöü", 0, -3));
var_dump(grapheme_substr("äöü", 0, 4));
var_dump(grapheme_substr("äöü", 0, -4));
var_dump(grapheme_substr("äöü", -4, 1));
var_dump(grapheme_substr("äöü", -4, -1));
var_dump(grapheme_substr("äöü", 2, -2));
?>
--EXPECT--
@ -167,5 +181,22 @@ grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($hay
grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
bool(false)
string(0) ""
string(3) "foo"
string(0) ""
string(3) "foo"
string(0) ""
string(6) "äöü"
string(0) ""
string(6) "äöü"
string(3) "foo"
string(0) ""
string(3) "foo"
string(0) ""
string(6) "äöü"
string(0) ""
string(6) "äöü"
string(0) ""
string(2) "ä"
string(4) "äö"
string(0) ""

View file

@ -607,7 +607,7 @@ function str_ends_with(string $haystack, string $needle): bool {}
function chunk_split(string $str, int $chunklen = 76, string $ending = "\r\n"): string {}
function substr(string $str, int $start, ?int $length = null): string|false {}
function substr(string $str, int $start, ?int $length = null): string {}
function substr_replace(array|string $str, array|string $replace, array|int $start, array|int|null $length = null): string|array {}

View file

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: f029ab7f1d9fa2a99a5612a928d0b731de6aaeed */
* Stub hash: 5275c2ba801f36ecf17e6b615b19373aca34e852 */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0)
ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0)
@ -924,7 +924,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_chunk_split, 0, 1, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, ending, IS_STRING, 0, "\"\\r\\n\"")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_substr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE)
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_substr, 0, 2, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, str, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, start, IS_LONG, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, length, IS_LONG, 1, "null")

View file

@ -2170,9 +2170,7 @@ PHP_FUNCTION(substr)
Z_PARAM_LONG_OR_NULL(l, len_is_null)
ZEND_PARSE_PARAMETERS_END();
if (f > (zend_long)ZSTR_LEN(str)) {
RETURN_FALSE;
} else if (f < 0) {
if (f < 0) {
/* if "from" position is negative, count start position from the end
* of the string
*/
@ -2181,41 +2179,24 @@ PHP_FUNCTION(substr)
} else {
f = (zend_long)ZSTR_LEN(str) + f;
}
if (!len_is_null) {
if (l < 0) {
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if ((size_t)(-l) > ZSTR_LEN(str) - (size_t)f) {
if ((size_t)(-l) > ZSTR_LEN(str)) {
RETURN_FALSE;
} else {
l = 0;
}
} else {
l = (zend_long)ZSTR_LEN(str) - f + l;
}
} else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
goto truncate_len;
}
} else {
goto truncate_len;
}
} else if (!len_is_null) {
} else if ((size_t)f > ZSTR_LEN(str)) {
RETURN_EMPTY_STRING();
}
if (!len_is_null) {
if (l < 0) {
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
if ((size_t)(-l) > ZSTR_LEN(str) - (size_t)f) {
RETURN_FALSE;
l = 0;
} else {
l = (zend_long)ZSTR_LEN(str) - f + l;
}
} else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
goto truncate_len;
l = (zend_long)ZSTR_LEN(str) - f;
}
} else {
truncate_len:
l = (zend_long)ZSTR_LEN(str) - f;
}

View file

@ -102,4 +102,4 @@ strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
int(2)
string(8) "abcdeabc"
bool(false)
string(0) ""