Fix GH-13815: mb_trim() inaccurate $characters default value (#13820)

Because the default characters are defined in the stub file, and the
stub file is UTF-8 (typically), the characters are encoded in the string
as UTF-8. When using a different character encoding, there is a mismatch
between what mb_trim expects and the UTF-8 encoded string it gets.

One way of solving this is to make the characters argument nullable and
default it to null. When the argument is null, mb_trim always takes the
internal code path in which the default characters are stored as Unicode
codepoint numbers rather than as an encoded string.

Co-authored-by: @ranvis
This commit is contained in:
Niels Dossche 2024-04-24 09:07:55 +02:00 committed by GitHub
parent 13a5a8126e
commit f81370847c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 26 additions and 6 deletions

View file

@ -3129,7 +3129,7 @@ static void php_do_mb_trim(INTERNAL_FUNCTION_PARAMETERS, mb_trim_mode mode)
ZEND_PARSE_PARAMETERS_START(1, 3)
Z_PARAM_STR(str)
Z_PARAM_OPTIONAL
Z_PARAM_STR(what)
Z_PARAM_STR_OR_NULL(what)
Z_PARAM_STR_OR_NULL(encoding)
ZEND_PARSE_PARAMETERS_END();

View file

@ -139,11 +139,11 @@ function mb_ucfirst(string $string, ?string $encoding = null): string {}
function mb_lcfirst(string $string, ?string $encoding = null): string {}
function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string {}
function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string {}
function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string {}
/** @refcount 1 */
function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false {}

View file

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: ea642b9010bc38a3b13710662fef48663d4385e1 */
* Stub hash: 03c07f68bea7d7b96e6dc11f180f45663b859ed3 */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null")
@ -124,7 +124,7 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_mb_trim, 0, 1, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, characters, IS_STRING, 0, "\" \\f\\n\\r\\t\\v\\x00   …\"")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, characters, IS_STRING, 1, "null")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()

View file

@ -0,0 +1,20 @@
--TEST--
GH-13815 (mb_trim() inaccurate $characters default value)
--EXTENSIONS--
mbstring
--FILE--
<?php
// Regression script for GH-13815: with $characters omitted, mb_trim() must
// strip U+3000 no matter which encoding the input string is in.
// Each var_dump() below is expected to print int(1), i.e. only U+3042 is left.

// Two-character input: U+3042 followed by a trailing U+3000.
$strUtf8 = "\u{3042}\u{3000}"; // U+3000: fullwidth space
// Internal encoding untouched so far (presumably UTF-8, matching the literal
// above — confirm against the test runner's ini settings).
// First call relies on the internal encoding, second names UTF-8 explicitly.
var_dump(mb_strlen(mb_trim($strUtf8)));
var_dump(mb_strlen(mb_trim($strUtf8, encoding: 'UTF-8')));
// From here on, every mb_* call that omits $encoding (including mb_strlen())
// works in Shift_JIS.
mb_internal_encoding('Shift_JIS');
// The same two characters, re-encoded from UTF-8 to Shift_JIS.
$strSjis = mb_convert_encoding($strUtf8, 'Shift_JIS', 'UTF-8');
// Same pair of checks as above, now against the Shift_JIS string: once via
// the internal encoding and once with the encoding named explicitly.
var_dump(mb_strlen(mb_trim($strSjis)));
var_dump(mb_strlen(mb_trim($strSjis, encoding: 'Shift_JIS')));
?>
--EXPECT--
int(1)
int(1)
int(1)
int(1)