mirror of
https://github.com/php/php-src.git
synced 2025-08-19 08:49:28 +02:00
Return false on invalid codepoint in mb_chr()
mb_chr() now returns false for an invalid codepoint instead of returning the current substitution character encoded in the target encoding. This allows a robust check for the failure case. The substitution character (especially the default of "?") is also a valid output of mb_chr() for a valid input (for "?" that would be 0x3f), so it is a bad choice for an error value.
This commit is contained in:
parent
41e9ba6333
commit
e53162a32b
2 changed files with 27 additions and 46 deletions
|
@ -106,8 +106,6 @@ static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
|
|||
|
||||
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
|
||||
|
||||
static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc);
|
||||
|
||||
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
|
||||
/* }}} */
|
||||
|
||||
|
@ -3172,13 +3170,6 @@ static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding
|
|||
}
|
||||
|
||||
|
||||
/* See mbfl_no_encoding definition for list of unicode encodings */
|
||||
static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc)
|
||||
{
|
||||
return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb);
|
||||
}
|
||||
|
||||
|
||||
/* See mbfl_no_encoding definition for list of UTF-8 encodings */
|
||||
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
|
||||
{
|
||||
|
@ -5143,10 +5134,18 @@ static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len
|
|||
}
|
||||
}
|
||||
|
||||
if (php_mb_is_no_encoding_utf8(no_enc)) {
|
||||
if (php_mb_is_unsupported_no_encoding(no_enc)) {
|
||||
php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
|
||||
cp = MBSTRG(current_filter_illegal_substchar);
|
||||
if (cp < 0 || cp > 0x10ffff) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (php_mb_is_no_encoding_utf8(no_enc)) {
|
||||
if (cp > 0xd7ff && 0xe000 > cp) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (cp < 0x80) {
|
||||
|
@ -5182,20 +5181,6 @@ static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len
|
|||
}
|
||||
|
||||
return ret;
|
||||
|
||||
} else if (php_mb_is_unsupported_no_encoding(no_enc)) {
|
||||
php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (0 > cp || 0x10ffff < cp) {
|
||||
|
||||
if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
|
||||
cp = MBSTRG(current_filter_illegal_substchar);
|
||||
} else {
|
||||
cp = 0x3f;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
buf_len = 4;
|
||||
|
@ -5206,9 +5191,21 @@ static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len
|
|||
buf[3] = cp & 0xff;
|
||||
buf[4] = 0;
|
||||
|
||||
{
|
||||
long orig_illegalchars = MBSTRG(illegalchars);
|
||||
MBSTRG(illegalchars) = 0;
|
||||
ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
|
||||
if (MBSTRG(illegalchars) != 0) {
|
||||
efree(buf);
|
||||
efree(ret);
|
||||
MBSTRG(illegalchars) = orig_illegalchars;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
MBSTRG(illegalchars) = orig_illegalchars;
|
||||
}
|
||||
|
||||
efree(buf);
|
||||
if (output_len) {
|
||||
*output_len = ret_len;
|
||||
}
|
||||
|
|
|
@ -7,22 +7,8 @@ mb_chr()
|
|||
var_dump(
|
||||
"\u{20bb7}" === mb_chr(0x20bb7),
|
||||
"\x8f\xa1\xef" === mb_chr(0x50aa, "EUC-JP-2004"),
|
||||
"?" === mb_chr(0xd800)
|
||||
);
|
||||
|
||||
mb_internal_encoding("UCS-4BE");
|
||||
mb_substitute_character(0xfffd);
|
||||
var_dump(
|
||||
"\u{fffd}" === mb_chr(0xd800, "UTF-8")
|
||||
);
|
||||
var_dump(
|
||||
"\u{fffd}" === mb_chr(0xd800, "UTF-8")
|
||||
);
|
||||
|
||||
mb_internal_encoding("EUC-JP");
|
||||
mb_substitute_character(0xa4a2);
|
||||
var_dump(
|
||||
"\u{a4a2}" === mb_chr(0xd800, "UTF-8")
|
||||
false === mb_chr(0xd800),
|
||||
false === mb_chr(0x1f600, "EUC-JP-2004")
|
||||
);
|
||||
|
||||
// Invalid
|
||||
|
@ -39,8 +25,6 @@ bool(true)
|
|||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
|
||||
Warning: mb_chr(): Unknown encoding "typo" in %s on line %d
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue