mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Merge branch 'PHP-8.1' into PHP-8.2
* PHP-8.1: [ci skip] NEWS Fix GH-9535 (unintended behavior change for mb_strcut in PHP 8.1)
This commit is contained in:
commit
79ae3090e0
3 changed files with 294 additions and 0 deletions
|
@ -1174,6 +1174,7 @@ mbfl_strcut(
|
|||
bk = _bk;
|
||||
}
|
||||
|
||||
decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
|
||||
(*encoder->filter_flush)(encoder);
|
||||
|
||||
if (bk.decoder.filter_dtor)
|
||||
|
|
187
ext/mbstring/tests/gh9535.phpt
Normal file
187
ext/mbstring/tests/gh9535.phpt
Normal file
|
@ -0,0 +1,187 @@
|
|||
--TEST--
|
||||
GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
|
||||
--EXTENSIONS--
|
||||
mbstring
|
||||
--FILE--
|
||||
<?php
// Regression script for GH-9535. For every encoding below, each sample string is
// converted from the internal encoding into that encoding, truncated with
// mb_strcut() to a fixed number of bytes, converted back to the internal encoding,
// and printed as "<encoding>: <result>".
$encodings = [
    'BASE64',
    'HTML-ENTITIES',
    'Quoted-Printable',
    'UTF-16',
    'UTF-16BE',
    'UTF-16LE',
    'UTF-7',
    'UTF7-IMAP',
    'ISO-2022-JP-MS',
    'GB18030',
    'HZ',
    'ISO-2022-KR',
    'ISO-2022-JP-MOBILE#KDDI',
    'CP50220',
    'CP50221',
    'CP50222',
];

// Each case is [sample text, number of bytes requested from mb_strcut()].
// The order matters: the expected output lists the groups in this sequence.
$cases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
    ['???', 2],
];

foreach ($cases as [$input, $bytes_length]) {
    foreach ($encodings as $encoding) {
        $internal_encoding = mb_internal_encoding();
        $encoded = mb_convert_encoding($input, $encoding, $internal_encoding);
        $truncated = mb_strcut($encoded, 0, $bytes_length, $encoding);
        // Decode before echoing so the whole output line is emitted in one step.
        $decoded = mb_convert_encoding($truncated, $internal_encoding, $encoding);
        echo $encoding.': '.$decoded.PHP_EOL;
    }

    // A blank line separates the groups in the expected output.
    echo PHP_EOL;
}

// $input and $bytes_length still hold the last case ('???', 2) here. This final
// pass cuts the unconverted string directly while naming each encoding.
foreach ($encodings as $encoding) {
    var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
}

?>
|
||||
--EXPECTF--
|
||||
BASE64: 宛如繁
|
||||
HTML-ENTITIES: 宛如
|
||||
Quoted-Printable: %s
|
||||
UTF-16: 宛如繁星般宛如
|
||||
UTF-16BE: 宛如繁星般宛如
|
||||
UTF-16LE: 宛如繁星般宛如
|
||||
UTF-7: 宛如繁星
|
||||
UTF7-IMAP: 宛如繁星
|
||||
ISO-2022-JP-MS: 宛如繁星
|
||||
GB18030: 宛如繁星般宛如
|
||||
HZ: 宛如繁星般
|
||||
ISO-2022-KR: 宛如繁星
|
||||
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
|
||||
CP50220: 宛如繁星
|
||||
CP50221: 宛如繁星
|
||||
CP50222: 宛如繁星
|
||||
|
||||
BASE64: 星のように
|
||||
HTML-ENTITIES: 星の
|
||||
Quoted-Printable: 星の
|
||||
UTF-16: 星のように月のように
|
||||
UTF-16BE: 星のように月のように
|
||||
UTF-16LE: 星のように月のように
|
||||
UTF-7: 星のように月
|
||||
UTF7-IMAP: 星のように月
|
||||
ISO-2022-JP-MS: 星のように月の
|
||||
GB18030: 星のように月のように
|
||||
HZ: 星のように月のよ
|
||||
ISO-2022-KR: 星のように月の
|
||||
ISO-2022-JP-MOBILE#KDDI: 星のように月の
|
||||
CP50220: 星のように月の
|
||||
CP50221: 星のように月の
|
||||
CP50222: 星のように月の
|
||||
|
||||
BASE64: %s
|
||||
HTML-ENTITIES: あa&
|
||||
Quoted-Printable: あa
|
||||
UTF-16: あaいb
|
||||
UTF-16BE: あaいb
|
||||
UTF-16LE: あaいb
|
||||
UTF-7: あa
|
||||
UTF7-IMAP: あa
|
||||
ISO-2022-JP-MS: あa
|
||||
GB18030: あaいb
|
||||
HZ: あa
|
||||
ISO-2022-KR: あa
|
||||
ISO-2022-JP-MOBILE#KDDI: あa
|
||||
CP50220: あa
|
||||
CP50221: あa
|
||||
CP50222: あa
|
||||
|
||||
BASE64: AAAAAA
|
||||
HTML-ENTITIES: AAAAAAAAAA
|
||||
Quoted-Printable: AAAAAAAAAA
|
||||
UTF-16: AAAAA
|
||||
UTF-16BE: AAAAA
|
||||
UTF-16LE: AAAAA
|
||||
UTF-7: AAAAAAAAAA
|
||||
UTF7-IMAP: AAAAAAAAAA
|
||||
ISO-2022-JP-MS: AAAAAAAAAA
|
||||
GB18030: AAAAAAAAAA
|
||||
HZ: AAAAAAAAAA
|
||||
ISO-2022-KR: AAAAAAAAAA
|
||||
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
|
||||
CP50220: AAAAAAAAAA
|
||||
CP50221: AAAAAAAAAA
|
||||
CP50222: AAAAAAAAAA
|
||||
|
||||
BASE64:%s
|
||||
HTML-ENTITIES: ??
|
||||
Quoted-Printable: ??
|
||||
UTF-16: ?
|
||||
UTF-16BE: ?
|
||||
UTF-16LE: ?
|
||||
UTF-7: ??
|
||||
UTF7-IMAP: ??
|
||||
ISO-2022-JP-MS: ??
|
||||
GB18030: ??
|
||||
HZ: ??
|
||||
ISO-2022-KR: ??
|
||||
ISO-2022-JP-MOBILE#KDDI: ??
|
||||
CP50220: ??
|
||||
CP50221: ??
|
||||
CP50222: ??
|
||||
|
||||
string(0) ""
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
106
ext/mbstring/tests/gh9535b.phpt
Normal file
106
ext/mbstring/tests/gh9535b.phpt
Normal file
|
@ -0,0 +1,106 @@
|
|||
--TEST--
|
||||
Output of mb_strcut covers requested range of bytes even when output contains ending escape sequences
|
||||
--EXTENSIONS--
|
||||
mbstring
|
||||
--FILE--
|
||||
<?php
// mb_strcut() currently misbehaves for the encodings listed below: they append an
// extra closing escape sequence to a string that would otherwise end in a
// non-default conversion mode.
// Details of that bug: https://github.com/php/php-src/pull/9562
//
// These cases were written while fixing a different bug, and they fail only because
// of the closing-escape-sequence problem. Once it is fixed, the XFAIL section can be
// dropped (or this file can be merged into gh9535.phpt).
$encodings = [
    'JIS',
    'ISO-2022-JP',
    'ISO-2022-JP-2004',
];

// Each case is [sample text, number of bytes requested from mb_strcut()].
// The order matters: the expected output lists the groups in this sequence.
$cases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
    ['???', 2],
];

foreach ($cases as [$input, $bytes_length]) {
    foreach ($encodings as $encoding) {
        $internal_encoding = mb_internal_encoding();
        $encoded = mb_convert_encoding($input, $encoding, $internal_encoding);
        $truncated = mb_strcut($encoded, 0, $bytes_length, $encoding);
        // Decode before echoing so the whole output line is emitted in one step.
        $decoded = mb_convert_encoding($truncated, $internal_encoding, $encoding);
        echo $encoding.': '.$decoded.PHP_EOL;
    }

    // A blank line separates the groups in the expected output.
    echo PHP_EOL;
}

// $input and $bytes_length still hold the last case ('???', 2) here. This final
// pass cuts the unconverted string directly while naming each encoding.
foreach ($encodings as $encoding) {
    var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
}

?>
|
||||
--XFAIL--
|
||||
Discussion: https://github.com/php/php-src/pull/9562
|
||||
--EXPECTF--
|
||||
JIS: 宛如繁星般
|
||||
ISO-2022-JP: 宛如繁星般
|
||||
ISO-2022-JP-2004: 宛如繁星
|
||||
|
||||
JIS: 星のように月の
|
||||
ISO-2022-JP: 星のように月の
|
||||
ISO-2022-JP-2004: 星のように月の
|
||||
|
||||
JIS: あa
|
||||
ISO-2022-JP: あa
|
||||
ISO-2022-JP-2004: あa
|
||||
|
||||
JIS: AAAAAAAAAA
|
||||
ISO-2022-JP: AAAAAAAAAA
|
||||
ISO-2022-JP-2004: AAAAAAAAAA
|
||||
|
||||
JIS: ??
|
||||
ISO-2022-JP: ??
|
||||
ISO-2022-JP-2004: ??
|
||||
|
||||
string(2) "??"
|
||||
string(2) "??"
|
||||
string(2) "??"
|
Loading…
Add table
Add a link
Reference in a new issue