Merge branch 'PHP-8.1' into PHP-8.2

* PHP-8.1:
  [ci skip] NEWS
  Fix GH-9535 (unintended behavior change for mb_strcut in PHP 8.1)
This commit is contained in:
Alex Dowad 2022-11-13 14:42:57 +02:00
commit 79ae3090e0
3 changed files with 294 additions and 0 deletions

View file

@ -1174,6 +1174,7 @@ mbfl_strcut(
bk = _bk;
}
decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
(*encoder->filter_flush)(encoder);
if (bk.decoder.filter_dtor)

View file

@ -0,0 +1,187 @@
--TEST--
GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
--EXTENSIONS--
mbstring
--FILE--
<?php
// Encodings whose mb_strcut() output is checked by this test.
$encodings = [
    'BASE64',
    'HTML-ENTITIES',
    'Quoted-Printable',
    'UTF-16',
    'UTF-16BE',
    'UTF-16LE',
    'UTF-7',
    'UTF7-IMAP',
    'ISO-2022-JP-MS',
    'GB18030',
    'HZ',
    'ISO-2022-KR',
    'ISO-2022-JP-MOBILE#KDDI',
    'CP50220',
    'CP50221',
    'CP50222',
];

// Each case pairs a sample text (in the internal encoding) with the byte
// length that is passed to mb_strcut().
$cases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
    ['???', 2],
];

// For every case and every encoding: convert the sample into the target
// encoding, cut it from offset 0, convert the cut piece back and print it.
foreach ($cases as [$sample, $limit]) {
    foreach ($encodings as $encoding) {
        $encoded = mb_convert_encoding($sample, $encoding, mb_internal_encoding());
        $truncated = mb_strcut($encoded, 0, $limit, $encoding);
        $decoded = mb_convert_encoding($truncated, mb_internal_encoding(), $encoding);
        echo "{$encoding}: {$decoded}" . PHP_EOL;
    }
    // Blank line separating the output of one case from the next.
    echo PHP_EOL;
}

// Final pass: cut the text of the last case directly, without converting it
// to the target encoding first, and dump the raw result.
[$input, $bytes_length] = $cases[count($cases) - 1];
foreach ($encodings as $encoding) {
    var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
}
?>
--EXPECTF--
BASE64: 宛如繁
HTML-ENTITIES: 宛&#22914
Quoted-Printable: %s
UTF-16: 宛如繁星般宛如
UTF-16BE: 宛如繁星般宛如
UTF-16LE: 宛如繁星般宛如
UTF-7: 宛如繁星
UTF7-IMAP: 宛如繁星
ISO-2022-JP-MS: 宛如繁星
GB18030: 宛如繁星般宛如
HZ: 宛如繁星般
ISO-2022-KR: 宛如繁星
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
CP50220: 宛如繁星
CP50221: 宛如繁星
CP50222: 宛如繁星
BASE64: 星のように
HTML-ENTITIES: 星の&#12
Quoted-Printable: 星の
UTF-16: 星のように月のように
UTF-16BE: 星のように月のように
UTF-16LE: 星のように月のように
UTF-7: 星のように月
UTF7-IMAP: 星のように月
ISO-2022-JP-MS: 星のように月の
GB18030: 星のように月のように
HZ: 星のように月のよ
ISO-2022-KR: 星のように月の
ISO-2022-JP-MOBILE#KDDI: 星のように月の
CP50220: 星のように月の
CP50221: 星のように月の
CP50222: 星のように月の
BASE64: %s
HTML-ENTITIES: あa&
Quoted-Printable: あa
UTF-16: あaいb
UTF-16BE: あaいb
UTF-16LE: あaいb
UTF-7: あa
UTF7-IMAP: あa
ISO-2022-JP-MS: あa
GB18030: あaいb
HZ: あa
ISO-2022-KR: あa
ISO-2022-JP-MOBILE#KDDI: あa
CP50220: あa
CP50221: あa
CP50222: あa
BASE64: AAAAAA
HTML-ENTITIES: AAAAAAAAAA
Quoted-Printable: AAAAAAAAAA
UTF-16: AAAAA
UTF-16BE: AAAAA
UTF-16LE: AAAAA
UTF-7: AAAAAAAAAA
UTF7-IMAP: AAAAAAAAAA
ISO-2022-JP-MS: AAAAAAAAAA
GB18030: AAAAAAAAAA
HZ: AAAAAAAAAA
ISO-2022-KR: AAAAAAAAAA
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
CP50220: AAAAAAAAAA
CP50221: AAAAAAAAAA
CP50222: AAAAAAAAAA
BASE64:%s
HTML-ENTITIES: ??
Quoted-Printable: ??
UTF-16: ?
UTF-16BE: ?
UTF-16LE: ?
UTF-7: ??
UTF7-IMAP: ??
ISO-2022-JP-MS: ??
GB18030: ??
HZ: ??
ISO-2022-KR: ??
ISO-2022-JP-MOBILE#KDDI: ??
CP50220: ??
CP50221: ??
CP50222: ??
string(0) ""
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"
string(2) "??"

View file

@ -0,0 +1,106 @@
--TEST--
Output of mb_strcut covers requested range of bytes even when output contains ending escape sequences
--EXTENSIONS--
mbstring
--FILE--
<?php
// mb_strcut() currently behaves incorrectly for the encodings listed below
// whenever they append an extra closing escape sequence to a string that
// would otherwise end in a non-default conversion mode.
// Details of that bug: https://github.com/php/php-src/pull/9562
// These checks were written while fixing a different bug; they do not pass
// because of the closing-escape-sequence bug described above.
// Once that bug is fixed, XFAIL can be removed (or this file can be merged
// into gh9535.phpt).
$encodings = [
    'JIS',
    'ISO-2022-JP',
    'ISO-2022-JP-2004',
];

// Each case pairs a sample text (in the internal encoding) with the byte
// length that is passed to mb_strcut().
$cases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
    ['???', 2],
];

// For every case and every encoding: convert the sample into the target
// encoding, cut it from offset 0, convert the cut piece back and print it.
foreach ($cases as [$sample, $limit]) {
    foreach ($encodings as $encoding) {
        $encoded = mb_convert_encoding($sample, $encoding, mb_internal_encoding());
        $truncated = mb_strcut($encoded, 0, $limit, $encoding);
        $decoded = mb_convert_encoding($truncated, mb_internal_encoding(), $encoding);
        echo "{$encoding}: {$decoded}" . PHP_EOL;
    }
    // Blank line separating the output of one case from the next.
    echo PHP_EOL;
}

// Final pass: cut the text of the last case directly, without converting it
// to the target encoding first, and dump the raw result.
[$input, $bytes_length] = $cases[count($cases) - 1];
foreach ($encodings as $encoding) {
    var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
}
?>
--XFAIL--
Discussion: https://github.com/php/php-src/pull/9562
--EXPECTF--
JIS: 宛如繁星般
ISO-2022-JP: 宛如繁星般
ISO-2022-JP-2004: 宛如繁星
JIS: 星のように月の
ISO-2022-JP: 星のように月の
ISO-2022-JP-2004: 星のように月の
JIS: あa
ISO-2022-JP: あa
ISO-2022-JP-2004: あa
JIS: AAAAAAAAAA
ISO-2022-JP: AAAAAAAAAA
ISO-2022-JP-2004: AAAAAAAAAA
JIS: ??
ISO-2022-JP: ??
ISO-2022-JP-2004: ??
string(2) "??"
string(2) "??"
string(2) "??"