mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Merge branch 'PHP-8.3'
* PHP-8.3: Fix infinite loop when mb_detect_encoding is used on UTF-8 BOM
This commit is contained in:
commit
00c567a436
2 changed files with 19 additions and 0 deletions
|
@@ -3070,6 +3070,12 @@ static size_t count_demerits(struct candidate *array, size_t length, bool strict
|
||||||
uint32_t wchar_buf[128];
|
uint32_t wchar_buf[128];
|
||||||
unsigned int finished = 0; /* For how many candidate encodings have we processed all the input? */
|
unsigned int finished = 0; /* For how many candidate encodings have we processed all the input? */
|
||||||
|
|
||||||
|
for (size_t i = 0; i < length; i++) {
|
||||||
|
if (array[i].in_len == 0) {
|
||||||
|
finished++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while ((strict || length > 1) && finished < length) {
|
while ((strict || length > 1) && finished < length) {
|
||||||
/* Iterate in reverse order to avoid moving candidates that can be eliminated. */
|
/* Iterate in reverse order to avoid moving candidates that can be eliminated. */
|
||||||
for (size_t i = length - 1; i != (size_t)-1; i--) {
|
for (size_t i = length - 1; i != (size_t)-1; i--) {
|
||||||
|
|
|
@@ -25,6 +25,13 @@ print("Bad ASCII (strict): " . mb_detect_encoding("\xDD\x92", ['ASCII', 'UTF-8']
|
||||||
print("Bad ASCII/UTF-8, with more errors for ASCII (non-strict): " . mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], false) . "\n");
|
print("Bad ASCII/UTF-8, with more errors for ASCII (non-strict): " . mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], false) . "\n");
|
||||||
print("Bad ASCII/UTF-8, with more errors for ASCII (strict): " . var_export(mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], true), true) . "\n");
|
print("Bad ASCII/UTF-8, with more errors for ASCII (strict): " . var_export(mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], true), true) . "\n");
|
||||||
|
|
||||||
|
print("UTF-8 BOM (non-strict): " . mb_detect_encoding("\xEF\xBB\xBF", ["UTF-8", "ASCII"], false) . "\n");
|
||||||
|
print("UTF-8 BOM (strict): " . mb_detect_encoding("\xEF\xBB\xBF", ["UTF-8", "ASCII"], true) . "\n");
|
||||||
|
print("UTF-16BE BOM (non-strict): " . mb_detect_encoding("\xFE\xFF", ["UTF-8", "UTF-16BE", "UTF-16LE"], false) . "\n");
|
||||||
|
print("UTF-16BE BOM (strict): " . mb_detect_encoding("\xFE\xFF", ["UTF-8", "UTF-16BE", "UTF-16LE"], true) . "\n");
|
||||||
|
print("UTF-16LE BOM (non-strict): " . mb_detect_encoding("\xFF\xFE", ["UTF-8", "UTF-16BE", "UTF-16LE"], false) . "\n");
|
||||||
|
print("UTF-16LE BOM (strict): " . mb_detect_encoding("\xFF\xFE", ["UTF-8", "UTF-16BE", "UTF-16LE"], true) . "\n");
|
||||||
|
|
||||||
print("SJIS: " . mb_detect_encoding($sjis, 'SJIS', true) . "\n");
|
print("SJIS: " . mb_detect_encoding($sjis, 'SJIS', true) . "\n");
|
||||||
print("JIS: " . mb_detect_encoding($jis, 'JIS', true) . "\n");
|
print("JIS: " . mb_detect_encoding($jis, 'JIS', true) . "\n");
|
||||||
print("EUC-JP (strict): " . mb_detect_encoding($euc_jp, 'UTF-8,EUC-JP,JIS', true) . "\n");
|
print("EUC-JP (strict): " . mb_detect_encoding($euc_jp, 'UTF-8,EUC-JP,JIS', true) . "\n");
|
||||||
|
@@ -399,6 +406,12 @@ Bad ASCII (non-strict): UTF-8
|
||||||
Bad ASCII (strict): UTF-8
|
Bad ASCII (strict): UTF-8
|
||||||
Bad ASCII/UTF-8, with more errors for ASCII (non-strict): UTF-8
|
Bad ASCII/UTF-8, with more errors for ASCII (non-strict): UTF-8
|
||||||
Bad ASCII/UTF-8, with more errors for ASCII (strict): false
|
Bad ASCII/UTF-8, with more errors for ASCII (strict): false
|
||||||
|
UTF-8 BOM (non-strict): UTF-8
|
||||||
|
UTF-8 BOM (strict): UTF-8
|
||||||
|
UTF-16BE BOM (non-strict): UTF-16BE
|
||||||
|
UTF-16BE BOM (strict): UTF-16BE
|
||||||
|
UTF-16LE BOM (non-strict): UTF-16LE
|
||||||
|
UTF-16LE BOM (strict): UTF-16LE
|
||||||
SJIS: SJIS
|
SJIS: SJIS
|
||||||
JIS: JIS
|
JIS: JIS
|
||||||
EUC-JP (strict): EUC-JP
|
EUC-JP (strict): EUC-JP
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue