diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 725d591427d..79be29db51d 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -2664,6 +2664,26 @@ PHP_FUNCTION(mb_strtolower)
 }
 /* }}} */
 
+/* Compact `elist` in place: entries whose `no_encoding` is at or below
+ * `mbfl_no_encoding_charset_min` are dropped, the remaining entries keep their
+ * relative order, and `*size` is reduced by the number of entries dropped. */
+static void remove_non_encodings_from_elist(const mbfl_encoding **elist, size_t *size)
+{
+	/* mbstring supports some 'text encodings' which aren't really text encodings
+	 * at all, but really 'byte encodings', like Base64, QPrint, and so on.
+	 * These should never be returned by `mb_detect_encoding`. */
+	size_t shift = 0; /* Entries removed so far; size_t to match `*size` */
+	for (size_t i = 0; i < *size; i++) {
+		const mbfl_encoding *encoding = elist[i];
+		if (encoding->no_encoding <= mbfl_no_encoding_charset_min) {
+			shift++; /* Remove this encoding from the list */
+		} else if (shift) {
+			elist[i - shift] = encoding; /* Slide the kept entry over the gap */
+		}
+	}
+	*size -= shift;
+}
+
 /* {{{ Encodings of the given string is returned (as a string) */
 PHP_FUNCTION(mb_detect_encoding)
 {
@@ -2709,6 +2729,14 @@ PHP_FUNCTION(mb_detect_encoding)
 		RETURN_THROWS();
 	}
 
+	if (free_elist) {
+		remove_non_encodings_from_elist(elist, &size);
+		if (size == 0) {
+			efree(ZEND_VOIDP(elist));
+			RETURN_FALSE;
+		}
+	}
+
 	if (ZEND_NUM_ARGS() < 3) {
 		strict = MBSTRG(strict_detection);
 	}
diff --git a/ext/mbstring/tests/bug81298.phpt b/ext/mbstring/tests/bug81298.phpt
index d8565421fe7..37e0bc131ed 100644
--- a/ext/mbstring/tests/bug81298.phpt
+++ b/ext/mbstring/tests/bug81298.phpt
@@ -16,5 +16,5 @@ var_dump(mb_detect_encoding("foobar.", "ascii,html"));
 bool(false)
 string(5) "ASCII"
 string(5) "ASCII"
-string(13) "HTML-ENTITIES"
+bool(false)
 string(5) "ASCII"
diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt
index 571fa1ca7f3..f2be2a617db 100644
--- a/ext/mbstring/tests/mb_detect_encoding.phpt
+++ b/ext/mbstring/tests/mb_detect_encoding.phpt
@@ -61,6 +61,7 @@ echo mb_detect_encoding($test, ['UTF-8', 'UTF-16']), "\n";
 
 // We once had a problem where all kind of strings would be detected as 'UUENCODE'
 echo mb_detect_encoding('abc', ['UUENCODE', 'UTF-8']), "\n";
+echo mb_detect_encoding('abc', ['UUENCODE', 'QPrint', 'HTML-ENTITIES', 'Base64', '7bit', '8bit', 'SJIS']), "\n";
 
 echo "== DETECT ORDER ==\n";
 
@@ -242,6 +243,7 @@ ISO-8859-1
 UTF-8
 UTF-8
 UTF-8
+SJIS
 == DETECT ORDER ==
 JIS: JIS
 EUC-JP: EUC-JP
diff --git a/ext/mbstring/tests/other_encodings.phpt b/ext/mbstring/tests/other_encodings.phpt
index 4a62966be50..321eccb247b 100644
--- a/ext/mbstring/tests/other_encodings.phpt
+++ b/ext/mbstring/tests/other_encodings.phpt
@@ -17,7 +17,7 @@ var_dump(mb_convert_encoding("ABC", "8bit", "7bit"));
 echo "7bit done\n";
 
 // "8bit"
-var_dump(mb_convert_encoding("\x01\x00", "8bit", "UTF-16BE")); // codepoints over 0xFF are illegal for '8-bit'
+var_dump(mb_convert_encoding("\x01\x00", "8bit", "UTF-16BE")); // codepoints over 0xFF are illegal for '8-bit'
 echo "8bit done\n";
 
 // UCS-2