Flush filter during non-strict encoding detection

If we reach the end of the string without reducing to a single
encoding, then we should flush to check whether the last character
is incomplete.
This commit is contained in:
Nikita Popov 2021-08-27 14:46:54 +02:00
parent 1ce81b6bcd
commit 43cb2548f7
2 changed files with 18 additions and 5 deletions

View file

@ -362,11 +362,9 @@ int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *str
p++;
}
if (identd->strict) {
for (int i = 0; i < num; i++) {
mbfl_convert_filter *filter = identd->filter_list[i];
(filter->filter_flush)(filter);
}
for (int i = 0; i < num; i++) {
mbfl_convert_filter *filter = identd->filter_list[i];
(filter->filter_flush)(filter);
}
return 0;

View file

@ -0,0 +1,15 @@
--TEST--
mb_detect_encoding() with incomplete trailing sequence
--EXTENSIONS--
mbstring
--FILE--
<?php
// Even in non-strict mode, this should be detected as ISO-8859-1. When the end of the string is
// reached, neither encoding has hit an illegal character, so the result would be picked based on
// score. However, flushing the filters disqualifies UTF-8, because the trailing "\xC2" is an
// incomplete sequence.
var_dump(mb_detect_encoding("A\xC2", ["UTF-8", "ISO-8859-1"]));
?>
--EXPECT--
string(10) "ISO-8859-1"