In legacy text conversion filters, reset filter state in 'flush' function

Up until now, I believed that mbstring had been designed such
that (legacy) text conversion filter objects should not be
re-used after the 'flush' function is called to complete a
text conversion operation.

However, it turns out that the implementation of
_php_mb_encoding_handler_ex DID re-use filter objects
after flush. That means that functions which were based on
_php_mb_encoding_handler_ex, including mb_parse_str and
php_mb_post_handler, would break in some cases; state left
over from converting one substring (perhaps a variable name)
would affect the results of converting another substring
(perhaps the value of the same variable), and could cause
extraneous characters to get inserted into the output.

All this code should be deleted soon, but fixing it helps me
avoid spurious failures when fuzzing the new and old code
against each other to look for differences in behavior.
This commit is contained in:
Alex Dowad 2022-08-04 11:04:02 +02:00
parent 18e526cb51
commit f3c8efd711
28 changed files with 35 additions and 2 deletions

View file

@ -257,6 +257,7 @@ static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status == 1) {
/* 2-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -322,6 +322,7 @@ static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter)
* escape sequence was truncated */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
@ -824,7 +825,7 @@ static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status &= 0xff;
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -178,6 +178,7 @@ static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter)
if (filter->status) {
/* Input string was truncated */
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -217,6 +217,7 @@ static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -166,6 +166,7 @@ static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
/* 2-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -209,6 +209,7 @@ static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status == 1) {
/* 2-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -180,6 +180,7 @@ static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -226,6 +226,7 @@ static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -193,6 +193,7 @@ static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status == 1) {
/* 2-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -245,6 +245,7 @@ static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
/* 2-byte or 4-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -231,6 +231,7 @@ static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
/* multi-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -154,6 +154,8 @@ static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
}

View file

@ -219,6 +219,7 @@ static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter)
if (filter->status & 0xF) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
@ -354,6 +355,7 @@ int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)('(', filter->data));
CK((*filter->output_function)('B', filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -178,6 +178,7 @@ static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter)
/* 2-byte character was truncated */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -313,6 +313,7 @@ static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
if (filter->status & 0xF) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
@ -483,6 +484,7 @@ static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
(*filter->output_function)(c1, filter->data);
}
filter->status = filter->cache = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -271,6 +271,7 @@ static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter)
* or else escape sequence was truncated */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
@ -451,7 +452,7 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status &= 0xff;
filter->status = 0;
if (filter->flush_function != NULL) {
return (*filter->flush_function)(filter->data);

View file

@ -183,6 +183,7 @@ static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -491,6 +491,7 @@ int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
if (filter->status & 0xF) {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
return (*filter->flush_function)(filter->data);

View file

@ -266,6 +266,7 @@ mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter)
static int mbfl_filt_conv_sjis_mac_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status == 1) {
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -722,6 +722,7 @@ static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter)
if (filter->status && filter->status != 4) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);
@ -826,6 +827,7 @@ int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
{
int c1 = filter->cache;
if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
filter->cache = filter->status = 0;
CK((*filter->output_function)(c1, filter->data));
} else if (filter->status == 2) {
/* First of a pair of Regional Indicator codepoints came at the end of a string */

View file

@ -218,6 +218,7 @@ static int mbfl_filt_conv_ucs2_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
/* Input string was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -301,6 +301,7 @@ static int mbfl_filt_conv_ucs4_wchar_flush(mbfl_convert_filter *filter)
/* Input string was truncated */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -147,6 +147,7 @@ static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status == 1) {
/* 2-byte character was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -323,6 +323,7 @@ static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
/* Input string was truncated */
filter->status = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}

View file

@ -233,6 +233,7 @@ static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter)
/* Input string was truncated */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
filter->cache = filter->status = 0;
if (filter->flush_function) {
(*filter->flush_function)(filter->data);

View file

@ -267,6 +267,7 @@ static int mbfl_filt_conv_utf7_wchar_flush(mbfl_convert_filter *filter)
if (filter->cache) {
/* Either we were expecting the 2nd half of a surrogate pair which
* never came, or else the last Base64 data was not padded with zeroes */
filter->cache = 0;
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
}
@ -373,6 +374,7 @@ int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
{
int status = filter->status;
int cache = filter->cache;
filter->status = filter->cache = 0;
/* flush fragments */
switch (status) {

View file

@ -287,6 +287,7 @@ static int mbfl_filt_conv_utf7imap_wchar_flush(mbfl_convert_filter *filter)
/* It is illegal for a UTF-7 IMAP string to end in a Base-64 encoded
* section. It should always change back to ASCII before the end. */
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {

View file

@ -180,6 +180,7 @@ int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter)
{
if (filter->status) {
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
filter->status = 0;
}
if (filter->flush_function) {