mirror of
https://github.com/php/php-src.git
synced 2025-08-21 01:45:16 +02:00
UTF-16 text conversion handles truncated characters as illegal
This broke one old test (Zend/tests/multibyte_encoding_003.phpt), which uses a PHP script encoded as UTF-16. The problem is that the test script has to be terminated by the text "\n--EXPECT--". Out of that text, the terminating newline (a 0x0A byte) becomes part of the resulting test script; but a bare 0x0A byte with no accompanying 0x00 is not valid UTF-16. Since we now treat truncated UTF-16 characters as erroneous, an extra '?' is appended to the output as an 'illegal character' marker. Really, if we are running a PHP script which is treated as encoded in UTF-16 or some other arbitrary (non-ASCII) text encoding, and the script is not actually a valid string in that encoding, then inserting '?' characters into the code which the PHP interpreter runs is a bad thing to do. In such cases, the script shouldn't be treated as UTF-16 (or whatever) at all. I wonder whether mbstring's encoding detection is being used in 'non-strict' mode?
This commit is contained in:
parent
9bfb158872
commit
d9ddeb6e85
2 changed files with 22 additions and 3 deletions
Binary file not shown.
|
@ -33,6 +33,7 @@
|
||||||
static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter);
|
static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter);
|
||||||
static int mbfl_filt_ident_utf16le(int c, mbfl_identify_filter *filter);
|
static int mbfl_filt_ident_utf16le(int c, mbfl_identify_filter *filter);
|
||||||
static int mbfl_filt_ident_utf16be(int c, mbfl_identify_filter *filter);
|
static int mbfl_filt_ident_utf16be(int c, mbfl_identify_filter *filter);
|
||||||
|
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);
|
||||||
|
|
||||||
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
|
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
|
||||||
|
|
||||||
|
@ -93,7 +94,7 @@ const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
|
||||||
mbfl_filt_conv_common_ctor,
|
mbfl_filt_conv_common_ctor,
|
||||||
NULL,
|
NULL,
|
||||||
mbfl_filt_conv_utf16_wchar,
|
mbfl_filt_conv_utf16_wchar,
|
||||||
mbfl_filt_conv_common_flush,
|
mbfl_filt_conv_utf16_wchar_flush,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -113,7 +114,7 @@ const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
|
||||||
mbfl_filt_conv_common_ctor,
|
mbfl_filt_conv_common_ctor,
|
||||||
NULL,
|
NULL,
|
||||||
mbfl_filt_conv_utf16be_wchar,
|
mbfl_filt_conv_utf16be_wchar,
|
||||||
mbfl_filt_conv_common_flush,
|
mbfl_filt_conv_utf16_wchar_flush,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -133,7 +134,7 @@ const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
|
||||||
mbfl_filt_conv_common_ctor,
|
mbfl_filt_conv_common_ctor,
|
||||||
NULL,
|
NULL,
|
||||||
mbfl_filt_conv_utf16le_wchar,
|
mbfl_filt_conv_utf16le_wchar,
|
||||||
mbfl_filt_conv_common_flush,
|
mbfl_filt_conv_utf16_wchar_flush,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -343,6 +344,24 @@ int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Flush handler installed in the UTF-16, UTF-16BE and UTF-16LE -> wchar
 * conversion tables.
 *
 * Takes a snapshot of filter->status and filter->cache and clears both.
 * If the low four bits of the saved status are non-zero, the input ended
 * in the middle of a character; the saved cache value, OR-ed with
 * MBFL_WCSGROUP_THROUGH, is then handed to the filter's output function.
 * Finally the downstream flush function is invoked when one is set.
 *
 * Returns 0 on the normal path.
 * NOTE(review): CK() is defined outside this view; presumably it returns
 * early with an error when the output function fails, in which case the
 * downstream flush below is not reached -- confirm.
 */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int pending_bits = filter->cache;

	/* Reset the filter state before anything is emitted. */
	filter->cache = 0;
	filter->status = 0;

	if ((pending_state & 0xF) != 0) {
		/* Input string was truncated: pass the leftover bits through. */
		CK((*filter->output_function)(pending_bits | MBFL_WCSGROUP_THROUGH, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
|
||||||
|
|
||||||
static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter)
|
static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter)
|
||||||
{
|
{
|
||||||
if (filter->status == 0) {
|
if (filter->status == 0) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue