Add more tests for UTF-8 text conversion

This commit is contained in:
Alex Dowad 2021-08-27 22:57:03 +02:00
parent 51a32ccaf4
commit 15ba73cee3
2 changed files with 7 additions and 0 deletions

View file

@@ -31,6 +31,7 @@ $badUTF8 = array(
// Multi-byte characters which end too soon and go to ASCII // Multi-byte characters which end too soon and go to ASCII
"\xDFA" => "\x00\x00\x00%\x00\x00\x00A", "\xDFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xEF\xBFA" => "\x00\x00\x00%\x00\x00\x00A", "\xEF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xF0\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xF0\xBF\xBFA" => "\x00\x00\x00%\x00\x00\x00A", "\xF0\xBF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
// Multi-byte characters which end too soon and go to another MB char // Multi-byte characters which end too soon and go to another MB char

View file

@@ -778,6 +778,7 @@ $invalid = array(
// Multi-byte characters which end too soon and go to ASCII // Multi-byte characters which end too soon and go to ASCII
"\xDFA" => "\x00\x00\x00%\x00\x00\x00A", "\xDFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xEF\xBFA" => "\x00\x00\x00%\x00\x00\x00A", "\xEF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xF0\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
"\xF0\xBF\xBFA" => "\x00\x00\x00%\x00\x00\x00A", "\xF0\xBF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
// Multi-byte characters which end too soon and go to another MB char // Multi-byte characters which end too soon and go to another MB char
@@ -785,6 +786,11 @@ $invalid = array(
"\xEF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF", "\xEF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF",
"\xF0\xBF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF", "\xF0\xBF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF",
// Multi-byte characters which end too soon and go to a junk byte
// (Which isn't even valid to start a new character)
"\xF0\xBF\xBF\xFF" => "\x00\x00\x00%",
"\xF0\xBF\xFF" => "\x00\x00\x00%",
// Continuation bytes which appear outside of a MB char // Continuation bytes which appear outside of a MB char
"\x80" => "\x00\x00\x00%", "\x80" => "\x00\x00\x00%",
"A\x80" => "\x00\x00\x00A\x00\x00\x00%", "A\x80" => "\x00\x00\x00A\x00\x00\x00%",