Fix problems with ISO-2022-KR conversion

• The legacy conversion code did not emit an error marker if an
  escape sequence was truncated.

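• Both the old and the new conversion code would shift from KSC5601
  (KS X 1001) mode to ASCII mode on an invalid escape sequence.
  This makes no sense: an invalid escape sequence should only produce
  an error marker and leave the current shift state unchanged.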
This commit is contained in:
Alex Dowad 2022-08-09 10:37:46 +02:00
parent bfccdbd858
commit d9269becca
2 changed files with 9 additions and 3 deletions

View file

@@ -145,7 +145,7 @@ int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
if (c == '$') { if (c == '$') {
filter->status++; filter->status++;
} else { } else {
filter->status = 0; filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
} }
break; break;
@@ -154,7 +154,7 @@ int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
if (c == ')') { if (c == ')') {
filter->status++; filter->status++;
} else { } else {
filter->status = 0; filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
} }
break; break;
@@ -258,6 +258,10 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
{ {
if (filter->status & 0xF) {
/* Escape sequence or 2-byte character was truncated */
(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
}
/* back to ascii */ /* back to ascii */
if (filter->status & 0x10) { if (filter->status & 0x10) {
CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
@@ -305,7 +309,6 @@ static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t
p--; p--;
} }
*out++ = MBFL_BAD_INPUT; *out++ = MBFL_BAD_INPUT;
*state = ASCII;
} }
} else if (c == 0xF) { } else if (c == 0xF) {
*state = ASCII; *state = ASCII;

View file

@@ -114,6 +114,9 @@ convertInvalidString("\xFF\x86", "\x1B\$)C%", "UTF-16BE", "ISO-2022-KR");
// character at the end of a string, although the string was already ending in ASCII mode // character at the end of a string, although the string was already ending in ASCII mode
convertValidString("\x68\x46\x00a", "\x1B\$)C\x0E\x68\x46\x0Fa", "UTF-16BE", "ISO-2022-KR", false); convertValidString("\x68\x46\x00a", "\x1B\$)C\x0E\x68\x46\x0Fa", "UTF-16BE", "ISO-2022-KR", false);
// Regression test: Don't shift from KS X 1001 to ASCII mode on invalid escape sequence
convertInvalidString("\x0E\x1BX\x74\x30", "\x00%\x76\x20", "ISO-2022-KR", "UTF-16BE", false);
// Test "long" illegal character markers // Test "long" illegal character markers
mb_substitute_character("long"); mb_substitute_character("long");
convertInvalidString("\x1B", "%", "ISO-2022-KR", "UTF-8"); convertInvalidString("\x1B", "%", "ISO-2022-KR", "UTF-8");