Fix legacy text conversion filter for 'HTML-ENTITIES'

Because this routine used a plain char buffer (which is signed on many
platforms) to hold the bytes in a (possible) HTML entity, any bytes with
the MSB set would be sign-extended when converting to int; for example,
0x86 would become 0xFFFFFF86 (or -122).

Codepoints with huge values, like 0xFFFFFF86, are not valid; if any
such value was passed to the output filter, the filter would treat it
as an error and emit an error marker.
This commit is contained in:
Alex Dowad 2022-08-10 20:33:10 +02:00
parent d9269becca
commit d617fcaae2

View file

@@ -180,7 +180,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
int pos;
unsigned int ent = 0;
mbfl_html_entity_entry *entity;
char *buffer = (char*)filter->opaque;
unsigned char *buffer = (unsigned char*)filter->opaque;
if (!filter->status) {
if (c == '&' ) {
@@ -196,7 +196,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
if (filter->status > 3) {
/* numeric entity */
for (pos=3; pos<filter->status; pos++) {
int v = buffer[pos];
int v = buffer[pos];
if (v >= '0' && v <= '9') {
v = v - '0';
} else if (v >= 'A' && v <= 'F') {
@@ -242,13 +242,12 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(c, filter->data));
}
filter->status = 0;
/*php_error_docref("ref.mbstring", E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
buffer[filter->status] = 0;
entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
if (!strcmp((const char*)buffer+1, entity->name)) {
ent = entity->code;
break;
}