From fa83a8e15e91548ef240a4880c94b861e45efb83 Mon Sep 17 00:00:00 2001
From: Alex Dowad
Date: Mon, 27 Jun 2022 09:21:00 +0200
Subject: [PATCH] Fix new conversion filter for HTML entities

While fuzzing the new mb_decode_numericentity implementation, I
discovered that the fast conversion filter for 'HTML-ENTITIES' did not
correctly handle an empty named entity ('&;'), nor did it correctly
handle invalid named entities whose names were a prefix of a valid
entity.

Also, it did not correctly handle the case where a named entity is
truncated and another named entity starts abruptly.
---
 ext/mbstring/libmbfl/filters/mbfilter_htmlent.c | 15 ++++++++++-----
 ext/mbstring/tests/htmlent_encoding.phpt        |  6 ++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
index 07752f29455..d463ae714a6 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
@@ -334,6 +334,11 @@ void mbfl_filt_conv_html_dec_copy(mbfl_convert_filter *src, mbfl_convert_filter
 	memcpy(dest->opaque, src->opaque, html_enc_buffer_size+1);
 }
 
+static bool is_html_entity_char(unsigned char c)
+{
+	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '#';
+}
+
 static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
 {
 	unsigned char *p = *in, *e = p + *in_len;
@@ -345,9 +350,9 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
 		if (c == '&') {
 			/* Find terminating ; for HTML entity */
 			unsigned char *terminator = p;
-			while (terminator < e && *terminator != ';')
+			while (terminator < e && is_html_entity_char(*terminator))
 				terminator++;
-			if (terminator < e) {
+			if (terminator < e && *terminator == ';') {
 				if (*p == '#' && (e - p) >= 2) {
 					/* Numeric entity */
 					unsigned int value = 0;
@@ -390,11 +395,11 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
 					*out++ = value;
 					p = terminator + 1;
 					goto next_iteration;
-				} else {
+				} else if (terminator > p && terminator < e) {
 					/* Named entity */
 					mbfl_html_entity_entry *entity = (mbfl_html_entity_entry*)mbfl_html_entity_list;
 					while (entity->name) {
-						if (!strncmp((char*)p, entity->name, terminator - p)) {
+						if (!strncmp((char*)p, entity->name, terminator - p) && strlen(entity->name) == terminator - p) {
 							*out++ = entity->code;
 							p = terminator + 1;
 							goto next_iteration;
@@ -409,7 +414,7 @@ bad_entity:
 			while (p < terminator && out < limit) {
 				*out++ = *p++;
 			}
-			if (terminator < e && out < limit) {
+			if (terminator < e && *terminator == ';' && out < limit) {
 				*out++ = *p++;
 			}
 		} else {
diff --git a/ext/mbstring/tests/htmlent_encoding.phpt b/ext/mbstring/tests/htmlent_encoding.phpt
index 7f92396742a..3b548138e3d 100644
--- a/ext/mbstring/tests/htmlent_encoding.phpt
+++ b/ext/mbstring/tests/htmlent_encoding.phpt
@@ -45,6 +45,12 @@ convertFromEntities("\x00", '&#00000;');
 
 testConversion(str_repeat('あ', 100), str_repeat('&#12354;', 100));
 
+convertFromEntities("&;", "&;");
+convertFromEntities("&f;", "&f;");
+
+convertFromEntities("&A", "&&#65;");
+convertFromEntities("&A", "&&#x41;");
+
 echo "Done!\n";
 ?>
 --EXPECTF--