mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Fix new conversion filter for HTML entities
While fuzzing the new mb_decode_numericentity implementation, I discovered that the fast conversion filter for 'HTML-ENTITIES' did not correctly handle an empty named entity ('&;'), nor did it correctly handle invalid named entities whose names are a proper prefix of a valid entity's name (for example '&f;'). It also did not correctly handle the case where a named entity is truncated before its terminating ';' and another named entity starts immediately afterwards.
This commit is contained in:
parent
9c3972fb3d
commit
fa83a8e15e
2 changed files with 16 additions and 5 deletions
|
@ -334,6 +334,11 @@ void mbfl_filt_conv_html_dec_copy(mbfl_convert_filter *src, mbfl_convert_filter
|
|||
memcpy(dest->opaque, src->opaque, html_enc_buffer_size+1);
|
||||
}
|
||||
|
||||
/* Returns true when c is an ASCII digit ('0'-'9'), an ASCII letter
 * ('A'-'Z' or 'a'-'z'), or '#'; returns false for every other byte,
 * including ';' and '&'. */
static bool is_html_entity_char(unsigned char c)
|
||||
{
|
||||
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '#';
|
||||
}
|
||||
|
||||
static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
|
||||
{
|
||||
unsigned char *p = *in, *e = p + *in_len;
|
||||
|
@ -345,9 +350,9 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
|
|||
if (c == '&') {
|
||||
/* Find terminating ; for HTML entity */
|
||||
unsigned char *terminator = p;
|
||||
while (terminator < e && *terminator != ';')
|
||||
while (terminator < e && is_html_entity_char(*terminator))
|
||||
terminator++;
|
||||
if (terminator < e) {
|
||||
if (terminator < e && *terminator == ';') {
|
||||
if (*p == '#' && (e - p) >= 2) {
|
||||
/* Numeric entity */
|
||||
unsigned int value = 0;
|
||||
|
@ -390,11 +395,11 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
|
|||
*out++ = value;
|
||||
p = terminator + 1;
|
||||
goto next_iteration;
|
||||
} else {
|
||||
} else if (terminator > p && terminator < e) {
|
||||
/* Named entity */
|
||||
mbfl_html_entity_entry *entity = (mbfl_html_entity_entry*)mbfl_html_entity_list;
|
||||
while (entity->name) {
|
||||
if (!strncmp((char*)p, entity->name, terminator - p)) {
|
||||
if (!strncmp((char*)p, entity->name, terminator - p) && strlen(entity->name) == terminator - p) {
|
||||
*out++ = entity->code;
|
||||
p = terminator + 1;
|
||||
goto next_iteration;
|
||||
|
@ -409,7 +414,7 @@ bad_entity:
|
|||
while (p < terminator && out < limit) {
|
||||
*out++ = *p++;
|
||||
}
|
||||
if (terminator < e && out < limit) {
|
||||
if (terminator < e && *terminator == ';' && out < limit) {
|
||||
*out++ = *p++;
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -45,6 +45,12 @@ convertFromEntities("\x00", '�');
|
|||
|
||||
testConversion(str_repeat('あ', 100), str_repeat('あ', 100));
|
||||
|
||||
convertFromEntities("&;", "&;");
|
||||
convertFromEntities("&f;", "&f;");
|
||||
|
||||
convertFromEntities("&A", "&A");
|
||||
convertFromEntities("&A", "&A");
|
||||
|
||||
echo "Done!\n";
|
||||
?>
|
||||
--EXPECTF--
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue