mirror of
https://github.com/php/php-src.git
synced 2025-08-16 14:08:47 +02:00
Fix new conversion filter for HTML entities
While fuzzing the new mb_decode_numericentity implementation, I discovered that the fast conversion filter for 'HTML-ENTITIES' did not correctly handle an empty named entity ('&;'), nor did it correctly handle invalid named entities whose names were a prefix of a valid entity. Also, it did not correctly handle the case where a named entity is truncated and another named entity starts abruptly.
This commit is contained in:
parent
9c3972fb3d
commit
fa83a8e15e
2 changed files with 16 additions and 5 deletions
|
@ -334,6 +334,11 @@ void mbfl_filt_conv_html_dec_copy(mbfl_convert_filter *src, mbfl_convert_filter
|
||||||
memcpy(dest->opaque, src->opaque, html_enc_buffer_size+1);
|
memcpy(dest->opaque, src->opaque, html_enc_buffer_size+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Report whether byte `c` may appear inside the body of an HTML entity
 * (the part between '&' and ';').
 *
 * Returns true for ASCII digits '0'-'9', ASCII letters 'A'-'Z' and 'a'-'z',
 * and for '#' (the marker that introduces a numeric entity); returns false
 * for every other byte, including ';' and '&'.
 *
 * The entity scanner in mb_htmlent_to_wchar advances over bytes for which
 * this returns true, so the scan stops at the first byte that cannot be part
 * of an entity rather than running on until some later ';'. */
static bool is_html_entity_char(unsigned char c)
|
||||||
|
{
|
||||||
|
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '#';
|
||||||
|
}
|
||||||
|
|
||||||
static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
|
static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
|
||||||
{
|
{
|
||||||
unsigned char *p = *in, *e = p + *in_len;
|
unsigned char *p = *in, *e = p + *in_len;
|
||||||
|
@ -345,9 +350,9 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
|
||||||
if (c == '&') {
|
if (c == '&') {
|
||||||
/* Find terminating ; for HTML entity */
|
/* Find terminating ; for HTML entity */
|
||||||
unsigned char *terminator = p;
|
unsigned char *terminator = p;
|
||||||
while (terminator < e && *terminator != ';')
|
while (terminator < e && is_html_entity_char(*terminator))
|
||||||
terminator++;
|
terminator++;
|
||||||
if (terminator < e) {
|
if (terminator < e && *terminator == ';') {
|
||||||
if (*p == '#' && (e - p) >= 2) {
|
if (*p == '#' && (e - p) >= 2) {
|
||||||
/* Numeric entity */
|
/* Numeric entity */
|
||||||
unsigned int value = 0;
|
unsigned int value = 0;
|
||||||
|
@ -390,11 +395,11 @@ static size_t mb_htmlent_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
|
||||||
*out++ = value;
|
*out++ = value;
|
||||||
p = terminator + 1;
|
p = terminator + 1;
|
||||||
goto next_iteration;
|
goto next_iteration;
|
||||||
} else {
|
} else if (terminator > p && terminator < e) {
|
||||||
/* Named entity */
|
/* Named entity */
|
||||||
mbfl_html_entity_entry *entity = (mbfl_html_entity_entry*)mbfl_html_entity_list;
|
mbfl_html_entity_entry *entity = (mbfl_html_entity_entry*)mbfl_html_entity_list;
|
||||||
while (entity->name) {
|
while (entity->name) {
|
||||||
if (!strncmp((char*)p, entity->name, terminator - p)) {
|
if (!strncmp((char*)p, entity->name, terminator - p) && strlen(entity->name) == terminator - p) {
|
||||||
*out++ = entity->code;
|
*out++ = entity->code;
|
||||||
p = terminator + 1;
|
p = terminator + 1;
|
||||||
goto next_iteration;
|
goto next_iteration;
|
||||||
|
@ -409,7 +414,7 @@ bad_entity:
|
||||||
while (p < terminator && out < limit) {
|
while (p < terminator && out < limit) {
|
||||||
*out++ = *p++;
|
*out++ = *p++;
|
||||||
}
|
}
|
||||||
if (terminator < e && out < limit) {
|
if (terminator < e && *terminator == ';' && out < limit) {
|
||||||
*out++ = *p++;
|
*out++ = *p++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -45,6 +45,12 @@ convertFromEntities("\x00", '�');
|
||||||
|
|
||||||
testConversion(str_repeat('あ', 100), str_repeat('あ', 100));
|
testConversion(str_repeat('あ', 100), str_repeat('あ', 100));
|
||||||
|
|
||||||
|
convertFromEntities("&;", "&;");
|
||||||
|
convertFromEntities("&f;", "&f;");
|
||||||
|
|
||||||
|
convertFromEntities("&A", "&A");
|
||||||
|
convertFromEntities("&A", "&A");
|
||||||
|
|
||||||
echo "Done!\n";
|
echo "Done!\n";
|
||||||
?>
|
?>
|
||||||
--EXPECTF--
|
--EXPECTF--
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue