diff --git a/NEWS b/NEWS index 7fb9a4c2145..1d5c84ab2c9 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ PHP NEWS - Optimized out a couple of per-request syscalls (Rasmus) - Optimized digest generation in md5() and sha1() functions. (Ilia) - Upgraded SQLite 3 to version 3.3.16 (Ilia) +- Added a 4th parameter flag to htmlspecialchars() and htmlentities() that + makes the function not encode existing html entities. (Ilia) - Added PDO::FETCH_KEY_PAIR mode that will fetch a 2 column result set into an associated array. (Ilia) - Added function mysql_set_charset(). Allows connection encoding to be diff --git a/ext/standard/html.c b/ext/standard/html.c index e2badee4748..8ac7b417c5f 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -1078,12 +1078,15 @@ empty_source: } /* }}} */ - +PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +{ + return php_escape_html_entities_ex(old, oldlen, newlen, all, quote_style, hint_charset, 1 TSRMLS_CC); +} /* {{{ php_escape_html_entities */ -PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC) +PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC) { int i, j, maxlen, len; char *replaced; @@ -1145,8 +1148,34 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle int is_basic = 0; if (this_char == '&') { - memcpy(replaced + len, "&", sizeof("&") - 1); - len += sizeof("&") - 1; + if (double_encode) { +encode_amp: + memcpy(replaced + len, "&", sizeof("&") - 1); + len += sizeof("&") - 1; + } else { + char *e = memchr(old + i, ';', len - i); + char *s = old + i + 1; + + if (!e || (e - s) > 10) { /* minor optimization to avoid "entities" over 10 chars in length */ + goto encode_amp; + } else { + if (*s == '#') { /* numeric entities */ + s++; + while (s < e) { + if (!isdigit(*s++)) { + goto encode_amp; + } + } + } else { /* text entities */ + while (s < e) { + if (!isalnum(*s++)) { + goto encode_amp; + } + } + } + replaced[len++] = '&'; + } + } is_basic = 1; } else { for (j = 0; basic_entities[j].charcode != 0; j++) { @@ -1193,12 +1222,13 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) int len; long quote_style = ENT_COMPAT; char *replaced; + zend_bool double_encode = 1; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, "e_style, &hint_charset, &hint_charset_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &str, &str_len, "e_style, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) { return; } - replaced = php_escape_html_entities(str, str_len, &len, all, quote_style, hint_charset TSRMLS_CC); + replaced = php_escape_html_entities_ex(str, str_len, &len, all, quote_style, hint_charset, double_encode TSRMLS_CC); RETVAL_STRINGL(replaced, len, 0); } /* }}} */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 3e896e18b5c..fec44bb0e45 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -38,6 +38,7 @@ PHP_FUNCTION(html_entity_decode); PHP_FUNCTION(get_html_translation_table); PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC); +PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset, zend_bool double_encode TSRMLS_DC); PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC); #endif /* HTML_H */ diff --git a/ext/standard/tests/strings/htmlentities18.phpt b/ext/standard/tests/strings/htmlentities18.phpt new file mode 100644 index 00000000000..f171adaca2b --- /dev/null +++ b/ext/standard/tests/strings/htmlentities18.phpt @@ -0,0 +1,31 @@ +--TEST-- +htmlentities() / htmlspecialchars() "don't double encode" flag support +--FILE-- + +--EXPECT-- +string(3) "abc" +string(3) "abc" +string(13) "abc&sfdsa" +string(13) "abc&sfdsa" +string(33) "test+s & some more D" +string(33) "test+s & some more D" +string(20) "&; &amp &#a; &9;" +string(20) "&; &amp &#a; &9;" +string(32) "&kffjadfdhsjfhjasdhffasdfas;" +string(32) "&kffjadfdhsjfhjasdhffasdfas;" +string(16) "&#8787978789" +string(16) "&#8787978789"