mirror of
https://github.com/php/php-src.git
synced 2025-08-21 01:45:16 +02:00
[DOC] Added ENT_IGNORE as a compatibility flag for htmlentities() and
htmlspecialchars() to skip invalid multibyte sequences instead of returning an empty string (as iconv's //IGNORE does). These functions will still never return an invalid or incomplete multibyte sequence. Example: htmlspecialchars("...", ENT_QUOTES | ENT_IGNORE, "utf-8");
This commit is contained in:
parent
763a92d7e0
commit
eef1ad9ddf
4 changed files with 135 additions and 21 deletions
|
@ -491,6 +491,7 @@ struct basic_entities_dec {
|
|||
|
||||
#define CHECK_LEN(pos, chars_need) \
|
||||
if((str_len - (pos)) < chars_need) { \
|
||||
*newpos = pos; \
|
||||
*status = FAILURE; \
|
||||
return 0; \
|
||||
}
|
||||
|
@ -535,6 +536,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
|
|||
more = 0;
|
||||
if(stat) {
|
||||
/* we didn't finish the UTF sequence correctly */
|
||||
--pos;
|
||||
*status = FAILURE;
|
||||
}
|
||||
break;
|
||||
|
@ -1138,6 +1140,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne
|
|||
|
||||
if(status == FAILURE) {
|
||||
/* invalid MB sequence */
|
||||
if (quote_style & ENT_HTML_IGNORE_ERRORS) {
|
||||
continue;
|
||||
}
|
||||
efree(replaced);
|
||||
if(!PG(display_errors)) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
|
||||
|
@ -1319,6 +1324,7 @@ void register_html_constants(INIT_FUNC_ARGS)
|
|||
REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
|
||||
REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
|
||||
REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
|
||||
REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
|
|
@ -24,10 +24,12 @@
|
|||
#define ENT_HTML_QUOTE_NONE 0
|
||||
#define ENT_HTML_QUOTE_SINGLE 1
|
||||
#define ENT_HTML_QUOTE_DOUBLE 2
|
||||
#define ENT_HTML_IGNORE_ERRORS 4
|
||||
|
||||
#define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE
|
||||
#define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)
|
||||
#define ENT_NOQUOTES ENT_HTML_QUOTE_NONE
|
||||
#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS
|
||||
|
||||
void register_html_constants(INIT_FUNC_ARGS);
|
||||
|
||||
|
|
70
ext/standard/tests/strings/htmlentities-utf-2.phpt
Executable file
70
ext/standard/tests/strings/htmlentities-utf-2.phpt
Executable file
|
@ -0,0 +1,70 @@
|
|||
--TEST--
|
||||
HTML entities with invalid chars and ENT_IGNORE
|
||||
--INI--
|
||||
output_handler=
|
||||
--FILE--
|
||||
<?php
|
||||
@setlocale (LC_CTYPE, "C");
|
||||
$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE",
|
||||
b"Voil\xE0", b"Clich\xE9s",
|
||||
b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
|
||||
b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
|
||||
);
|
||||
foreach($strings as $string) {
|
||||
$sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
|
||||
var_dump(bin2hex($sc_encoded));
|
||||
$ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
|
||||
var_dump(bin2hex($ent_encoded));
|
||||
}
|
||||
?>
|
||||
--EXPECTF--
|
||||
%unicode|string%(8) "266c743b"
|
||||
%unicode|string%(8) "266c743b"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(8) "d090d0b0"
|
||||
%unicode|string%(8) "d090d0b0"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(2) "41"
|
||||
%unicode|string%(2) "41"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(2) "79"
|
||||
%unicode|string%(2) "79"
|
||||
%unicode|string%(8) "2667743b"
|
||||
%unicode|string%(8) "2667743b"
|
||||
%unicode|string%(8) "566f696c"
|
||||
%unicode|string%(8) "566f696c"
|
||||
%unicode|string%(12) "436c69636873"
|
||||
%unicode|string%(12) "436c69636873"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(2) "41"
|
||||
%unicode|string%(2) "41"
|
||||
%unicode|string%(4) "c3a9"
|
||||
%unicode|string%(16) "266561637574653b"
|
||||
%unicode|string%(2) "79"
|
||||
%unicode|string%(2) "79"
|
||||
%unicode|string%(8) "f7bfbfbf"
|
||||
%unicode|string%(8) "f7bfbfbf"
|
||||
%unicode|string%(10) "fbbfbfbfbf"
|
||||
%unicode|string%(10) "fbbfbfbfbf"
|
||||
%unicode|string%(12) "fdbfbfbfbfbf"
|
||||
%unicode|string%(12) "fdbfbfbfbfbf"
|
||||
%unicode|string%(4) "4142"
|
||||
%unicode|string%(4) "4142"
|
||||
%unicode|string%(4) "4242"
|
||||
%unicode|string%(4) "4242"
|
||||
%unicode|string%(4) "4342"
|
||||
%unicode|string%(4) "4342"
|
||||
%unicode|string%(2) "44"
|
||||
%unicode|string%(2) "44"
|
||||
%unicode|string%(2) "45"
|
||||
%unicode|string%(2) "45"
|
||||
%unicode|string%(2) "46"
|
||||
%unicode|string%(2) "46"
|
|
@ -4,8 +4,12 @@ HTML entities with invalid chars
|
|||
output_handler=
|
||||
--FILE--
|
||||
<?php
|
||||
setlocale (LC_CTYPE, "C");
|
||||
$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0", "\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE");
|
||||
@setlocale (LC_CTYPE, "C");
|
||||
$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE",
|
||||
b"Voil\xE0", b"Clich\xE9s",
|
||||
b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
|
||||
b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
|
||||
);
|
||||
foreach($strings as $string) {
|
||||
$sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8");
|
||||
var_dump(bin2hex($sc_encoded));
|
||||
|
@ -13,22 +17,54 @@ foreach($strings as $string) {
|
|||
var_dump(bin2hex($ent_encoded));
|
||||
}
|
||||
?>
|
||||
--EXPECT--
|
||||
unicode(8) "266c743b"
|
||||
unicode(8) "266c743b"
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(4) "d090"
|
||||
unicode(4) "d090"
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(8) "d090d0b0"
|
||||
unicode(8) "d090d0b0"
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(0) ""
|
||||
unicode(8) "2667743b"
|
||||
unicode(8) "2667743b"
|
||||
--EXPECTF--
|
||||
%unicode|string%(8) "266c743b"
|
||||
%unicode|string%(8) "266c743b"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(4) "d090"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(8) "d090d0b0"
|
||||
%unicode|string%(8) "d090d0b0"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(8) "2667743b"
|
||||
%unicode|string%(8) "2667743b"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(4) "c3a9"
|
||||
%unicode|string%(16) "266561637574653b"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(8) "f7bfbfbf"
|
||||
%unicode|string%(8) "f7bfbfbf"
|
||||
%unicode|string%(10) "fbbfbfbfbf"
|
||||
%unicode|string%(10) "fbbfbfbfbf"
|
||||
%unicode|string%(12) "fdbfbfbfbfbf"
|
||||
%unicode|string%(12) "fdbfbfbfbfbf"
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
%unicode|string%(0) ""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue