Add default_charset handling

This commit is contained in:
Yasuo Ohgaki 2014-03-20 10:49:23 +09:00
parent d0e3173016
commit e1fe76f28a
2 changed files with 56 additions and 6 deletions

View file

@ -84,6 +84,19 @@
#define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD)
#define sjis_trail(c) ((c) >= 0x40 && (c) != 0x7F && (c) < 0xFD)
/* {{{ get_charset
 * Resolve the charset to use when the caller supplied no explicit hint.
 *
 * Preference order: PG(internal_encoding) if it is set and non-empty,
 * otherwise SG(default_charset) if it is set and non-empty, otherwise the
 * empty string.
 *
 * On return *charset always points to a fresh copy made with estrndup() and
 * *charset_len holds the length of that copy. The caller owns the buffer and
 * must release it with efree().
 */
static void get_charset(char **charset, int *charset_len TSRMLS_DC) {
	const char *source = "";

	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
		source = PG(internal_encoding);
	} else if (SG(default_charset) && SG(default_charset)[0]) {
		source = SG(default_charset);
	}

	/* Always hand back an allocated buffer, even when neither setting is
	 * configured: callers efree() the result unconditionally, and their
	 * pointer starts out aimed at a string literal, which must never reach
	 * efree(). */
	*charset_len = (int) strlen(source);
	*charset = estrndup(source, *charset_len);
}
/* }}} */
/* {{{ get_next_char
*/
static inline unsigned int get_next_char(
@ -1432,8 +1445,8 @@ encode_amp:
*/
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
char *str, *hint_charset = PHP_DEFAULT_CHARSET;
int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
char *str, *hint_charset = "";
int str_len, hint_charset_len = 0;
size_t new_len;
long flags = ENT_COMPAT;
char *replaced;
@ -1443,7 +1456,14 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
return;
}
replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
if (hint_charset_len) {
replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
} else {
get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
efree(hint_charset);
}
RETVAL_STRINGL(replaced, (int)new_len, 0);
}
/* }}} */
@ -1504,8 +1524,8 @@ PHP_FUNCTION(htmlspecialchars_decode)
Convert all HTML entities to their applicable characters */
PHP_FUNCTION(html_entity_decode)
{
char *str, *hint_charset = PHP_DEFAULT_CHARSET;
int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
char *str, *hint_charset = "";
int str_len, hint_charset_len = 0;
size_t new_len = 0;
long quote_style = ENT_COMPAT;
char *replaced;
@ -1515,7 +1535,14 @@ PHP_FUNCTION(html_entity_decode)
return;
}
replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
if (hint_charset_len) {
replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
} else {
get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
efree(hint_charset);
}
if (replaced) {
RETURN_STRINGL(replaced, (int)new_len, 0);
}

View file

@ -0,0 +1,23 @@
--TEST--
default_charset and htmlentities/htmlspecialchars/html_entity_decode
--INI--
default_charset=UTF-8
internal_encoding=
--FILE--
<?php
// Override the --INI-- default_charset at runtime. internal_encoding is left
// empty, so the calls below that omit the charset argument are expected to
// pick up cp1252 from default_charset.
ini_set('default_charset', 'cp1252');
// Confirm both settings before exercising the entity functions.
var_dump(ini_get('default_charset'), ini_get('internal_encoding'));
// Encoding: the single byte 0xA3 with the charset omitted, then with it
// passed explicitly. --EXPECT-- requires the same "&pound;" for both.
var_dump(htmlentities("\xA3", ENT_HTML5));
var_dump(htmlentities("\xA3", ENT_HTML5, 'cp1252'));
// Decoding: the reverse direction, again with and without the explicit
// charset. bin2hex() makes the resulting byte visible in the output.
var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5)));
var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5, 'cp1252')));
?>
--EXPECT--
string(6) "cp1252"
string(0) ""
string(7) "&pound;"
string(7) "&pound;"
string(2) "a3"
string(2) "a3"