- Fixed bug #49687 (utf8_decode vulnerabilities and deficiencies in the number
of reported malformed sequences). (Gustavo)
#Made a public interface for get_next_char/utf-8 in trunk to use in utf8_decode.
#In PHP 5.3, trunk's get_next_char was copied to xml.c because 5.3's
#get_next_char is different and is not prepared to recover appropriately from
#errors.
This commit is contained in:
Gustavo André dos Santos Lopes 2010-10-27 18:13:25 +00:00
parent da400e7500
commit e69b1ff2c4
4 changed files with 49 additions and 32 deletions

View file

@ -92,9 +92,9 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* {{{ get_next_char /* {{{ get_next_char
*/ */
static unsigned int get_next_char( static inline unsigned int get_next_char(
enum entity_charset charset, enum entity_charset charset,
unsigned char *str, const unsigned char *str,
size_t str_len, size_t str_len,
size_t *cursor, size_t *cursor,
int *status) int *status)
@ -352,6 +352,18 @@ static unsigned int get_next_char(
} }
/* }}} */ /* }}} */
/* {{{ php_next_utf8_char
 * Exported (PHPAPI) wrapper around the file-local get_next_char that fixes
 * the charset argument to cs_utf_8.
 *
 * str, str_len, cursor and status are forwarded to get_next_char untouched,
 * and its return value is handed back as-is.
 * NOTE(review): presumably *cursor is advanced past the consumed bytes and
 * *status reports success/failure of the decode -- confirm against
 * get_next_char itself. */
PHPAPI unsigned int php_next_utf8_char(
		const unsigned char *str,
		size_t str_len,
		size_t *cursor,
		int *status)
{
	unsigned int next_char = get_next_char(cs_utf_8, str, str_len, cursor, status);

	return next_char;
}
/* }}} */
/* {{{ entity_charset determine_charset /* {{{ entity_charset determine_charset
* returns the charset identifier based on current locale or a hint. * returns the charset identifier based on current locale or a hint.
* defaults to UTF-8 */ * defaults to UTF-8 */

View file

@ -57,5 +57,6 @@ PHP_FUNCTION(get_html_translation_table);
PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC); PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC);
PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC); PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC);
PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC); PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC);
PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status);
#endif /* HTML_H */ #endif /* HTML_H */

View file

@ -0,0 +1,24 @@
--TEST--
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
--SKIPIF--
<?php
// Pull in skipif.inc first, then skip explicitly when the xml extension
// is not loaded.
require_once "skipif.inc";
if (!extension_loaded('xml')) {
	die("skip xml extension not available");
}
?>
--FILE--
<?php
// Malformed UTF-8 inputs. Each one is run through utf8_decode() and the
// result is printed as hex, one line per input, for comparison with the
// --EXPECT-- section.
$inputs = array(
	// 'A', lead byte 0xC2 followed by '>' (0x3E) instead of a continuation
	// byte, then 'B'.
	"\x41\xC2\x3E\x42",
	// Three-byte lead 0xE3 with a single continuation byte, cut short by
	// '"' (0x22).
	"\xE3\x80\x22",
	// 'A', two stray continuation bytes (0x98, 0xBA), 'B', a truncated
	// three-byte sequence followed by 'C', the complete sequence E2 98 BA
	// (U+263A), and a truncated three-byte sequence at the end of the string.
	"\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98",
);
foreach ($inputs as $input) {
	$decoded = utf8_decode($input);
	echo bin2hex($decoded), "\n";
}
echo "Done.\n";
--EXPECT--
413f3e42
3f22
413f3f423f433f3f
Done.

View file

@ -32,6 +32,7 @@
#include "zend_variables.h" #include "zend_variables.h"
#include "ext/standard/php_string.h" #include "ext/standard/php_string.h"
#include "ext/standard/info.h" #include "ext/standard/info.h"
#include "ext/standard/html.h"
#if HAVE_XML #if HAVE_XML
@ -662,7 +663,7 @@ PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char
/* {{{ xml_utf8_decode */ /* {{{ xml_utf8_decode */
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{ {
int pos = len; size_t pos = 0;
char *newbuf = emalloc(len + 1); char *newbuf = emalloc(len + 1);
unsigned int c; unsigned int c;
char (*decoder)(unsigned short) = NULL; char (*decoder)(unsigned short) = NULL;
@ -681,36 +682,15 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_
newbuf[*newlen] = '\0'; newbuf[*newlen] = '\0';
return newbuf; return newbuf;
} }
while (pos > 0) {
c = (unsigned char)(*s); while (pos < (size_t)len) {
if (c >= 0xf0) { /* four bytes encoded, 21 bits */ int status = FAILURE;
if(pos-4 >= 0) { c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
} else { if (status == FAILURE || c > 0xFFU) {
c = '?'; c = '?';
} }
s += 4;
pos -= 4;
} else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
if(pos-3 >= 0) {
c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
} else {
c = '?';
}
s += 3;
pos -= 3;
} else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
if(pos-2 >= 0) {
c = ((s[0]&63)<<6) | (s[1]&63);
} else {
c = '?';
}
s += 2;
pos -= 2;
} else {
s++;
pos--;
}
newbuf[*newlen] = decoder ? decoder(c) : c; newbuf[*newlen] = decoder ? decoder(c) : c;
++*newlen; ++*newlen;
} }