mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
- Fixed bug #49687 (utf8_decode vulnerabilities and deficiencies in the number
of reported malformed sequences). (Gustavo)
# Made a public interface for get_next_char/UTF-8 in trunk to use in utf8_decode.
# In PHP 5.3, trunk's get_next_char was copied to xml.c because 5.3's
# get_next_char is different and is not prepared to recover appropriately from
# errors.
This commit is contained in:
parent
da400e7500
commit
e69b1ff2c4
4 changed files with 49 additions and 32 deletions
|
@ -92,9 +92,9 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
|
|||
|
||||
/* {{{ get_next_char
|
||||
*/
|
||||
static unsigned int get_next_char(
|
||||
static inline unsigned int get_next_char(
|
||||
enum entity_charset charset,
|
||||
unsigned char *str,
|
||||
const unsigned char *str,
|
||||
size_t str_len,
|
||||
size_t *cursor,
|
||||
int *status)
|
||||
|
@ -352,6 +352,18 @@ static unsigned int get_next_char(
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_next_utf8_char
|
||||
* Public interface for get_next_char used with UTF-8.
*
* Thin exported wrapper: forwards str, str_len, cursor and status unchanged to
* get_next_char() with the charset fixed to cs_utf_8, and returns whatever
* get_next_char() returns.
*
* NOTE(review): get_next_char()'s body is not visible in this hunk. Judging by
* the way xml_utf8_decode() uses this function, *cursor is presumably advanced
* past the bytes consumed and *status is presumably left at/set to FAILURE for
* a malformed sequence -- confirm against get_next_char(). */
|
||||
PHPAPI unsigned int php_next_utf8_char(
|
||||
const unsigned char *str,
|
||||
size_t str_len,
|
||||
size_t *cursor,
|
||||
int *status)
|
||||
{
|
||||
return get_next_char(cs_utf_8, str, str_len, cursor, status);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ entity_charset determine_charset
|
||||
* returns the charset identifier based on current locale or a hint.
|
||||
* defaults to UTF-8 */
|
||||
|
|
|
@ -57,5 +57,6 @@ PHP_FUNCTION(get_html_translation_table);
|
|||
PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC);
|
||||
PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC);
|
||||
PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC);
|
||||
PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status);
|
||||
|
||||
#endif /* HTML_H */
|
||||
|
|
24
ext/xml/tests/bug49687.phpt
Normal file
24
ext/xml/tests/bug49687.phpt
Normal file
|
@ -0,0 +1,24 @@
|
|||
--TEST--
|
||||
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
|
||||
--SKIPIF--
|
||||
<?php
|
||||
require_once("skipif.inc");
|
||||
if (!extension_loaded('xml')) die ("skip xml extension not available");
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
// Malformed UTF-8 inputs; each one is passed to utf8_decode() and hex-dumped below.
$tests = array(
|
||||
// "A", lead byte 0xC2 followed by 0x3E instead of a continuation byte, "B"; expected 41 3f 3e 42.
"\x41\xC2\x3E\x42",
|
||||
// Three-byte lead 0xE3 with only one continuation byte, then 0x22; expected 3f 22.
"\xE3\x80\x22",
|
||||
// Stray continuation bytes, truncated sequences and a complete E2 98 BA (U+263A); expected 41 3f 3f 42 3f 43 3f 3f.
"\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98",
|
||||
);
|
||||
// One line of hex per input so the decoded bytes can be compared exactly with the EXPECT section.
foreach ($tests as $t) {
|
||||
echo bin2hex(utf8_decode($t)), "\n";
|
||||
}
|
||||
echo "Done.\n";
|
||||
--EXPECT--
|
||||
413f3e42
|
||||
3f22
|
||||
413f3f423f433f3f
|
||||
Done.
|
|
@ -32,6 +32,7 @@
|
|||
#include "zend_variables.h"
|
||||
#include "ext/standard/php_string.h"
|
||||
#include "ext/standard/info.h"
|
||||
#include "ext/standard/html.h"
|
||||
|
||||
#if HAVE_XML
|
||||
|
||||
|
@ -662,7 +663,7 @@ PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char
|
|||
/* {{{ xml_utf8_decode */
|
||||
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
|
||||
{
|
||||
int pos = len;
|
||||
size_t pos = 0;
|
||||
char *newbuf = emalloc(len + 1);
|
||||
unsigned int c;
|
||||
char (*decoder)(unsigned short) = NULL;
|
||||
|
@ -681,36 +682,15 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_
|
|||
newbuf[*newlen] = '\0';
|
||||
return newbuf;
|
||||
}
|
||||
while (pos > 0) {
|
||||
c = (unsigned char)(*s);
|
||||
if (c >= 0xf0) { /* four bytes encoded, 21 bits */
|
||||
if(pos-4 >= 0) {
|
||||
c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
|
||||
} else {
|
||||
|
||||
while (pos < (size_t)len) {
|
||||
int status = FAILURE;
|
||||
c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
|
||||
|
||||
if (status == FAILURE || c > 0xFFU) {
|
||||
c = '?';
|
||||
}
|
||||
s += 4;
|
||||
pos -= 4;
|
||||
} else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
|
||||
if(pos-3 >= 0) {
|
||||
c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
|
||||
} else {
|
||||
c = '?';
|
||||
}
|
||||
s += 3;
|
||||
pos -= 3;
|
||||
} else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
|
||||
if(pos-2 >= 0) {
|
||||
c = ((s[0]&63)<<6) | (s[1]&63);
|
||||
} else {
|
||||
c = '?';
|
||||
}
|
||||
s += 2;
|
||||
pos -= 2;
|
||||
} else {
|
||||
s++;
|
||||
pos--;
|
||||
}
|
||||
|
||||
newbuf[*newlen] = decoder ? decoder(c) : c;
|
||||
++*newlen;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue