Removed compile time dependency from ext/mbstring

This commit is contained in:
Dmitry Stogov 2010-12-08 11:27:34 +00:00
parent 088a6ad7b5
commit 755c2cd0d8
12 changed files with 481 additions and 279 deletions

1
NEWS
View file

@ -32,6 +32,7 @@ PHP NEWS
. Added multibyte support by default. Previously PHP had to be compiled
with --enable-zend-multibyte. Now it can be enabled or disabled through the
zend.multibyte directive in php.ini (Dmitry)
. Removed compile-time dependency on ext/mbstring (Dmitry)
. Added scalar typehints to the parser and the reflection API. (Ilia, Derick)
. Added support for Traits. (Stefan)
. Added closure $this support back. (Stas)

View file

@ -200,9 +200,6 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */
CG(script_encoding_list) = NULL;
CG(script_encoding_list_size) = 0;
CG(internal_encoding) = NULL;
CG(encoding_detector) = NULL;
CG(encoding_converter) = NULL;
CG(encoding_oddlen) = NULL;
CG(encoding_declared) = 0;
}
/* }}} */

View file

@ -155,11 +155,6 @@ struct _zend_compiler_globals {
zend_encoding *internal_encoding;
/* multibyte utility functions */
zend_encoding_detector encoding_detector;
zend_encoding_converter encoding_converter;
zend_encoding_oddlen encoding_oddlen;
#ifdef ZTS
zval ***static_members_table;
int last_static_member;

View file

@ -513,7 +513,36 @@ static zend_encoding *zend_encoding_table[] = {
NULL
};
/* Placeholder encoding detector: the initial value of
 * zend_multibyte_encoding_detector. It ignores all of its arguments and
 * reports "nothing detected" by returning NULL. */
static char* dummy_encoding_detector(const unsigned char *string, size_t length, char *list TSRMLS_DC)
{
return NULL;
}
/* Placeholder encoding converter: the initial value of
 * zend_multibyte_encoding_converter. It performs no conversion, leaves
 * *to and *to_length untouched and always returns -1; the callers visible
 * in this change treat any non-zero result as a failed conversion. */
static int dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
return -1;
}
/* Placeholder "odd length" callback: the initial value of
 * zend_multibyte_encoding_oddlen. It always returns 0, so callers that
 * subtract the result from the input length trim nothing. */
static size_t dummy_encoding_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
{
return 0;
}
/* Placeholder encoding-list checker: the initial value of
 * zend_multibyte_check_encoding_list. It ignores the list and always
 * returns 0.
 * NOTE(review): exif's INI handlers treat a zero result as "illegal
 * encoding" and fail the update, so with this default every list is
 * rejected - confirm that is the intended behaviour when no provider
 * has been registered. */
static int dummy_encoding_list_checker(const char *encoding_list TSRMLS_DC)
{
return 0;
}
/* Placeholder internal-encoding getter: the initial value of
 * zend_multibyte_get_internal_encoding. It always returns NULL, i.e. no
 * internal encoding name is available. */
static const char* dummy_get_internal_encoding(TSRMLS_D)
{
return NULL;
}
/* Exported multibyte hooks. Each one starts out pointing at the matching
 * dummy_* implementation above and is overwritten by
 * zend_multibyte_set_functions(). */
ZEND_API zend_encoding_detector zend_multibyte_encoding_detector = dummy_encoding_detector;
ZEND_API zend_encoding_converter zend_multibyte_encoding_converter = dummy_encoding_converter;
ZEND_API zend_encoding_oddlen zend_multibyte_encoding_oddlen = dummy_encoding_oddlen;
ZEND_API zend_encoding_list_checker zend_multibyte_check_encoding_list = dummy_encoding_list_checker;
ZEND_API zend_encoding_name_getter zend_multibyte_get_internal_encoding = dummy_get_internal_encoding;
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC)
@ -540,11 +569,13 @@ ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRM
return 0;
}
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen TSRMLS_DC)
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC)
{
CG(encoding_detector) = encoding_detector;
CG(encoding_converter) = encoding_converter;
CG(encoding_oddlen) = encoding_oddlen;
zend_multibyte_encoding_detector = encoding_detector;
zend_multibyte_encoding_converter = encoding_converter;
zend_multibyte_encoding_oddlen = encoding_oddlen;
zend_multibyte_check_encoding_list = encoding_list_checker;
zend_multibyte_get_internal_encoding = get_internal_encoding;
return 0;
}
@ -659,18 +690,16 @@ static size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_leng
{
size_t oddlen;
if (!CG(encoding_converter)) {
if (zend_multibyte_encoding_converter == dummy_encoding_converter) {
return 0;
}
if (CG(encoding_oddlen)) {
oddlen = CG(encoding_oddlen)(from, from_length, from_encoding TSRMLS_CC);
oddlen = zend_multibyte_encoding_oddlen(from, from_length, from_encoding TSRMLS_CC);
if (oddlen > 0) {
from_length -= oddlen;
}
}
if (CG(encoding_converter)(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) {
if (zend_multibyte_encoding_converter(to, to_length, from, from_length, to_encoding, from_encoding TSRMLS_CC) != 0) {
return 0;
}
@ -1053,10 +1082,11 @@ static zend_encoding* zend_multibyte_find_script_encoding(zend_encoding *onetime
}
/* if multiple encodings specified, detect automagically */
if (CG(script_encoding_list_size) > 1 && CG(encoding_detector)) {
if (CG(script_encoding_list_size) > 1 &&
zend_multibyte_encoding_detector != dummy_encoding_detector) {
list = zend_multibyte_assemble_encoding_list(CG(script_encoding_list),
CG(script_encoding_list_size));
name = CG(encoding_detector)(LANG_SCNG(script_org),
name = zend_multibyte_encoding_detector(LANG_SCNG(script_org),
LANG_SCNG(script_org_size), list TSRMLS_CC);
if (list) {
efree(list);

View file

@ -36,6 +36,10 @@ typedef int (*zend_encoding_converter)(unsigned char **to, size_t *to_length, co
typedef size_t (*zend_encoding_oddlen)(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
/* Checks a list of encoding names; callers in this change treat a zero
 * result as an illegal list. */
typedef int (*zend_encoding_list_checker)(const char *encoding_list TSRMLS_DC);
/* Returns the name of the internal encoding, or NULL when none is available. */
typedef const char* (*zend_encoding_name_getter)(TSRMLS_D);
typedef struct _zend_encoding {
zend_encoding_filter input_filter; /* escape input filter */
zend_encoding_filter output_filter; /* escape output filter */
@ -49,10 +53,18 @@ typedef struct _zend_encoding {
* zend multibyte APIs
*/
BEGIN_EXTERN_C()
/* multibyte utility functions */
ZEND_API extern zend_encoding_detector zend_multibyte_encoding_detector;
ZEND_API extern zend_encoding_converter zend_multibyte_encoding_converter;
ZEND_API extern zend_encoding_oddlen zend_multibyte_encoding_oddlen;
ZEND_API extern zend_encoding_list_checker zend_multibyte_check_encoding_list;
ZEND_API extern zend_encoding_name_getter zend_multibyte_get_internal_encoding;
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name TSRMLS_DC);
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen TSRMLS_DC);
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker encoding_list_checker, zend_encoding_name_getter get_internal_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC);
ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char *encoding_name);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to, size_t

View file

@ -66,16 +66,6 @@
#include "ext/standard/php_image.h"
#include "ext/standard/info.h"
#if defined(PHP_WIN32) || (HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING))
#define EXIF_USE_MBSTRING 1
#else
#define EXIF_USE_MBSTRING 0
#endif
#if EXIF_USE_MBSTRING
#include "ext/mbstring/mbstring.h"
#endif
/* needed for ssize_t definition */
#include <sys/types.h>
@ -176,23 +166,19 @@ ZEND_DECLARE_MODULE_GLOBALS(exif)
ZEND_INI_MH(OnUpdateEncode)
{
#if EXIF_USE_MBSTRING
if (new_value && strlen(new_value) && !php_mb_check_encoding_list(new_value TSRMLS_CC)) {
if (new_value && strlen(new_value) && !zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding ignored: '%s'", new_value);
return FAILURE;
}
#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
ZEND_INI_MH(OnUpdateDecode)
{
#if EXIF_USE_MBSTRING
if (!php_mb_check_encoding_list(new_value TSRMLS_CC)) {
if (!zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding ignored: '%s'", new_value);
return FAILURE;
}
#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
@ -224,7 +210,11 @@ static PHP_GINIT_FUNCTION(exif)
PHP_MINIT_FUNCTION(exif)
{
REGISTER_INI_ENTRIES();
REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", EXIF_USE_MBSTRING, CONST_CS | CONST_PERSISTENT);
if (zend_hash_exists(&module_registry, "mbstring", sizeof("mbstring"))) {
REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", 1, CONST_CS | CONST_PERSISTENT);
} else {
REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", 0, CONST_CS | CONST_PERSISTENT);
}
return SUCCESS;
}
/* }}} */
@ -241,9 +231,7 @@ PHP_MSHUTDOWN_FUNCTION(exif)
/* {{{ exif dependencies */
static const zend_module_dep exif_module_deps[] = {
ZEND_MOD_REQUIRED("standard")
#if EXIF_USE_MBSTRING
ZEND_MOD_REQUIRED("mbstring")
#endif
ZEND_MOD_OPTIONAL("mbstring")
{NULL, NULL, NULL}
};
/* }}} */
@ -2588,7 +2576,6 @@ static int exif_process_undefined(char **result, char *value, size_t byte_count
/* {{{ exif_process_string_raw
* Copy a string in Exif header to a character string returns length of allocated buffer if any. */
#if !EXIF_USE_MBSTRING
static int exif_process_string_raw(char **result, char *value, size_t byte_count) {
/* we cannot use strlcpy - here the problem is that we have to copy NUL
* chars up to byte_count, we also have to add a single NUL character to
@ -2602,7 +2589,6 @@ static int exif_process_string_raw(char **result, char *value, size_t byte_count
}
return 0;
}
#endif
/* }}} */
/* {{{ exif_process_string
@ -2629,11 +2615,8 @@ static int exif_process_string(char **result, char *value, size_t byte_count TSR
static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoPtr, char **pszEncoding, char *szValuePtr, int ByteCount TSRMLS_DC)
{
int a;
#if EXIF_USE_MBSTRING
char *decode;
size_t len;;
#endif
*pszEncoding = NULL;
/* Copy the comment */
@ -2642,7 +2625,6 @@ static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoP
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
#if EXIF_USE_MBSTRING
/* First try to detect BOM: ZERO WIDTH NOBREAK SPACE (FEFF 16)
* since we have no encoding support for the BOM yet we skip that.
*/
@ -2659,34 +2641,38 @@ static int exif_process_user_comment(image_info_type *ImageInfo, char **pszInfoP
} else {
decode = ImageInfo->decode_unicode_le;
}
*pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, decode, &len TSRMLS_CC);
/* Convert the UNICODE user comment through the registered multibyte
 * converter. The call site must pass the TSRM context with TSRMLS_CC;
 * TSRMLS_DC is the declaration form and does not compile here in
 * thread-safe builds. On conversion failure fall back to a raw copy. */
if (zend_multibyte_encoding_converter(
		pszInfoPtr,
		&len,
		szValuePtr,
		ByteCount,
		ImageInfo->encode_unicode,
		decode
		TSRMLS_CC) != 0) {
	len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
}
return len;
#else
return exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
#endif
} else
if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
} else if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
} else
if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
} else if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
/* JIS should be tanslated to MB or we leave it to the user - leave it to the user */
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
#if EXIF_USE_MBSTRING
if (ImageInfo->motorola_intel) {
*pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis, ImageInfo->decode_jis_be, &len TSRMLS_CC);
} else {
*pszInfoPtr = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis, ImageInfo->decode_jis_le, &len TSRMLS_CC);
/* Convert the JIS user comment, choosing the source encoding by byte
 * order. The call site must pass the TSRM context with TSRMLS_CC;
 * TSRMLS_DC is the declaration form and does not compile here in
 * thread-safe builds. On conversion failure fall back to a raw copy. */
if (zend_multibyte_encoding_converter(
		pszInfoPtr,
		&len,
		szValuePtr,
		ByteCount,
		ImageInfo->encode_jis,
		ImageInfo->motorola_intel ? ImageInfo->decode_jis_be : ImageInfo->decode_jis_le
		TSRMLS_CC) != 0) {
	len = exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
}
return len;
#else
return exif_process_string_raw(pszInfoPtr, szValuePtr, ByteCount);
#endif
} else
if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
} else if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
/* 8 NULL means undefined and should be ASCII... */
*pszEncoding = estrdup("UNDEFINED");
szValuePtr = szValuePtr+8;
@ -2714,19 +2700,17 @@ static int exif_process_unicode(image_info_type *ImageInfo, xp_field_type *xp_fi
xp_field->tag = tag;
/* Copy the comment */
#if EXIF_USE_MBSTRING
/* What if MS supports big-endian with XP? */
/* if (ImageInfo->motorola_intel) {
xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_be, &xp_field->size TSRMLS_CC);
} else {
xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_le, &xp_field->size TSRMLS_CC);
}*/
xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_le, &xp_field->size TSRMLS_CC);
return xp_field->size;
#else
/* Convert the XP field value, choosing the source encoding by byte order.
 * The call site must pass the TSRM context with TSRMLS_CC; TSRMLS_DC is
 * the declaration form and does not compile here in thread-safe builds.
 * On conversion failure fall back to a raw copy. */
if (zend_multibyte_encoding_converter(
		&xp_field->value,
		&xp_field->size,
		szValuePtr,
		ByteCount,
		ImageInfo->encode_unicode,
		ImageInfo->motorola_intel ? ImageInfo->decode_unicode_be : ImageInfo->decode_unicode_le
		TSRMLS_CC) != 0) {
	xp_field->size = exif_process_string_raw(&xp_field->value, szValuePtr, ByteCount);
}
return xp_field->size;
#endif
}
/* }}} */

View file

@ -96,6 +96,7 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring)
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
static const char* php_mb_internal_encoding_name(TSRMLS_D);
static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
@ -769,7 +770,8 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
/* }}} */
/* {{{ MBSTRING_API php_mb_check_encoding_list */
MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC)
{
return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
}
/* }}} */
@ -956,6 +958,76 @@ static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,
}
/* }}} */
/* Copies at most len bytes of start into a fresh emalloc()ed, NUL-terminated
 * buffer, stopping early at the first unescaped byte equal to quote.
 * A backslash followed by a backslash (or by quote, when quote is non-zero)
 * is collapsed to the escaped character. Everything else is copied one
 * multibyte character at a time, with the character width taken from
 * php_mb_gpc_mbchar_bytes(). */
static char *php_mb_rfc1867_substring(char *start, int len, char quote TSRMLS_DC)
{
	char *result = emalloc(len + 2);
	char *out = result;
	int pos = 0;

	while (pos < len && start[pos] != quote) {
		if (start[pos] == '\\' && (start[pos + 1] == '\\' || (quote && start[pos + 1] == quote))) {
			/* drop the backslash, keep the escaped character */
			*out++ = start[pos + 1];
			pos += 2;
		} else {
			/* copy a whole multibyte character, never reading past len */
			size_t nbytes = php_mb_gpc_mbchar_bytes(start + pos TSRMLS_CC);

			while (nbytes-- > 0 && pos < len) {
				*out++ = start[pos++];
			}
		}
	}

	*out = '\0';
	return result;
}
/* Extracts the first word of str as a newly allocated string. Leading
 * whitespace is skipped; the word is then either a quoted string (opened
 * by " or ') or a run of non-whitespace bytes, and is copied by
 * php_mb_rfc1867_substring(). Empty or all-whitespace input yields an
 * empty string from estrdup(""). */
static char *php_mb_rfc1867_getword(char *str TSRMLS_DC) /* {{{ */
{
	/* <ctype.h> classifiers require a value representable as unsigned char
	 * (or EOF). Multibyte input routinely contains bytes >= 0x80, which are
	 * negative where plain char is signed, so cast before isspace(). */
	while (*str && isspace((unsigned char)*str)) {
		++str;
	}

	if (!*str) {
		return estrdup("");
	}

	if (*str == '"' || *str == '\'') {
		char quote = *str;

		str++;
		return php_mb_rfc1867_substring(str, (int)strlen(str), quote TSRMLS_CC);
	} else {
		char *strend = str;

		while (*strend && !isspace((unsigned char)*strend)) {
			++strend;
		}
		return php_mb_rfc1867_substring(str, (int)(strend - str), 0 TSRMLS_CC);
	}
}
/* }}} */
/* Returns a pointer to the last path component of filename: the text after
 * the later of the last '\' and the last '/' as located by php_mb_strrchr(),
 * or filename itself when neither separator is found. The result points
 * into filename; nothing is allocated. */
static char *php_mb_rfc1867_basename(char *filename TSRMLS_DC) /* {{{ */
{
	/* Strictly speaking the backslash only needs checking on win32, where it
	 * is a valid path separator. IE, however, always sends the full path of
	 * the file on the user's filesystem, so without this the caller would get
	 * a bogus file name. Keep the check enabled on all systems until that
	 * behaviour is gone. */
	char *last_backslash = php_mb_strrchr(filename, '\\' TSRMLS_CC);
	char *last_slash = php_mb_strrchr(filename, '/' TSRMLS_CC);
	char *separator = (last_slash > last_backslash) ? last_slash : last_backslash;

	return separator ? separator + 1 : filename;
}
/* }}} */
/* {{{ php.ini directive handler */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
static PHP_INI_MH(OnUpdate_mbstring_language)
@ -1353,6 +1425,21 @@ PHP_MINIT_FUNCTION(mbstring)
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
zend_multibyte_set_functions(
php_mb_encoding_detector,
php_mb_encoding_converter,
php_mb_oddlen,
php_mb_check_encoding_list,
php_mb_internal_encoding_name TSRMLS_CC);
php_rfc1867_set_multibyte_callbacks(
php_mb_encoding_translation,
php_mb_gpc_encoding_detector,
php_mb_gpc_encoding_converter,
php_mb_rfc1867_getword,
php_mb_rfc1867_basename);
return SUCCESS;
}
/* }}} */
@ -4697,9 +4784,6 @@ static int php_mb_set_zend_encoding(TSRMLS_D)
/* 'd better use mbfl_memory_device? */
char *name, *list = NULL;
int n, *entry, list_size = 0;
zend_encoding_detector encoding_detector;
zend_encoding_converter encoding_converter;
zend_encoding_oddlen encoding_oddlen;
/* notify script encoding to Zend Engine */
entry = MBSTRG(script_encoding_list);
@ -4724,9 +4808,6 @@ static int php_mb_set_zend_encoding(TSRMLS_D)
if (list) {
efree(list);
}
encoding_detector = php_mb_encoding_detector;
encoding_converter = php_mb_encoding_converter;
encoding_oddlen = php_mb_oddlen;
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
@ -4735,8 +4816,6 @@ static int php_mb_set_zend_encoding(TSRMLS_D)
zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
return 0;
}
/* }}} */
@ -4849,6 +4928,26 @@ static size_t php_mb_oddlen(const unsigned char *string, size_t length, const ch
}
/* }}} */
/* {{{ const char* php_mb_internal_encoding_name()
 * Returns the name of the current internal encoding, or NULL when no name
 * is available or the name is one of the pseudo-encodings "pass", "auto"
 * or "none".
 */
static const char* php_mb_internal_encoding_name(TSRMLS_D)
{
	const char *name = mbfl_no_encoding2name(MBSTRG(current_internal_encoding));

	if (name == NULL || name[0] == '\0') {
		return NULL;
	}

	/* pseudo-encodings do not name a real character set */
	if (strcmp(name, "pass") == 0 ||
		strcmp(name, "auto") == 0 ||
		strcmp(name, "none") == 0) {
		return NULL;
	}

	return name;
}
/* }}} */
#endif /* HAVE_MBSTRING */
/*

View file

@ -54,11 +54,6 @@
#include <langinfo.h>
#endif
#if HAVE_MBSTRING
# include "ext/mbstring/mbstring.h"
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
#endif
#include <zend_hash.h>
#include "html_tables.h"
@ -372,7 +367,6 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
int i;
enum entity_charset charset = cs_utf_8;
int len = 0;
zval *uf_result = NULL;
/* Default is now UTF-8 */
if (charset_hint == NULL)
@ -381,79 +375,11 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
}
#if HAVE_MBSTRING
#if !defined(COMPILE_DL_MBSTRING)
/* XXX: Ugly things. Why don't we look for a more sophisticated way? */
switch (MBSTRG(current_internal_encoding)) {
case mbfl_no_encoding_8859_1:
return cs_8859_1;
case mbfl_no_encoding_utf8:
return cs_utf_8;
case mbfl_no_encoding_euc_jp:
case mbfl_no_encoding_eucjp_win:
return cs_eucjp;
case mbfl_no_encoding_sjis:
case mbfl_no_encoding_sjis_open:
case mbfl_no_encoding_cp932:
return cs_sjis;
case mbfl_no_encoding_cp1252:
return cs_cp1252;
case mbfl_no_encoding_8859_15:
return cs_8859_15;
case mbfl_no_encoding_big5:
return cs_big5;
case mbfl_no_encoding_euc_cn:
case mbfl_no_encoding_hz:
case mbfl_no_encoding_cp936:
return cs_gb2312;
case mbfl_no_encoding_koi8r:
return cs_koi8r;
case mbfl_no_encoding_cp866:
return cs_cp866;
case mbfl_no_encoding_cp1251:
return cs_cp1251;
case mbfl_no_encoding_8859_5:
return cs_8859_5;
default:
;
}
#else
{
zval nm_mb_internal_encoding;
ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding", 0);
if (call_user_function_ex(CG(function_table), NULL, &nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) {
charset_hint = Z_STRVAL_P(uf_result);
len = Z_STRLEN_P(uf_result);
if ((len == 4) && /* sizeof(none|auto|pass)-1 */
(!memcmp("pass", charset_hint, sizeof("pass") - 1) ||
!memcmp("auto", charset_hint, sizeof("auto") - 1) ||
!memcmp("none", charset_hint, sizeof("none") - 1))) {
charset_hint = NULL;
len = 0;
} else {
charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
goto det_charset;
}
}
}
#endif
#endif
charset_hint = SG(default_charset);
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
@ -514,9 +440,6 @@ det_charset:
charset_hint);
}
}
if (uf_result != NULL) {
zval_ptr_dtor(&uf_result);
}
return charset;
}
/* }}} */

View file

@ -36,23 +36,49 @@
#define DEBUG_FILE_UPLOAD ZEND_DEBUG
PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC) = NULL;
/* Default value of php_rfc1867_encoding_translation: always returns 0,
 * i.e. encoding translation is off. The call sites visible in this file
 * only invoke the other multibyte callbacks when this check returns
 * non-zero. */
static int dummy_encoding_translation(TSRMLS_D)
{
return 0;
}
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
#include "ext/mbstring/mbstring.h"
/* Multibyte callbacks, replaced through php_rfc1867_set_multibyte_callbacks().
 * Only the translation check has a usable default; the other pointers stay
 * NULL until a provider registers them. */
static php_rfc1867_encoding_translation_t php_rfc1867_encoding_translation = dummy_encoding_translation;
static php_rfc1867_encoding_detector_t php_rfc1867_encoding_detector = NULL;
static php_rfc1867_encoding_converter_t php_rfc1867_encoding_converter = NULL;
static php_rfc1867_getword_t php_rfc1867_getword = NULL;
static php_rfc1867_basename_t php_rfc1867_basename = NULL;
PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC) = NULL;
static void safe_php_register_variable(char *var, char *strval, int val_len, zval *track_vars_array, zend_bool override_protection TSRMLS_DC);
static void php_flush_gpc_variables(int num_vars, char **val_list, int *len_list, zval *array_ptr TSRMLS_DC) /* {{{ */
{
int i;
unsigned int new_val_len;
if (num_vars > 0 &&
php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
php_mb_gpc_encoding_converter(val_list, len_list, num_vars, NULL, NULL TSRMLS_CC);
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
php_rfc1867_encoding_converter(val_list, len_list, num_vars, NULL, NULL TSRMLS_CC);
}
for (i = 0; i<num_vars; i += 2) {
safe_php_register_variable(val_list[i], val_list[i+1], len_list[i+1], array_ptr, 0 TSRMLS_CC);
if (sapi_module.input_filter(PARSE_POST, val_list[i], &val_list[i+1], len_list[i+1], &new_val_len TSRMLS_CC)) {
if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
void *event_extra_data = NULL;
event_formdata.post_bytes_processed = SG(read_post_bytes);
event_formdata.name = val_list[i];
event_formdata.value = &val_list[i+1];
event_formdata.length = new_val_len;
event_formdata.newlength = &new_val_len;
if (php_rfc1867_callback(MULTIPART_EVENT_FORMDATA, &event_formdata, &event_extra_data TSRMLS_CC) == FAILURE) {
efree(val_list[i]);
efree(val_list[i+1]);
continue;
}
}
safe_php_register_variable(val_list[i], val_list[i+1], new_val_len, array_ptr, 0 TSRMLS_CC);
}
efree(val_list[i]);
efree(val_list[i+1]);
}
@ -94,8 +120,6 @@ static void php_gpc_stack_variable(char *param, char *value, char ***pval_list,
}
/* }}} */
#endif
/* The longest property name we use in an uploaded file array */
#define MAX_SIZE_OF_INDEX sizeof("[tmp_name]")
@ -536,93 +560,45 @@ static char *php_ap_getword(char **line, char stop)
static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
{
char *result = emalloc(len + 2);
char *result = emalloc(len + 1);
char *resp = result;
int i;
for (i = 0; i < len; ++i) {
for (i = 0; i < len && start[i] != quote; ++i) {
if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
*resp++ = start[++i];
} else {
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC);
while (j-- > 0 && i < len) {
*resp++ = start[i++];
}
--i;
} else {
*resp++ = start[i];
}
#else
*resp++ = start[i];
#endif
}
}
*resp = '\0';
return result;
}
static char *php_ap_getword_conf(char **line TSRMLS_DC)
static char *php_ap_getword_conf(char *str TSRMLS_DC)
{
char *str = *line, *strend, *res, quote;
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
int len=strlen(str);
php_mb_gpc_encoding_detector(&str, &len, 1, NULL TSRMLS_CC);
}
#endif
while (*str && isspace(*str)) {
++str;
}
if (!*str) {
*line = str;
return estrdup("");
}
if ((quote = *str) == '"' || quote == '\'') {
strend = str + 1;
look_for_quote:
while (*strend && *strend != quote) {
if (*strend == '\\' && strend[1] && strend[1] == quote) {
strend += 2;
if (*str == '"' || *str == '\'') {
char quote = *str;
str++;
return substring_conf(str, strlen(str), quote TSRMLS_CC);
} else {
++strend;
}
}
if (*strend && *strend == quote) {
char p = *(strend + 1);
if (p != '\r' && p != '\n' && p != '\0') {
strend++;
goto look_for_quote;
}
}
char *strend = str;
res = substring_conf(str + 1, strend - str - 1, quote TSRMLS_CC);
if (*strend == quote) {
++strend;
}
} else {
strend = str;
while (*strend && !isspace(*strend)) {
++strend;
}
res = substring_conf(str, strend - str, 0 TSRMLS_CC);
return substring_conf(str, strend - str, 0 TSRMLS_CC);
}
while (*strend && isspace(*strend)) {
++strend;
}
*line = strend;
return res;
}
/*
@ -733,10 +709,8 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
int max_file_size = 0, skip_upload = 0, anonindex = 0, is_anonymous;
zval *http_post_files = NULL;
HashTable *uploaded_files = NULL;
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL;
char **val_list = NULL;
#endif
multipart_buffer *mbuff;
zval *array_ptr = (zval *) arg;
int fd = -1;
@ -806,12 +780,11 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
INIT_PZVAL(http_post_files);
PG(http_globals)[TRACK_VARS_FILES] = http_post_files;
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *));
len_list = (int *)ecalloc(num_vars_max+2, sizeof(int));
}
#endif
zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0);
if (php_rfc1867_callback != NULL) {
@ -859,12 +832,36 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
if (param) {
efree(param);
}
param = php_ap_getword_conf(&pair TSRMLS_CC);
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = pair;
len_list[num_vars] = strlen(pair);
num_vars++;
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
num_vars--;
param = php_rfc1867_getword(pair TSRMLS_CC);
} else {
param = php_ap_getword_conf(pair TSRMLS_CC);
}
} else if (!strcasecmp(key, "filename")) {
if (filename) {
efree(filename);
}
filename = php_ap_getword_conf(&pair TSRMLS_CC);
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = pair;
len_list[num_vars] = strlen(pair);
num_vars++;
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
num_vars--;
filename = php_rfc1867_getword(pair TSRMLS_CC);
} else {
filename = php_ap_getword_conf(pair TSRMLS_CC);
}
}
}
if (key) {
@ -883,7 +880,10 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
value = estrdup("");
}
if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
/* postpone filtering, callback call and registration */
php_gpc_stack_variable(param, value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
} else if (sapi_module.input_filter(PARSE_POST, param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
size_t newlength = new_val_len;
@ -900,16 +900,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
}
new_val_len = newlength;
}
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
php_gpc_stack_variable(param, value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
} else {
safe_php_register_variable(param, value, new_val_len, array_ptr, 0 TSRMLS_CC);
}
#else
safe_php_register_variable(param, value, new_val_len, array_ptr, 0 TSRMLS_CC);
#endif
} else if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
@ -1144,30 +1135,25 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
snprintf(lbuf, llen, "%s_name", param);
}
/* The \ check should technically be needed for win32 systems only where
* it is a valid path separator. However, IE in all it's wisdom always sends
* the full path of the file on the user's filesystem, which means that unless
* the user does basename() they get a bogus file name. Until IE's user base drops
* to nill or problem is fixed this code must remain enabled for all systems. */
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = filename;
len_list[num_vars] = strlen(filename);
num_vars++;
if (php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
if (php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
str_len = strlen(filename);
php_mb_gpc_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
}
s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) {
s = tmp;
php_rfc1867_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
}
s = php_rfc1867_basename(filename TSRMLS_CC);
num_vars--;
} else {
#endif
/* The \ check should technically be needed for win32 systems only where
* it is a valid path separator. However, IE in all it's wisdom always sends
* the full path of the file on the user's filesystem, which means that unless
* the user does basename() they get a bogus file name. Until IE's user base drops
* to nill or problem is fixed this code must remain enabled for all systems. */
s = strrchr(filename, '\\');
if ((tmp = strrchr(filename, '/')) > s) {
s = tmp;
@ -1181,17 +1167,15 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
s = tmp > s ? tmp : s;
}
#endif
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (s) {
s++;
} else {
s = filename;
}
}
#endif
if (!is_anonymous) {
if (s && s > filename) {
safe_php_register_variable(lbuf, s+1, strlen(s+1), NULL, 0 TSRMLS_CC);
} else {
safe_php_register_variable(lbuf, filename, strlen(filename), NULL, 0 TSRMLS_CC);
}
safe_php_register_variable(lbuf, s, strlen(s), NULL, 0 TSRMLS_CC);
}
/* Add $foo[name] */
@ -1200,11 +1184,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) /* {{{ */
} else {
snprintf(lbuf, llen, "%s[name]", param);
}
if (s && s > filename) {
register_http_post_files_variable(lbuf, s+1, http_post_files, 0 TSRMLS_CC);
} else {
register_http_post_files_variable(lbuf, filename, http_post_files, 0 TSRMLS_CC);
}
register_http_post_files_variable(lbuf, s, http_post_files, 0 TSRMLS_CC);
efree(filename);
s = NULL;
@ -1320,11 +1300,9 @@ fileupload_done:
php_rfc1867_callback(MULTIPART_EVENT_END, &event_end, &event_extra_data TSRMLS_CC);
}
#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
if (php_mb_encoding_translation(TSRMLS_C)) {
if (php_rfc1867_encoding_translation(TSRMLS_C)) {
php_flush_gpc_variables(num_vars, val_list, len_list, array_ptr TSRMLS_CC);
}
#endif
if (lbuf) efree(lbuf);
if (abuf) efree(abuf);
@ -1338,6 +1316,21 @@ fileupload_done:
}
/* }}} */
/*
 * Install the multibyte hooks consulted by the RFC 1867 upload handler.
 *
 * Each argument is stored verbatim in the php_rfc1867_* hook variable of the
 * same name, replacing whatever was installed before. All five hooks are
 * overwritten on every call.
 *
 * NOTE(review): the pointers are not checked for NULL here; callers are
 * presumably expected to pass valid functions for every hook - confirm.
 */
SAPI_API void php_rfc1867_set_multibyte_callbacks(
	php_rfc1867_encoding_translation_t encoding_translation,
	php_rfc1867_encoding_detector_t encoding_detector,
	php_rfc1867_encoding_converter_t encoding_converter,
	php_rfc1867_getword_t getword,
	php_rfc1867_basename_t basename) /* {{{ */
{
	/* String helpers. */
	php_rfc1867_basename = basename;
	php_rfc1867_getword = getword;

	/* Encoding helpers. */
	php_rfc1867_encoding_converter = encoding_converter;
	php_rfc1867_encoding_detector = encoding_detector;
	php_rfc1867_encoding_translation = encoding_translation;
}
/* }}} */
/*
* Local variables:
* tab-width: 4

View file

@ -67,10 +67,23 @@ typedef struct _multipart_event_end {
size_t post_bytes_processed;
} multipart_event_end;
/*
 * Signatures of the multibyte hooks that can be installed with
 * php_rfc1867_set_multibyte_callbacks(). Every hook receives the TSRM
 * context as its trailing argument.
 */

/* Takes no arguments besides the TSRM context; the POST handler tests the
 * result as a truth value before calling php_flush_gpc_variables(). */
typedef int (*php_rfc1867_encoding_translation_t)(TSRMLS_D);
/* Receives a string array, a matching length array, the entry count and an
 * encoding list. NOTE(review): presumably detects the encoding of the
 * collected values - confirm against the installed implementation. */
typedef int (*php_rfc1867_encoding_detector_t)(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC);
/* Receives string/length arrays of `num` entries plus target and source
 * encoding names; the POST handler passes NULL for both names when
 * converting an uploaded file name. */
typedef int (*php_rfc1867_encoding_converter_t)(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC);
/* NOTE(review): presumably a multibyte-aware word extractor for header
 * parsing; its call sites are outside this view - confirm. */
typedef char* (*php_rfc1867_getword_t)(char *str TSRMLS_DC);
/* Called on an uploaded file name; the POST handler registers the returned
 * pointer as the file's name. */
typedef char* (*php_rfc1867_basename_t)(char *str TSRMLS_DC);
SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler);
void destroy_uploaded_files_hash(TSRMLS_D);
void php_rfc1867_register_constants(TSRMLS_D);
extern PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void **extra TSRMLS_DC);
/*
 * Install the five multibyte hooks used by the RFC 1867 POST handler.
 * Takes one function pointer per hook type declared above; returns nothing.
 * NOTE(review): presumably called by a multibyte extension (e.g. mbstring)
 * during its startup - confirm against the caller.
 */
SAPI_API void php_rfc1867_set_multibyte_callbacks(
php_rfc1867_encoding_translation_t encoding_translation,
php_rfc1867_encoding_detector_t encoding_detector,
php_rfc1867_encoding_converter_t encoding_converter,
php_rfc1867_getword_t getword,
php_rfc1867_basename_t basename);
#endif /* RFC1867_H */

104
tests/basic/028.phpt Normal file
View file

@ -0,0 +1,104 @@
--TEST--
RFC1867 character quoting
--INI--
file_uploads=1
--POST_RAW--
Content-Type: multipart/form-data; boundary=---------------------------20896060251896012921717172737
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name=name1
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name='name2'
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="name3"
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name=name\4
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name=name\\5
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name=name\'6
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name=name\"7
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name='name\8'
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name='name\\9'
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name='name\'10'
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name='name\"11'
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="name\12"
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="name\\13"
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="name\'14"
testname
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="name\"15"
testname
-----------------------------20896060251896012921717172737--
--FILE--
<?php
var_dump($_POST);
?>
--EXPECTF--
array(15) {
["name1"]=>
string(8) "testname"
["name2"]=>
string(8) "testname"
["name3"]=>
string(8) "testname"
["name\\4"]=>
string(8) "testname"
["name\\5"]=>
string(8) "testname"
["name\\\'6"]=>
string(8) "testname"
["name\\\"7"]=>
string(8) "testname"
["name\\8"]=>
string(8) "testname"
["name\\9"]=>
string(8) "testname"
["name\'10"]=>
string(8) "testname"
["name\\\"11"]=>
string(8) "testname"
["name\\12"]=>
string(8) "testname"
["name\\13"]=>
string(8) "testname"
["name\\\'14"]=>
string(8) "testname"
["name\"15"]=>
string(8) "testname"
}

51
tests/basic/029.phpt Normal file
View file

@ -0,0 +1,51 @@
--TEST--
Shift_JIS request
--SKIPIF--
<?php
if (!extension_loaded("mbstring")) {
die("skip Requires mbstring extension");
}
?>
--INI--
file_uploads=1
mbstring.encoding_translation=1
mbstring.http_input=Shift_JIS
mbstring.internal_encoding=UTF-8
--POST_RAW--
Content-Type: multipart/form-data; boundary=---------------------------20896060251896012921717172737
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="—\Ž\”\"
ƒhƒŒƒ~ƒtƒ@ƒ\
-----------------------------20896060251896012921717172737
Content-Disposition: form-data; name="pics"; filename="file1.txt"
Content-Type: text/plain
file1
-----------------------------20896060251896012921717172737--
--FILE--
<?php
var_dump($_FILES);
var_dump($_POST);
?>
--EXPECTF--
array(1) {
["pics"]=>
array(5) {
["name"]=>
string(9) "file1.txt"
["type"]=>
string(10) "text/plain"
["tmp_name"]=>
string(%d) "%s"
["error"]=>
int(0)
["size"]=>
int(6)
}
}
array(1) {
["予蚕能"]=>
string(18) "ドレミファソ"
}