mirror of
https://github.com/php/php-src.git
synced 2025-08-16 14:08:47 +02:00
Added support for converting from the script character encoding to the internal character encoding. This feature is very useful for Japanese users who write scripts in Shift_JIS, because some Shift_JIS characters contain the byte 0x5c (the ASCII backslash), which causes trouble in the Zend parser. This patch was written by Masaki Fujimoto.
This commit is contained in:
parent
2b5beee5ad
commit
f30b722f14
6 changed files with 295 additions and 4 deletions
|
@ -685,12 +685,12 @@ static mbfl_encoding mbfl_encoding_2022jp = {
|
|||
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static const char *mbfl_encoding_euc_cn_aliases[] = {"EUC_CN", "eucCN", "x-euc-cn", NULL};
|
||||
static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_euc_cn = {
|
||||
mbfl_no_encoding_euc_cn,
|
||||
"EUC-CN",
|
||||
"EUC-CN",
|
||||
"CN-GB",
|
||||
(const char *(*)[])&mbfl_encoding_euc_cn_aliases,
|
||||
mblen_table_euccn,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
|
@ -721,12 +721,12 @@ static mbfl_encoding mbfl_encoding_euc_tw = {
|
|||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_big5_aliases[] = {"big5", "CP950", NULL};
|
||||
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_big5 = {
|
||||
mbfl_no_encoding_big5,
|
||||
"BIG-5",
|
||||
"BIG-5",
|
||||
"CN-BIG5",
|
||||
(const char *(*)[])&mbfl_encoding_big5_aliases,
|
||||
mblen_table_big5,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
|
@ -6995,7 +6995,53 @@ mbfl_strlen(mbfl_string *string TSRMLS_DC)
|
|||
return len;
|
||||
}
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
/*
|
||||
* oddlen
|
||||
*/
|
||||
int
|
||||
mbfl_oddlen(mbfl_string *string)
|
||||
{
|
||||
int len, n, m, k;
|
||||
unsigned char *p;
|
||||
const unsigned char *mbtab;
|
||||
mbfl_encoding *encoding;
|
||||
|
||||
encoding = mbfl_no2encoding(string->no_encoding);
|
||||
if (encoding == NULL || string == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
len = 0;
|
||||
if (encoding->flag & MBFL_ENCTYPE_SBCS) {
|
||||
return 0;
|
||||
} else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
|
||||
return len % 2;
|
||||
} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
|
||||
return len % 4;
|
||||
} else if (encoding->mblen_table != NULL) {
|
||||
mbtab = encoding->mblen_table;
|
||||
n = 0;
|
||||
p = string->val;
|
||||
k = string->len;
|
||||
/* count */
|
||||
if (p != NULL) {
|
||||
while (n < k) {
|
||||
m = mbtab[*p];
|
||||
n += m;
|
||||
p += m;
|
||||
};
|
||||
}
|
||||
return n-k;
|
||||
} else {
|
||||
/* how can i do ? */
|
||||
return 0;
|
||||
}
|
||||
/* NOT REACHED */
|
||||
}
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
|
||||
/*
|
||||
* strpos
|
||||
*/
|
||||
|
|
|
@ -461,6 +461,14 @@ mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int
|
|||
int
|
||||
mbfl_strlen(mbfl_string *string TSRMLS_DC);
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
/*
|
||||
* oddlen
|
||||
*/
|
||||
int
|
||||
mbfl_oddlen(mbfl_string *string);
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
/*
|
||||
* strpos
|
||||
*/
|
||||
|
|
|
@ -65,6 +65,10 @@
|
|||
#include "php_content_types.h"
|
||||
#include "SAPI.h"
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
#include "zend_multibyte.h"
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
#if HAVE_MBSTRING
|
||||
|
||||
#if HAVE_MBREGEX
|
||||
|
@ -524,6 +528,25 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
|
|||
return SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
|
||||
{
|
||||
int *list, size;
|
||||
|
||||
if (php_mbstring_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
|
||||
if (MBSTRG(script_encoding_list) != NULL) {
|
||||
free(MBSTRG(script_encoding_list));
|
||||
}
|
||||
MBSTRG(script_encoding_list) = list;
|
||||
MBSTRG(script_encoding_list_size) = size;
|
||||
} else {
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
|
||||
{
|
||||
if (new_value != NULL) {
|
||||
|
@ -546,6 +569,9 @@ PHP_INI_BEGIN()
|
|||
PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
|
||||
PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
|
||||
PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
|
||||
STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateInt, func_overload, zend_mbstring_globals, mbstring_globals)
|
||||
PHP_INI_END()
|
||||
|
@ -579,6 +605,10 @@ php_mbstring_init_globals(zend_mbstring_globals *pglobals TSRMLS_DC)
|
|||
MBSTRG(internal_encoding) = mbfl_no_encoding_euc_jp;
|
||||
MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_jp;
|
||||
#endif
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
MBSTRG(script_encoding_list) = NULL;
|
||||
MBSTRG(script_encoding_list_size) = 0;
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
|
||||
MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
|
||||
MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
|
||||
|
@ -640,6 +670,11 @@ PHP_MSHUTDOWN_FUNCTION(mbstring)
|
|||
if (MBSTRG(http_input_list)) {
|
||||
free(MBSTRG(http_input_list));
|
||||
}
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
if (MBSTRG(script_encoding_list)) {
|
||||
free(MBSTRG(script_encoding_list));
|
||||
}
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
if (MBSTRG(detect_order_list)) {
|
||||
free(MBSTRG(detect_order_list));
|
||||
}
|
||||
|
@ -858,6 +893,9 @@ PHP_FUNCTION(mb_internal_encoding)
|
|||
RETURN_FALSE;
|
||||
} else {
|
||||
MBSTRG(current_internal_encoding) = no_encoding;
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
zend_multibyte_set_internal_encoding(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1) TSRMLS_CC);
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
RETURN_TRUE;
|
||||
}
|
||||
} else {
|
||||
|
@ -3174,6 +3212,175 @@ PHP_FUNCTION(mb_get_info)
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
PHPAPI int php_mbstring_set_zend_encoding(TSRMLS_D)
|
||||
{
|
||||
/* 'd better use mbfl_memory_device? */
|
||||
char *name, *list = NULL;
|
||||
int n, *entry, list_size = 0;
|
||||
zend_encoding_detector encoding_detector;
|
||||
zend_encoding_converter encoding_converter;
|
||||
zend_multibyte_oddlen multibyte_oddlen;
|
||||
|
||||
/* notify script encoding to Zend Engine */
|
||||
entry = MBSTRG(script_encoding_list);
|
||||
n = MBSTRG(script_encoding_list_size);
|
||||
while (n > 0) {
|
||||
name = (char *)mbfl_no_encoding2name(*entry);
|
||||
if (name) {
|
||||
list_size += strlen(name) + 1;
|
||||
if (!list)
|
||||
{
|
||||
list = (char*)emalloc(list_size);
|
||||
if (!list)
|
||||
return -1;
|
||||
*list = (char)NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
list = (char*)erealloc(list, list_size);
|
||||
if (!list)
|
||||
return -1;
|
||||
strcat(list, ",");
|
||||
}
|
||||
strcat(list, name);
|
||||
}
|
||||
entry++;
|
||||
n--;
|
||||
}
|
||||
zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
|
||||
if (list)
|
||||
efree(list);
|
||||
|
||||
encoding_detector = php_mbstring_encoding_detector;
|
||||
encoding_converter = NULL;
|
||||
multibyte_oddlen = php_mbstring_oddlen;
|
||||
|
||||
#if defined(MBSTR_ENC_TRANS)
|
||||
/* notify internal encoding to Zend Engine */
|
||||
name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
|
||||
zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
|
||||
|
||||
encoding_converter = php_mbstring_encoding_converter;
|
||||
#endif /* defined(MBSTR_ENC_TRANS) */
|
||||
|
||||
zend_multibyte_set_functions(encoding_detector, encoding_converter,
|
||||
multibyte_oddlen TSRMLS_CC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* mb_detect_encoding (interface for Zend Engine)
|
||||
*/
|
||||
char* php_mbstring_encoding_detector(char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
|
||||
{
|
||||
mbfl_string string;
|
||||
const char *ret;
|
||||
enum mbfl_no_encoding *elist;
|
||||
int size, *list;
|
||||
|
||||
/* make encoding list */
|
||||
list = NULL;
|
||||
size = 0;
|
||||
php_mbstring_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
|
||||
if (size <= 0)
|
||||
return NULL;
|
||||
|
||||
if (size > 0 && list != NULL) {
|
||||
elist = list;
|
||||
} else {
|
||||
elist = MBSTRG(current_detect_order_list);
|
||||
size = MBSTRG(current_detect_order_list_size);
|
||||
}
|
||||
|
||||
mbfl_string_init(&string);
|
||||
string.no_language = MBSTRG(current_language);
|
||||
string.val = arg_string;
|
||||
string.len = arg_length;
|
||||
ret = mbfl_identify_encoding_name(&string, elist, size);
|
||||
if (list != NULL) {
|
||||
efree((void *)list);
|
||||
}
|
||||
if (ret != NULL) {
|
||||
return estrdup(ret);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* mb_convert_encoding (interface for Zend Engine)
|
||||
*/
|
||||
int php_mbstring_encoding_converter(char **to, int *to_length, char *from,
|
||||
int from_length, const char *encoding_to, const char *encoding_from
|
||||
TSRMLS_DC)
|
||||
{
|
||||
mbfl_string string, result, *ret;
|
||||
enum mbfl_no_encoding from_encoding, to_encoding;
|
||||
mbfl_buffer_converter *convd;
|
||||
|
||||
/* new encoding */
|
||||
to_encoding = mbfl_name2no_encoding(encoding_to);
|
||||
if (to_encoding == mbfl_no_encoding_invalid)
|
||||
return -1;
|
||||
|
||||
/* old encoding */
|
||||
from_encoding = mbfl_name2no_encoding(encoding_from);
|
||||
if (from_encoding == mbfl_no_encoding_invalid)
|
||||
return -1;
|
||||
|
||||
/* initialize string */
|
||||
mbfl_string_init(&string);
|
||||
mbfl_string_init(&result);
|
||||
string.no_encoding = from_encoding;
|
||||
string.no_language = MBSTRG(current_language);
|
||||
string.val = from;
|
||||
string.len = from_length;
|
||||
|
||||
/* initialize converter */
|
||||
convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
|
||||
if (convd == NULL)
|
||||
return -1;
|
||||
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
|
||||
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
|
||||
|
||||
/* do it */
|
||||
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
|
||||
if (ret != NULL) {
|
||||
*to = ret->val;
|
||||
*to_length = ret->len;
|
||||
}
|
||||
mbfl_buffer_converter_delete(convd);
|
||||
|
||||
return ret ? 0 : -1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* returns number of odd (e.g. appears only first byte of multibyte
|
||||
* character) chars
|
||||
*/
|
||||
int php_mbstring_oddlen(char *string, int length, const char *encoding TSRMLS_DC)
|
||||
{
|
||||
mbfl_string mb_string;
|
||||
|
||||
mbfl_string_init(&mb_string);
|
||||
mb_string.no_language = MBSTRG(current_language);
|
||||
mb_string.no_encoding = mbfl_name2no_encoding(encoding);
|
||||
mb_string.val = string;
|
||||
mb_string.len = length;
|
||||
|
||||
if(mb_string.no_encoding == mbfl_no_encoding_invalid)
|
||||
return 0;
|
||||
|
||||
return mbfl_oddlen(&mb_string);
|
||||
}
|
||||
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
#endif /* HAVE_MBSTRING */
|
||||
|
||||
/*
|
||||
|
|
|
@ -129,6 +129,10 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring)
|
|||
int current_language;
|
||||
int internal_encoding;
|
||||
int current_internal_encoding;
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
int *script_encoding_list;
|
||||
int script_encoding_list_size;
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
int http_output_encoding;
|
||||
int current_http_output_encoding;
|
||||
int http_input_identify;
|
||||
|
@ -177,6 +181,16 @@ struct mb_overload_def {
|
|||
#define MBSTRG(v) (mbstring_globals.v)
|
||||
#endif
|
||||
|
||||
#ifdef ZEND_MULTIBYTE
|
||||
PHPAPI int php_mbstring_set_zend_encoding(TSRMLS_D);
|
||||
char* php_mbstring_encoding_detector(char *string, int length, char *list
|
||||
TSRMLS_DC);
|
||||
int php_mbstring_encoding_converter(char **to, int *to_length, char *from,
|
||||
int from_length, const char *encoding_to, const char *encoding_from
|
||||
TSRMLS_DC);
|
||||
int php_mbstring_oddlen(char *string, int length, const char *encoding TSRMLS_DC);
|
||||
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
#else /* HAVE_MBSTRING */
|
||||
|
||||
#define mbstring_module_ptr NULL
|
||||
|
|
|
@ -75,6 +75,10 @@
|
|||
#include "php_logos.h"
|
||||
#include "php_streams.h"
|
||||
|
||||
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
|
||||
#include "ext/mbstring/mbstring.h"
|
||||
#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
|
||||
|
||||
#include "SAPI.h"
|
||||
/* }}} */
|
||||
|
||||
|
@ -1402,6 +1406,9 @@ PHPAPI int php_execute_script(zend_file_handle *primary_file TSRMLS_DC)
|
|||
} else {
|
||||
append_file_p = NULL;
|
||||
}
|
||||
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
|
||||
php_mbstring_set_zend_encoding(TSRMLS_C);
|
||||
#endif /* ZEND_MULTIBYTE && HAVE_MBSTRING */
|
||||
retval = (zend_execute_scripts(ZEND_REQUIRE TSRMLS_CC, NULL, 3, prepend_file_p, primary_file, append_file_p) == SUCCESS);
|
||||
} zend_end_try();
|
||||
|
||||
|
|
|
@ -21,6 +21,10 @@
|
|||
|
||||
#include "php_apache_http.h"
|
||||
|
||||
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
|
||||
#include "ext/mbstring/mbstring.h"
|
||||
#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
|
||||
|
||||
#undef shutdown
|
||||
|
||||
/* {{{ Prototypes
|
||||
|
@ -459,6 +463,11 @@ static int send_php(request_rec *r, int display_source_mode, char *filename)
|
|||
fh.opened_path = NULL;
|
||||
fh.free_filename = 0;
|
||||
fh.type = ZEND_HANDLE_FILENAME;
|
||||
|
||||
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
|
||||
php_mbstring_set_zend_encoding(TSRMLS_C);
|
||||
#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
|
||||
|
||||
zend_execute_scripts(ZEND_INCLUDE TSRMLS_CC, NULL, 1, &fh);
|
||||
return OK;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue