- MFH: Removed dependencies from php_mbregex to oniguruma types

This commit is contained in:
Moriyoshi Koizumi 2008-07-17 16:08:37 +00:00
parent a00a69b3e5
commit f78da21527
4 changed files with 219 additions and 141 deletions

View file

@ -73,6 +73,10 @@
#include "mb_gpc.h" #include "mb_gpc.h"
#if HAVE_MBREGEX
#include "php_mbregex.h"
#endif
#ifdef ZEND_MULTIBYTE #ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h" #include "zend_multibyte.h"
#endif /* ZEND_MULTIBYTE */ #endif /* ZEND_MULTIBYTE */
@ -89,7 +93,7 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring);
/* {{{ php_mb_default_identify_list */ /* {{{ php_mb_default_identify_list */
typedef struct _php_mb_nls_ident_list { typedef struct _php_mb_nls_ident_list {
enum mbfl_no_language lang; enum mbfl_no_language lang;
enum mbfl_no_encoding* list; const enum mbfl_no_encoding* list;
int list_size; int list_size;
} php_mb_nls_ident_list; } php_mb_nls_ident_list;
@ -906,7 +910,7 @@ static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,
for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) { for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
if (php_mb_default_identify_list[i].lang == lang) { if (php_mb_default_identify_list[i].lang == lang) {
*plist = php_mb_default_identify_list[i].list; *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
*plist_size = php_mb_default_identify_list[i].list_size; *plist_size = php_mb_default_identify_list[i].list_size;
return 1; return 1;
} }
@ -1003,12 +1007,13 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
MBSTRG(current_internal_encoding) = no_encoding; MBSTRG(current_internal_encoding) = no_encoding;
#if HAVE_MBREGEX #if HAVE_MBREGEX
{ {
OnigEncoding mbctype; const char *enc_name = new_value;
mbctype = php_mb_regex_name2mbctype(new_value); if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
if (mbctype == ONIG_ENCODING_UNDEF) { /* falls back to EUC-JP if an unknown encoding name is given */
mbctype = ONIG_ENCODING_EUC_JP; enc_name = "EUC-JP";
} php_mb_regex_set_default_mbctype(enc_name);
MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype; }
php_mb_regex_set_mbctype(new_value);
} }
#endif #endif
#ifdef ZEND_MULTIBYTE #ifdef ZEND_MULTIBYTE
@ -1159,7 +1164,7 @@ static PHP_GINIT_FUNCTION(mbstring)
mbstring_globals->strict_detection = 0; mbstring_globals->strict_detection = 0;
mbstring_globals->outconv = NULL; mbstring_globals->outconv = NULL;
#if HAVE_MBREGEX #if HAVE_MBREGEX
_php_mb_regex_globals_ctor(mbstring_globals TSRMLS_CC); mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
#endif #endif
} }
/* }}} */ /* }}} */
@ -1168,7 +1173,7 @@ static PHP_GINIT_FUNCTION(mbstring)
static PHP_GSHUTDOWN_FUNCTION(mbstring) static PHP_GSHUTDOWN_FUNCTION(mbstring)
{ {
#if HAVE_MBREGEX #if HAVE_MBREGEX
_php_mb_regex_globals_dtor(mbstring_globals TSRMLS_CC); php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
#endif #endif
} }
/* }}} */ /* }}} */

View file

@ -76,10 +76,6 @@
#define PHP_MBSTRING_API 20021024 #define PHP_MBSTRING_API 20021024
#if HAVE_MBREGEX
#include "php_mbregex.h"
#endif
extern zend_module_entry mbstring_module_entry; extern zend_module_entry mbstring_module_entry;
#define mbstring_module_ptr &mbstring_module_entry #define mbstring_module_ptr &mbstring_module_entry
@ -197,8 +193,8 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring)
long strict_detection; long strict_detection;
long illegalchars; long illegalchars;
mbfl_buffer_converter *outconv; mbfl_buffer_converter *outconv;
#if HAVE_MBREGEX && defined(PHP_MBREGEX_GLOBALS) #if HAVE_MBREGEX
PHP_MBREGEX_GLOBALS struct _zend_mb_regex_globals *mb_regex_globals;
#endif #endif
ZEND_END_MODULE_GLOBALS(mbstring) ZEND_END_MODULE_GLOBALS(mbstring)

View file

@ -29,11 +29,35 @@
#if HAVE_MBREGEX #if HAVE_MBREGEX
#include "ext/standard/php_smart_str.h" #include "ext/standard/php_smart_str.h"
#include "ext/standard/info.h"
#include "php_mbregex.h" #include "php_mbregex.h"
#include "mbstring.h" #include "mbstring.h"
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
#include <oniguruma.h>
#undef UChar
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
#include <oniguruma.h>
#undef UChar
ZEND_EXTERN_MODULE_GLOBALS(mbstring) ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* State of the mbregex layer; reached through MBSTRG(mb_regex_globals). */
struct _zend_mb_regex_globals {
OnigEncoding default_mbctype; /* copied back into current_mbctype at request shutdown */
OnigEncoding current_mbctype; /* encoding handed to pattern compilation */
HashTable ht_rc; /* compiled-pattern cache, keyed by the pattern string */
zval *search_str; /* subject string retained by mb_ereg_search_init() */
zval *search_str_val; /* NOTE(review): not referenced in the visible code - confirm it is still needed */
unsigned int search_pos; /* offset at which the next mb_ereg_search*() scan starts */
php_mb_regex_t *search_re; /* pattern used by the mb_ereg_search*() family, or NULL */
OnigRegion *search_regs; /* regions of the most recent search, or NULL */
OnigOptionType regex_default_options; /* options applied when a call supplies no option string */
OnigSyntaxType *regex_default_syntax; /* syntax applied when a call supplies no option string */
};
/* Accesses field g of the regex globals pointed to by MBSTRG(mb_regex_globals). */
#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
/* {{{ static void php_mb_regex_free_cache() */ /* {{{ static void php_mb_regex_free_cache() */
static void php_mb_regex_free_cache(php_mb_regex_t **pre) static void php_mb_regex_free_cache(php_mb_regex_t **pre)
{ {
@ -42,24 +66,52 @@ static void php_mb_regex_free_cache(php_mb_regex_t **pre)
/* }}} */ /* }}} */
/* {{{ _php_mb_regex_globals_ctor */ /* {{{ _php_mb_regex_globals_ctor */
/*
 * Puts the regex globals into their initial state: EUC-JP for both the default
 * and the current encoding, an initialised pattern cache whose entries are
 * released through php_mb_regex_free_cache, no search string / pattern /
 * regions, position 0, and MULTILINE|SINGLELINE options with Ruby syntax.
 * search_str_val is not assigned here. The updated signature returns SUCCESS.
 */
void _php_mb_regex_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC) static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{ {
MBSTRG(default_mbctype) = ONIG_ENCODING_EUC_JP; pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
MBSTRG(current_mbctype) = ONIG_ENCODING_EUC_JP; pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
zend_hash_init(&(MBSTRG(ht_rc)), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1); zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
MBSTRG(search_str) = (zval*) NULL; pglobals->search_str = (zval*) NULL;
MBSTRG(search_re) = (php_mb_regex_t*)NULL; pglobals->search_re = (php_mb_regex_t*)NULL;
MBSTRG(search_pos) = 0; pglobals->search_pos = 0;
MBSTRG(search_regs) = (OnigRegion*)NULL; pglobals->search_regs = (OnigRegion*)NULL;
MBSTRG(regex_default_options) = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
MBSTRG(regex_default_syntax) = ONIG_SYNTAX_RUBY; pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
return SUCCESS;
} }
/* }}} */ /* }}} */
/* {{{ _php_mb_regex_globals_dtor */ /* {{{ _php_mb_regex_globals_dtor */
/* Tears down the compiled-pattern cache table (ht_rc); no other field is touched. */
void _php_mb_regex_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC) static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
{ {
zend_hash_destroy(&MBSTRG(ht_rc)); zend_hash_destroy(&pglobals->ht_rc);
}
/* }}} */
/* {{{ php_mb_regex_globals_alloc */
/*
 * Allocates the regex globals with the persistent allocator and runs the
 * constructor on them.
 *
 * The storage is requested zero-filled because the constructor does not assign
 * every field (search_str_val is left untouched), so no member starts out with
 * an indeterminate value.
 *
 * Returns the new object, or NULL when allocation or construction fails.
 * Release the result with php_mb_regex_globals_free().
 */
zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
{
	zend_mb_regex_globals *pglobals = pecalloc(1, sizeof(*pglobals), 1);

	if (!pglobals) {
		return NULL;
	}
	if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
		/* construction failed: hand the raw storage back before reporting it */
		pefree(pglobals, 1);
		return NULL;
	}
	return pglobals;
}
/* }}} */
/* {{{ php_mb_regex_globals_free */
/*
 * Runs the destructor on the regex globals and releases their storage with
 * pefree(..., 1). A NULL pointer is accepted and ignored.
 */
void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
{
if (!pglobals) {
return;
}
_php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
pefree(pglobals, 1);
} }
/* }}} */ /* }}} */
@ -80,26 +132,26 @@ PHP_MSHUTDOWN_FUNCTION(mb_regex)
/* {{{ PHP_RINIT_FUNCTION(mb_regex) */ /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
/* Request startup hook; the updated version reports FAILURE when MBSTRG(mb_regex_globals) is NULL. */
PHP_RINIT_FUNCTION(mb_regex) PHP_RINIT_FUNCTION(mb_regex)
{ {
return SUCCESS; return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
} }
/* }}} */ /* }}} */
/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */ /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
/*
 * Request shutdown hook: restores the current encoding to the default, drops
 * the retained search string and match regions, rewinds the search position
 * and empties the compiled-pattern cache.
 * NOTE(review): search_re is not reset here although the cache is cleaned;
 * if it points at a cached pattern it may dangle into the next request - confirm.
 */
PHP_RSHUTDOWN_FUNCTION(mb_regex) PHP_RSHUTDOWN_FUNCTION(mb_regex)
{ {
MBSTRG(current_mbctype) = MBSTRG(default_mbctype); MBREX(current_mbctype) = MBREX(default_mbctype);
if (MBSTRG(search_str) != NULL) { if (MBREX(search_str) != NULL) {
zval_ptr_dtor(&MBSTRG(search_str)); zval_ptr_dtor(&MBREX(search_str));
MBSTRG(search_str) = (zval *)NULL; MBREX(search_str) = (zval *)NULL;
} }
MBSTRG(search_pos) = 0; MBREX(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) { if (MBREX(search_regs) != NULL) {
onig_region_free(MBSTRG(search_regs), 1); onig_region_free(MBREX(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *)NULL; MBREX(search_regs) = (OnigRegion *)NULL;
} }
zend_hash_clean(&MBSTRG(ht_rc)); zend_hash_clean(&MBREX(ht_rc));
return SUCCESS; return SUCCESS;
} }
@ -317,7 +369,7 @@ php_mb_regex_enc_name_map_t enc_name_map[] = {
/* }}} */ /* }}} */
/* {{{ php_mb_regex_name2mbctype */ /* {{{ php_mb_regex_name2mbctype */
OnigEncoding php_mb_regex_name2mbctype(const char *pname) static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
{ {
const char *p; const char *p;
php_mb_regex_enc_name_map_t *mapping; php_mb_regex_enc_name_map_t *mapping;
@ -338,8 +390,8 @@ OnigEncoding php_mb_regex_name2mbctype(const char *pname)
} }
/* }}} */ /* }}} */
/* {{{ php_mbregex_mbctype2name */ /* {{{ php_mb_regex_mbctype2name */
const char *php_mb_regex_mbctype2name(OnigEncoding mbctype) static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
{ {
php_mb_regex_enc_name_map_t *mapping; php_mb_regex_enc_name_map_t *mapping;
@ -353,6 +405,44 @@ const char *php_mb_regex_mbctype2name(OnigEncoding mbctype)
} }
/* }}} */ /* }}} */
/* {{{ php_mb_regex_set_mbctype */
/*
 * Selects, by name, the encoding stored in current_mbctype.
 * Returns SUCCESS once the encoding has been stored, or FAILURE - leaving the
 * current encoding untouched - when the name resolves to ONIG_ENCODING_UNDEF.
 */
int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved;

	resolved = _php_mb_regex_name2mbctype(encname);
	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(current_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
/* }}} */
/* {{{ php_mb_regex_set_default_mbctype */
/*
 * Selects, by name, the encoding stored in default_mbctype (the value that
 * request shutdown copies back into current_mbctype).
 * Returns SUCCESS once the encoding has been stored, or FAILURE - leaving the
 * default untouched - when the name resolves to ONIG_ENCODING_UNDEF.
 */
int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
{
	OnigEncoding resolved;

	resolved = _php_mb_regex_name2mbctype(encname);
	if (resolved != ONIG_ENCODING_UNDEF) {
		MBREX(default_mbctype) = resolved;
		return SUCCESS;
	}
	return FAILURE;
}
/* }}} */
/* {{{ php_mb_regex_get_mbctype */
const char *php_mb_regex_get_mbctype(TSRMLS_D)
{
return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
}
/* }}} */
/* {{{ php_mb_regex_get_default_mbctype */
const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
{
return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
}
/* }}} */
/* /*
* regex cache * regex cache
*/ */
@ -365,7 +455,7 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
OnigErrorInfo err_info; OnigErrorInfo err_info;
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
found = zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc); found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) { if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
onig_error_code_to_str(err_str, err_code, err_info); onig_error_code_to_str(err_str, err_code, err_info);
@ -373,7 +463,7 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
retval = NULL; retval = NULL;
goto out; goto out;
} }
zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL); zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
} else if (found == SUCCESS) { } else if (found == SUCCESS) {
retval = *rc; retval = *rc;
} }
@ -577,7 +667,7 @@ PHP_FUNCTION(mb_regex_encoding)
} }
if (argc == 0) { if (argc == 0) {
const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype)); const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
if (retval == NULL) { if (retval == NULL) {
RETURN_FALSE; RETURN_FALSE;
@ -585,14 +675,14 @@ PHP_FUNCTION(mb_regex_encoding)
RETURN_STRING((char *)retval, 1); RETURN_STRING((char *)retval, 1);
} else if (argc == 1) { } else if (argc == 1) {
mbctype = php_mb_regex_name2mbctype(encoding); mbctype = _php_mb_regex_name2mbctype(encoding);
if (mbctype == ONIG_ENCODING_UNDEF) { if (mbctype == ONIG_ENCODING_UNDEF) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
RETURN_FALSE; RETURN_FALSE;
} }
MBSTRG(current_mbctype) = mbctype; MBREX(current_mbctype) = mbctype;
RETURN_TRUE; RETURN_TRUE;
} }
} }
@ -616,7 +706,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
RETURN_FALSE; RETURN_FALSE;
} }
options = MBSTRG(regex_default_options); options = MBREX(regex_default_options);
if (icase) { if (icase) {
options |= ONIG_OPTION_IGNORECASE; options |= ONIG_OPTION_IGNORECASE;
} }
@ -637,7 +727,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
goto out; goto out;
} }
re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC); re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
if (re == NULL) { if (re == NULL) {
RETVAL_FALSE; RETVAL_FALSE;
goto out; goto out;
@ -646,7 +736,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
regs = onig_region_new(); regs = onig_region_new();
/* actually execute the regular expression */ /* actually execute the regular expression */
if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), string, (OnigUChar *)(string + string_len), regs, 0) < 0) { if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
RETVAL_FALSE; RETVAL_FALSE;
goto out; goto out;
} }
@ -726,7 +816,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
{ {
const char *current_enc_name; const char *current_enc_name;
current_enc_name = php_mb_regex_mbctype2name(MBSTRG(current_mbctype)); current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
if (current_enc_name == NULL || if (current_enc_name == NULL ||
(enc = mbfl_name2encoding(current_enc_name)) == NULL) { (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
@ -749,8 +839,8 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
if (option_str != NULL) { if (option_str != NULL) {
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval); _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
} else { } else {
options |= MBSTRG(regex_default_options); options |= MBREX(regex_default_options);
syntax = MBSTRG(regex_default_syntax); syntax = MBREX(regex_default_syntax);
} }
} }
if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) { if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
@ -766,7 +856,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
arg_pattern_len = 1; arg_pattern_len = 1;
} }
/* create regex pattern buffer */ /* create regex pattern buffer */
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBSTRG(current_mbctype), syntax TSRMLS_CC); re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
if (re == NULL) { if (re == NULL) {
RETURN_FALSE; RETURN_FALSE;
} }
@ -781,7 +871,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
/* do the actual work */ /* do the actual work */
err = 0; err = 0;
pos = string; pos = (OnigUChar *)string;
string_lim = (OnigUChar*)(string + string_len); string_lim = (OnigUChar*)(string + string_len);
regs = onig_region_new(); regs = onig_region_new();
while (err >= 0) { while (err >= 0) {
@ -843,7 +933,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
} }
n = regs->end[0]; n = regs->end[0];
if ((size_t)(pos - (OnigUChar *)string) < n) { if ((size_t)(pos - (OnigUChar *)string) < n) {
pos = string + n; pos = (OnigUChar *)string + n;
} else { } else {
if (pos < string_lim) { if (pos < string_lim) {
smart_str_appendl(&out_buf, pos, 1); smart_str_appendl(&out_buf, pos, 1);
@ -917,7 +1007,7 @@ PHP_FUNCTION(mb_split)
} }
/* create regex pattern buffer */ /* create regex pattern buffer */
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBSTRG(regex_default_options), MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC)) == NULL) { if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
RETURN_FALSE; RETURN_FALSE;
} }
@ -936,7 +1026,7 @@ PHP_FUNCTION(mb_split)
/* add it to the array */ /* add it to the array */
if (regs->beg[0] < string_len && regs->beg[0] >= (size_t)(pos - (OnigUChar *)string)) { if (regs->beg[0] < string_len && regs->beg[0] >= (size_t)(pos - (OnigUChar *)string)) {
add_next_index_stringl(return_value, pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1); add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
} else { } else {
err = -2; err = -2;
break; break;
@ -966,7 +1056,7 @@ PHP_FUNCTION(mb_split)
/* otherwise we just have one last element to add to the array */ /* otherwise we just have one last element to add to the array */
n = ((OnigUChar *)(string + string_len) - pos); n = ((OnigUChar *)(string + string_len) - pos);
if (n > 0) { if (n > 0) {
add_next_index_stringl(return_value, pos, n, 1); add_next_index_stringl(return_value, (char *)pos, n, 1);
} else { } else {
add_next_index_stringl(return_value, "", 0, 1); add_next_index_stringl(return_value, "", 0, 1);
} }
@ -985,7 +1075,8 @@ PHP_FUNCTION(mb_ereg_match)
php_mb_regex_t *re; php_mb_regex_t *re;
OnigSyntaxType *syntax; OnigSyntaxType *syntax;
int option = 0, err; OnigOptionType option = 0;
int err;
{ {
char *option_str = NULL; char *option_str = NULL;
@ -1000,12 +1091,12 @@ PHP_FUNCTION(mb_ereg_match)
if (option_str != NULL) { if (option_str != NULL) {
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL); _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
} else { } else {
option |= MBSTRG(regex_default_options); option |= MBREX(regex_default_options);
syntax = MBSTRG(regex_default_syntax); syntax = MBREX(regex_default_syntax);
} }
} }
if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), syntax TSRMLS_CC)) == NULL) { if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
RETURN_FALSE; RETURN_FALSE;
} }
@ -1027,7 +1118,8 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
size_t argc = ZEND_NUM_ARGS(); size_t argc = ZEND_NUM_ARGS();
char *arg_pattern, *arg_options; char *arg_pattern, *arg_options;
int arg_pattern_len, arg_options_len; int arg_pattern_len, arg_options_len;
int n, i, err, pos, len, beg, end, option; int n, i, err, pos, len, beg, end;
OnigOptionType option;
OnigUChar *str; OnigUChar *str;
OnigSyntaxType *syntax; OnigSyntaxType *syntax;
@ -1035,7 +1127,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
return; return;
} }
option = MBSTRG(regex_default_options); option = MBREX(regex_default_options);
if (argc == 2) { if (argc == 2) {
option = 0; option = 0;
@ -1044,20 +1136,20 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
if (argc > 0) { if (argc > 0) {
/* create regex pattern buffer */ /* create regex pattern buffer */
if ((MBSTRG(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC)) == NULL) { if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
RETURN_FALSE; RETURN_FALSE;
} }
} }
pos = MBSTRG(search_pos); pos = MBREX(search_pos);
str = NULL; str = NULL;
len = 0; len = 0;
if (MBSTRG(search_str) != NULL && Z_TYPE_P(MBSTRG(search_str)) == IS_STRING){ if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
str = (OnigUChar *)Z_STRVAL_P(MBSTRG(search_str)); str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
len = Z_STRLEN_P(MBSTRG(search_str)); len = Z_STRLEN_P(MBREX(search_str));
} }
if (MBSTRG(search_re) == NULL) { if (MBREX(search_re) == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given"); php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
RETURN_FALSE; RETURN_FALSE;
} }
@ -1067,14 +1159,14 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_FALSE; RETURN_FALSE;
} }
if (MBSTRG(search_regs)) { if (MBREX(search_regs)) {
onig_region_free(MBSTRG(search_regs), 1); onig_region_free(MBREX(search_regs), 1);
} }
MBSTRG(search_regs) = onig_region_new(); MBREX(search_regs) = onig_region_new();
err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0); err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
if (err == ONIG_MISMATCH) { if (err == ONIG_MISMATCH) {
MBSTRG(search_pos) = len; MBREX(search_pos) = len;
RETVAL_FALSE; RETVAL_FALSE;
} else if (err <= -2) { } else if (err <= -2) {
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
@ -1082,23 +1174,23 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
RETVAL_FALSE; RETVAL_FALSE;
} else { } else {
if (MBSTRG(search_regs)->beg[0] == MBSTRG(search_regs)->end[0]) { if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
} }
switch (mode) { switch (mode) {
case 1: case 1:
array_init(return_value); array_init(return_value);
beg = MBSTRG(search_regs)->beg[0]; beg = MBREX(search_regs)->beg[0];
end = MBSTRG(search_regs)->end[0]; end = MBREX(search_regs)->end[0];
add_next_index_long(return_value, beg); add_next_index_long(return_value, beg);
add_next_index_long(return_value, end - beg); add_next_index_long(return_value, end - beg);
break; break;
case 2: case 2:
array_init(return_value); array_init(return_value);
n = MBSTRG(search_regs)->num_regs; n = MBREX(search_regs)->num_regs;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
beg = MBSTRG(search_regs)->beg[i]; beg = MBREX(search_regs)->beg[i];
end = MBSTRG(search_regs)->end[i]; end = MBREX(search_regs)->end[i];
if (beg >= 0 && beg <= end && end <= len) { if (beg >= 0 && beg <= end && end <= len) {
add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
} else { } else {
@ -1110,17 +1202,17 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETVAL_TRUE; RETVAL_TRUE;
break; break;
} }
end = MBSTRG(search_regs)->end[0]; end = MBREX(search_regs)->end[0];
if (pos < end) { if (pos < end) {
MBSTRG(search_pos) = end; MBREX(search_pos) = end;
} else { } else {
MBSTRG(search_pos) = pos + 1; MBREX(search_pos) = pos + 1;
} }
} }
if (err < 0) { if (err < 0) {
onig_region_free(MBSTRG(search_regs), 1); onig_region_free(MBREX(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *)NULL; MBREX(search_regs) = (OnigRegion *)NULL;
} }
} }
/* }}} */ /* }}} */
@ -1164,8 +1256,8 @@ PHP_FUNCTION(mb_ereg_search_init)
return; return;
} }
option = MBSTRG(regex_default_options); option = MBREX(regex_default_options);
syntax = MBSTRG(regex_default_syntax); syntax = MBREX(regex_default_syntax);
if (argc == 3) { if (argc == 3) {
option = 0; option = 0;
@ -1174,25 +1266,25 @@ PHP_FUNCTION(mb_ereg_search_init)
if (argc > 1) { if (argc > 1) {
/* create regex pattern buffer */ /* create regex pattern buffer */
if ((MBSTRG(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), syntax TSRMLS_CC)) == NULL) { if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
RETURN_FALSE; RETURN_FALSE;
} }
} }
if (MBSTRG(search_str) != NULL) { if (MBREX(search_str) != NULL) {
zval_ptr_dtor(&MBSTRG(search_str)); zval_ptr_dtor(&MBREX(search_str));
MBSTRG(search_str) = (zval *)NULL; MBREX(search_str) = (zval *)NULL;
} }
MBSTRG(search_str) = arg_str; MBREX(search_str) = arg_str;
Z_ADDREF_P(MBSTRG(search_str)); Z_ADDREF_P(MBREX(search_str));
SEPARATE_ZVAL_IF_NOT_REF(&MBSTRG(search_str)); SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
MBSTRG(search_pos) = 0; MBREX(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) { if (MBREX(search_regs) != NULL) {
onig_region_free(MBSTRG(search_regs), 1); onig_region_free(MBREX(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *) NULL; MBREX(search_regs) = (OnigRegion *) NULL;
} }
RETURN_TRUE; RETURN_TRUE;
@ -1206,15 +1298,15 @@ PHP_FUNCTION(mb_ereg_search_getregs)
int n, i, len, beg, end; int n, i, len, beg, end;
OnigUChar *str; OnigUChar *str;
if (MBSTRG(search_regs) != NULL && Z_TYPE_P(MBSTRG(search_str)) == IS_STRING && Z_STRVAL_P(MBSTRG(search_str)) != NULL) { if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
array_init(return_value); array_init(return_value);
str = (OnigUChar *)Z_STRVAL_P(MBSTRG(search_str)); str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
len = Z_STRLEN_P(MBSTRG(search_str)); len = Z_STRLEN_P(MBREX(search_str));
n = MBSTRG(search_regs)->num_regs; n = MBREX(search_regs)->num_regs;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
beg = MBSTRG(search_regs)->beg[i]; beg = MBREX(search_regs)->beg[i];
end = MBSTRG(search_regs)->end[i]; end = MBREX(search_regs)->end[i];
if (beg >= 0 && beg <= end && end <= len) { if (beg >= 0 && beg <= end && end <= len) {
add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
} else { } else {
@ -1231,7 +1323,7 @@ PHP_FUNCTION(mb_ereg_search_getregs)
Get search start position */ Get search start position */
PHP_FUNCTION(mb_ereg_search_getpos) PHP_FUNCTION(mb_ereg_search_getpos)
{ {
RETVAL_LONG(MBSTRG(search_pos)); RETVAL_LONG(MBREX(search_pos));
} }
/* }}} */ /* }}} */
@ -1245,28 +1337,28 @@ PHP_FUNCTION(mb_ereg_search_setpos)
return; return;
} }
if (position < 0 || (MBSTRG(search_str) != NULL && Z_TYPE_P(MBSTRG(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBSTRG(search_str)))) { if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range"); php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
MBSTRG(search_pos) = 0; MBREX(search_pos) = 0;
RETURN_FALSE; RETURN_FALSE;
} }
MBSTRG(search_pos) = position; MBREX(search_pos) = position;
RETURN_TRUE; RETURN_TRUE;
} }
/* }}} */ /* }}} */
/* {{{ php_mb_regex_set_options */ /* {{{ php_mb_regex_set_options */
/*
 * Replaces the default regex options and syntax.
 * When prev_options / prev_syntax are non-NULL, the values in effect before the
 * change are written through them first; either may be NULL to skip that.
 */
void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
{ {
if (prev_options != NULL) { if (prev_options != NULL) {
*prev_options = MBSTRG(regex_default_options); *prev_options = MBREX(regex_default_options);
} }
if (prev_syntax != NULL) { if (prev_syntax != NULL) {
*prev_syntax = MBSTRG(regex_default_syntax); *prev_syntax = MBREX(regex_default_syntax);
} }
MBSTRG(regex_default_options) = options; MBREX(regex_default_options) = options;
MBSTRG(regex_default_syntax) = syntax; MBREX(regex_default_syntax) = syntax;
} }
/* }}} */ /* }}} */
@ -1288,10 +1380,10 @@ PHP_FUNCTION(mb_regex_set_options)
opt = 0; opt = 0;
syntax = NULL; syntax = NULL;
_php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL); _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC); _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
} else { } else {
opt = MBSTRG(regex_default_options); opt = MBREX(regex_default_options);
syntax = MBSTRG(regex_default_syntax); syntax = MBREX(regex_default_syntax);
} }
_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax); _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);

View file

@ -25,23 +25,6 @@
#include "php.h" #include "php.h"
#include "zend.h" #include "zend.h"
#include "php_onig_compat.h" /* must come prior to the oniguruma header */
#include "oniguruma.h"
#undef UChar
/* {{{ PHP_MBREGEX_GLOBALS */
#define PHP_MBREGEX_GLOBALS \
OnigEncoding default_mbctype; \
OnigEncoding current_mbctype; \
HashTable ht_rc; \
zval *search_str; \
zval *search_str_val; \
unsigned int search_pos; \
php_mb_regex_t *search_re; \
OnigRegion *search_regs; \
OnigOptionType regex_default_options; \
OnigSyntaxType *regex_default_syntax;
/* }}} */
/* {{{ PHP_MBREGEX_FUNCTION_ENTRIES */ /* {{{ PHP_MBREGEX_FUNCTION_ENTRIES */
#define PHP_MBREGEX_FUNCTION_ENTRIES \ #define PHP_MBREGEX_FUNCTION_ENTRIES \
@ -76,8 +59,6 @@
PHP_FALIAS(mbereg_search_setpos, mb_ereg_search_setpos, arginfo_mb_ereg_search_setpos) PHP_FALIAS(mbereg_search_setpos, mb_ereg_search_setpos, arginfo_mb_ereg_search_setpos)
/* }}} */ /* }}} */
typedef struct _zend_mbstring_globals * zend_mbstring_globals_ptr;
#define PHP_MBREGEX_MAXCACHE 50 #define PHP_MBREGEX_MAXCACHE 50
PHP_MINIT_FUNCTION(mb_regex); PHP_MINIT_FUNCTION(mb_regex);
@ -85,11 +66,15 @@ PHP_MSHUTDOWN_FUNCTION(mb_regex);
PHP_RINIT_FUNCTION(mb_regex); PHP_RINIT_FUNCTION(mb_regex);
PHP_RSHUTDOWN_FUNCTION(mb_regex); PHP_RSHUTDOWN_FUNCTION(mb_regex);
PHP_MINFO_FUNCTION(mb_regex); PHP_MINFO_FUNCTION(mb_regex);
void _php_mb_regex_globals_ctor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC); typedef struct _zend_mb_regex_globals zend_mb_regex_globals;
void _php_mb_regex_globals_dtor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
OnigEncoding php_mb_regex_name2mbctype(const char *pname); zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D); /* name matches the definition in php_mbregex.c and the call in mbstring's GINIT */
const char *php_mb_regex_mbctype2name(OnigEncoding mbctype); void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC);
int php_mb_regex_set_mbctype(const char *enc TSRMLS_DC);
int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC);
const char *php_mb_regex_get_mbctype(TSRMLS_D);
const char *php_mb_regex_get_default_mbctype(TSRMLS_D);
PHP_FUNCTION(mb_regex_encoding); PHP_FUNCTION(mb_regex_encoding);
PHP_FUNCTION(mb_ereg); PHP_FUNCTION(mb_ereg);