diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index b95f315c881..957ee484ba5 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option } /* }}} */ + +/* + * Callbacks for named subpatterns + */ + +/* {{{ struct mb_ereg_groups_iter_arg */ +typedef struct mb_regex_groups_iter_args { + zval *groups; + char *search_str; + int search_len; + OnigRegion *region; +} mb_regex_groups_iter_args; +/* }}} */ + +/* {{{ mb_ereg_groups_iter */ +static int +mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg) +{ + mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg; + int i, gn, ref, beg, end; + + for (i = 0; i < ngroup_num; i++) { + gn = group_nums[i]; + ref = onig_name_to_backref_number(reg, name, name_end, args->region); + if (ref != gn) { + /* + * In case of duplicate groups, keep only the last suceeding one + * to be consistent with preg_match with the PCRE_DUPNAMES option. + */ + continue; + } + beg = args->region->beg[gn]; + end = args->region->end[gn]; + if (beg >= 0 && beg < end && end <= args->search_len) { + add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg); + } else { + add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0); + } + } + + return 0; +} +/* }}} */ + /* * php functions */ @@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) add_index_bool(array, i, 0); } } + + if (onig_number_of_names(re) > 0) { + mb_regex_groups_iter_args args = {array, string, string_len, regs}; + onig_foreach_name(re, mb_regex_groups_iter, &args); + } } if (match_len == 0) { @@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) add_index_bool(return_value, i, 0); } } + if (onig_number_of_names(MBREX(search_re)) > 0) { + mb_regex_groups_iter_args args = { + return_value, + Z_STRVAL(MBREX(search_str)), + Z_STRLEN(MBREX(search_str)), + MBREX(search_regs) + }; + onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); + } break; default: RETVAL_TRUE; @@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs) add_index_bool(return_value, i, 0); } } + if (onig_number_of_names(MBREX(search_re)) > 0) { + mb_regex_groups_iter_args args = { + return_value, + Z_STRVAL(MBREX(search_str)), + len, + MBREX(search_regs) + }; + onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); + } } else { RETVAL_FALSE; } diff --git a/ext/mbstring/tests/mb_ereg_dupnames.phpt b/ext/mbstring/tests/mb_ereg_dupnames.phpt new file mode 100644 index 00000000000..fcc428c3604 --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_dupnames.phpt @@ -0,0 +1,37 @@ +--TEST-- +Testing mb_ereg() duplicate named groups +--SKIPIF-- + +--FILE-- +?)|(?!))'; + mb_ereg($pattern, '中?', $m); + var_dump($m); + mb_ereg($pattern, '中!', $m); + var_dump($m); +?> +--EXPECT-- +array(4) { + [0]=> + string(6) "中?" + [1]=> + string(3) "?" + [2]=> + bool(false) + ["punct"]=> + string(3) "?" +} +array(4) { + [0]=> + string(6) "中!" + [1]=> + bool(false) + [2]=> + string(3) "!" + ["punct"]=> + string(3) "!" +} diff --git a/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt new file mode 100644 index 00000000000..ed0f85baa31 --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt @@ -0,0 +1,50 @@ +--TEST-- +Testing mb_ereg() named subpatterns +--SKIPIF-- + +--FILE-- +\s*)(?\w+)', ' 中国', $m); + var_dump($m); + mb_ereg('(?\s*)(?\w+)', '国', $m); + var_dump($m); + mb_ereg('(\s*)(?\w+)', ' 中国', $m); + var_dump($m); +?> +--EXPECT-- +array(5) { + [0]=> + string(8) " 中国" + [1]=> + string(2) " " + [2]=> + string(6) "中国" + ["wsp"]=> + string(2) " " + ["word"]=> + string(6) "中国" +} +array(5) { + [0]=> + string(3) "国" + [1]=> + bool(false) + [2]=> + string(3) "国" + ["wsp"]=> + bool(false) + ["word"]=> + string(3) "国" +} +array(3) { + [0]=> + string(8) " 中国" + [1]=> + string(6) "中国" + ["word"]=> + string(6) "中国" +} diff --git a/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt new file mode 100644 index 00000000000..f899e9beebd --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt @@ -0,0 +1,31 @@ +--TEST-- +Testing mb_ereg_search() named capture groups +--SKIPIF-- + +--FILE-- +\s*)(?\w+)(?[?!])'); + var_dump(mb_ereg_search_getregs()); +?> +--EXPECT-- +array(7) { + [0]=> + string(11) " 中国?" + [1]=> + string(2) " " + [2]=> + string(6) "中国" + [3]=> + string(3) "?" + ["punct"]=> + string(3) "?" + ["wsp"]=> + string(2) " " + ["word"]=> + string(6) "中国" +}