adds support for named captures to mb_ereg & mb_ereg_search

`mb_ereg`, `mb_ereg_search_regs` & `mb_ereg_search_getregs`
returned only numbered capturing groups.
Now they return both numbered and named capturing groups.
Fixes Bug #72704.
This commit is contained in:
ju1ius 2016-07-29 18:00:28 +02:00 committed by Nikita Popov
parent 69a49af0d3
commit 212f56b7ca
4 changed files with 185 additions and 0 deletions

View file

@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
}
/* }}} */
/*
* Callbacks for named subpatterns
*/
/* {{{ struct mb_regex_groups_iter_args
 * Context passed to the named-group callback through the opaque user
 * argument of onig_foreach_name().
 */
typedef struct mb_regex_groups_iter_args {
	zval		*groups;	/* array that receives one entry per group name */
	char		*search_str;	/* subject string the region offsets index into */
	size_t		search_len;	/* byte length of search_str; size_t because callers
					 * pass Z_STRLEN() values, which an int would truncate */
	OnigRegion	*region;	/* capture offsets (beg[]/end[]) of the match to report */
} mb_regex_groups_iter_args;
/* }}} */
/* {{{ mb_regex_groups_iter
 * onig_foreach_name() callback: stores the capture of one named group in
 * args->groups, keyed by the group name.
 *
 * name, name_end   bounds of the group name (not NUL-terminated by contract)
 * ngroup_num       number of entries in group_nums
 * group_nums       group numbers that share this name
 * reg              regex the name belongs to
 * parg             mb_regex_groups_iter_args * supplied by the caller
 *
 * The entry is the captured substring, or false when the group did not
 * participate in the match. As the condition below requires beg < end, a
 * group that matched the empty string is reported as false as well.
 *
 * Always returns 0; per the Oniguruma API a non-zero return would stop the
 * iteration over the remaining names.
 */
static int
mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
{
	mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
	const char *key = (const char *) name;
	size_t key_len = (size_t) (name_end - name);
	int i, ref, beg, end;

	/*
	 * In case of duplicate groups, keep only the last succeeding one
	 * to be consistent with preg_match with the PCRE_DUPNAMES option.
	 * The lookup depends only on the name and the region, so it is done
	 * once here instead of once per group number.
	 */
	ref = onig_name_to_backref_number(reg, name, name_end, args->region);

	for (i = 0; i < ngroup_num; i++) {
		if (group_nums[i] != ref) {
			continue;
		}

		/* Never index past the offsets the region actually holds. */
		if (ref < args->region->num_regs) {
			beg = args->region->beg[ref];
			end = args->region->end[ref];
		} else {
			beg = end = -1;
		}

		if (beg >= 0 && beg < end && end <= args->search_len) {
			add_assoc_stringl_ex(args->groups, key, key_len, &args->search_str[beg], end - beg);
		} else {
			add_assoc_bool_ex(args->groups, key, key_len, 0);
		}

		/* The selected group has been handled; the other numbers are skipped. */
		break;
	}

	return 0;
}
/* }}} */
/*
* php functions
*/
@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
add_index_bool(array, i, 0);
}
}
if (onig_number_of_names(re) > 0) {
mb_regex_groups_iter_args args = {array, string, string_len, regs};
onig_foreach_name(re, mb_regex_groups_iter, &args);
}
}
if (match_len == 0) {
@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
add_index_bool(return_value, i, 0);
}
}
if (onig_number_of_names(MBREX(search_re)) > 0) {
mb_regex_groups_iter_args args = {
return_value,
Z_STRVAL(MBREX(search_str)),
Z_STRLEN(MBREX(search_str)),
MBREX(search_regs)
};
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
}
break;
default:
RETVAL_TRUE;
@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs)
add_index_bool(return_value, i, 0);
}
}
if (onig_number_of_names(MBREX(search_re)) > 0) {
mb_regex_groups_iter_args args = {
return_value,
Z_STRVAL(MBREX(search_str)),
len,
MBREX(search_regs)
};
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
}
} else {
RETVAL_FALSE;
}

View file

@ -0,0 +1,37 @@
--TEST--
Testing mb_ereg() duplicate named groups
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
$pattern = '\w+((?<punct>？)|(?<punct>！))';
mb_ereg($pattern, '中？', $m);
var_dump($m);
mb_ereg($pattern, '中！', $m);
var_dump($m);
?>
--EXPECT--
array(4) {
[0]=>
string(6) "中？"
[1]=>
string(3) "？"
[2]=>
bool(false)
["punct"]=>
string(3) "？"
}
array(4) {
[0]=>
string(6) "中！"
[1]=>
bool(false)
[2]=>
string(3) "！"
["punct"]=>
string(3) "！"
}

View file

@ -0,0 +1,50 @@
--TEST--
Testing mb_ereg() named subpatterns
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '  中国', $m);
var_dump($m);
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '国', $m);
var_dump($m);
mb_ereg('(\s*)(?<word>\w+)', '  中国', $m);
var_dump($m);
?>
--EXPECT--
array(5) {
[0]=>
string(8) "  中国"
[1]=>
string(2) "  "
[2]=>
string(6) "中国"
["wsp"]=>
string(2) "  "
["word"]=>
string(6) "中国"
}
array(5) {
[0]=>
string(3) "国"
[1]=>
bool(false)
[2]=>
string(3) "国"
["wsp"]=>
bool(false)
["word"]=>
string(3) "国"
}
array(3) {
[0]=>
string(8) "  中国"
[1]=>
string(6) "中国"
["word"]=>
string(6) "中国"
}

View file

@ -0,0 +1,31 @@
--TEST--
Testing mb_ereg_search() named capture groups
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg_search') or die("skip mb_ereg_search() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
mb_ereg_search_init('  中国？');
mb_ereg_search('(?<wsp>\s*)(?<word>\w+)(?<punct>[？])');
var_dump(mb_ereg_search_getregs());
?>
--EXPECT--
array(7) {
[0]=>
string(11) "  中国？"
[1]=>
string(2) "  "
[2]=>
string(6) "中国"
[3]=>
string(3) "？"
["punct"]=>
string(3) "？"
["wsp"]=>
string(2) "  "
["word"]=>
string(6) "中国"
}