mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
adds support for named captures to mb_ereg & mb_ereg_search
`mb_ereg`, `mb_ereg_search_regs` & `mb_ereg_search_getregs` returned only numbered capturing groups. Now they return both numbered and named capturing groups. Fixes Bug #72704.
This commit is contained in:
parent
69a49af0d3
commit
212f56b7ca
4 changed files with 185 additions and 0 deletions
|
@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
/*
|
||||
* Callbacks for named subpatterns
|
||||
*/
|
||||
|
||||
/* {{{ struct mb_regex_groups_iter_args */
|
||||
typedef struct mb_regex_groups_iter_args {
|
||||
zval *groups;
|
||||
char *search_str;
|
||||
int search_len;
|
||||
OnigRegion *region;
|
||||
} mb_regex_groups_iter_args;
|
||||
/* }}} */
|
||||
|
||||
/* {{{ mb_regex_groups_iter */
|
||||
static int
|
||||
mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
|
||||
{
|
||||
mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
|
||||
int i, gn, ref, beg, end;
|
||||
|
||||
for (i = 0; i < ngroup_num; i++) {
|
||||
gn = group_nums[i];
|
||||
ref = onig_name_to_backref_number(reg, name, name_end, args->region);
|
||||
if (ref != gn) {
|
||||
/*
|
||||
* In case of duplicate groups, keep only the last suceeding one
|
||||
* to be consistent with preg_match with the PCRE_DUPNAMES option.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
beg = args->region->beg[gn];
|
||||
end = args->region->end[gn];
|
||||
if (beg >= 0 && beg < end && end <= args->search_len) {
|
||||
add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg);
|
||||
} else {
|
||||
add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
* php functions
|
||||
*/
|
||||
|
@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
|
|||
add_index_bool(array, i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (onig_number_of_names(re) > 0) {
|
||||
mb_regex_groups_iter_args args = {array, string, string_len, regs};
|
||||
onig_foreach_name(re, mb_regex_groups_iter, &args);
|
||||
}
|
||||
}
|
||||
|
||||
if (match_len == 0) {
|
||||
|
@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
|
|||
add_index_bool(return_value, i, 0);
|
||||
}
|
||||
}
|
||||
if (onig_number_of_names(MBREX(search_re)) > 0) {
|
||||
mb_regex_groups_iter_args args = {
|
||||
return_value,
|
||||
Z_STRVAL(MBREX(search_str)),
|
||||
Z_STRLEN(MBREX(search_str)),
|
||||
MBREX(search_regs)
|
||||
};
|
||||
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
RETVAL_TRUE;
|
||||
|
@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs)
|
|||
add_index_bool(return_value, i, 0);
|
||||
}
|
||||
}
|
||||
if (onig_number_of_names(MBREX(search_re)) > 0) {
|
||||
mb_regex_groups_iter_args args = {
|
||||
return_value,
|
||||
Z_STRVAL(MBREX(search_str)),
|
||||
len,
|
||||
MBREX(search_regs)
|
||||
};
|
||||
onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
|
||||
}
|
||||
} else {
|
||||
RETVAL_FALSE;
|
||||
}
|
||||
|
|
37
ext/mbstring/tests/mb_ereg_dupnames.phpt
Normal file
37
ext/mbstring/tests/mb_ereg_dupnames.phpt
Normal file
|
@ -0,0 +1,37 @@
|
|||
--TEST--
Testing mb_ereg() duplicate named groups
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
// The punctuation marks are the full-width forms U+FF1F and U+FF01
// (3 bytes each in UTF-8), so they are literals in the pattern, not quantifiers.
$pattern = '\w+((?<punct>？)|(?<punct>！))';
mb_ereg($pattern, '中？', $m);
var_dump($m);
mb_ereg($pattern, '中！', $m);
var_dump($m);
?>
--EXPECT--
array(4) {
  [0]=>
  string(6) "中？"
  [1]=>
  string(3) "？"
  [2]=>
  bool(false)
  ["punct"]=>
  string(3) "？"
}
array(4) {
  [0]=>
  string(6) "中！"
  [1]=>
  bool(false)
  [2]=>
  string(3) "！"
  ["punct"]=>
  string(3) "！"
}
|
50
ext/mbstring/tests/mb_ereg_named_subpatterns.phpt
Normal file
50
ext/mbstring/tests/mb_ereg_named_subpatterns.phpt
Normal file
|
@ -0,0 +1,50 @@
|
|||
--TEST--
Testing mb_ereg() named subpatterns
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
// Subjects with leading whitespace start with exactly two ASCII spaces (2 bytes).
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '  中国', $m);
var_dump($m);
// No leading whitespace: the empty "wsp" capture is reported as false.
mb_ereg('(?<wsp>\s*)(?<word>\w+)', '国', $m);
var_dump($m);
// Mixing an unnamed group with a named one: only the named group is reported.
mb_ereg('(\s*)(?<word>\w+)', '  中国', $m);
var_dump($m);
?>
--EXPECT--
array(5) {
  [0]=>
  string(8) "  中国"
  [1]=>
  string(2) "  "
  [2]=>
  string(6) "中国"
  ["wsp"]=>
  string(2) "  "
  ["word"]=>
  string(6) "中国"
}
array(5) {
  [0]=>
  string(3) "国"
  [1]=>
  bool(false)
  [2]=>
  string(3) "国"
  ["wsp"]=>
  bool(false)
  ["word"]=>
  string(3) "国"
}
array(3) {
  [0]=>
  string(8) "  中国"
  [1]=>
  string(6) "中国"
  ["word"]=>
  string(6) "中国"
}
|
31
ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt
Normal file
31
ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt
Normal file
|
@ -0,0 +1,31 @@
|
|||
--TEST--
Testing mb_ereg_search() named capture groups
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
function_exists('mb_ereg_search') or die("skip mb_ereg_search() is not available in this build");
?>
--FILE--
<?php
mb_regex_encoding("UTF-8");
// Subject: two ASCII spaces (2 bytes), two CJK characters (6 bytes) and a
// full-width question mark U+FF1F (3 bytes), 11 bytes in total.
mb_ereg_search_init('  中国？');
mb_ereg_search('(?<wsp>\s*)(?<word>\w+)(?<punct>[？！])');
var_dump(mb_ereg_search_getregs());
?>
--EXPECT--
array(7) {
  [0]=>
  string(11) "  中国？"
  [1]=>
  string(2) "  "
  [2]=>
  string(6) "中国"
  [3]=>
  string(3) "？"
  ["punct"]=>
  string(3) "？"
  ["wsp"]=>
  string(2) "  "
  ["word"]=>
  string(6) "中国"
}
|
Loading…
Add table
Add a link
Reference in a new issue