Merge branch 'PHP-5.5' into PHP-5.6

* PHP-5.5:
  updated NEWS
  Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string)
This commit is contained in:
Christoph M. Becker 2015-06-23 19:32:57 +02:00
commit e1561c490e
3 changed files with 87 additions and 4 deletions

View file

@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
} }
/* }}} */ /* }}} */
/* {{{ static calculate_unit_length */
/* Returns how many bytes the character beginning at `start` occupies.
 *
 * When the pattern was not compiled with PCRE_UTF8 a character is always a
 * single byte. With PCRE_UTF8 the first byte is counted unconditionally and
 * every directly following continuation byte (bit pattern 10xxxxxx) is added.
 * The subject is assumed to be valid UTF-8; no validation happens here. */
static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
{
	char *cursor;

	/* Byte-oriented pattern: one unit is one byte. */
	if (!(pce->compile_options & PCRE_UTF8)) {
		return 1;
	}

	/* Step over the first byte, then over each trailing continuation byte. */
	cursor = start + 1;
	while ((*cursor & 0xC0) == 0x80) {
		cursor++;
	}

	return cursor - start;
}
/* }}} */
/* {{{ pcre_get_compiled_regex_cache /* {{{ pcre_get_compiled_regex_cache
*/ */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@ -780,8 +799,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
the start offset, and continue. Fudge the offset values the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */ to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) { if (g_notempty != 0 && start_offset < subject_len) {
int unit_len = calculate_unit_length(pce, subject + start_offset);
offsets[0] = start_offset; offsets[0] = start_offset;
offsets[1] = start_offset + 1; offsets[1] = start_offset + unit_len;
} else } else
break; break;
} else { } else {
@ -1240,10 +1261,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
the start offset, and continue. Fudge the offset values the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */ to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) { if (g_notempty != 0 && start_offset < subject_len) {
int unit_len = calculate_unit_length(pce, piece);
offsets[0] = start_offset; offsets[0] = start_offset;
offsets[1] = start_offset + 1; offsets[1] = start_offset + unit_len;
memcpy(&result[*result_len], piece, 1); memcpy(&result[*result_len], piece, unit_len);
(*result_len)++; *result_len += unit_len;
} else { } else {
new_len = *result_len + subject_len - start_offset; new_len = *result_len + subject_len - start_offset;
if (new_len + 1 > alloc_len) { if (new_len + 1 > alloc_len) {

View file

@ -0,0 +1,13 @@
--TEST--
Bug #53823 - preg_replace: * qualifier on unicode replace garbles the string
--FILE--
<?php
// Removes every run of characters that are neither letters (\pL) nor marks (\pM).
$pattern = '/[^\pL\pM]*/iu';

// valid UTF-8: the subject holds only letters, so it must come back intact
var_dump(preg_replace($pattern, '', 'áéíóú'));

// invalid UTF-8: a stray \xFC byte, once leading and once trailing
foreach (array("\xFCáéíóú", "áéíóú\xFC") as $subject) {
	var_dump(preg_replace($pattern, '', $subject));
}
?>
--EXPECT--
string(10) "áéíóú"
NULL
NULL

View file

@ -0,0 +1,47 @@
--TEST--
Bug #66121 - UTF-8 lookbehinds match bytes instead of characters
--FILE--
<?php
// A negative lookbehind that matches the empty string must be applied once
// per character position, not once per byte, when the /u modifier is set.

// Sinhala characters (multi-byte in UTF-8)
var_dump(preg_replace('/(?<!ක)/u', '*', 'ක'));
var_dump(preg_replace('/(?<!ක)/u', '*', 'ම'));
// English characters (single-byte in UTF-8)
var_dump(preg_replace('/(?<!k)/u', '*', 'k'));
var_dump(preg_replace('/(?<!k)/u', '*', 'm'));
// Sinhala characters: the reported offsets must fall on character boundaries
preg_match_all('/(?<!ක)/u', 'ම', $matches, PREG_OFFSET_CAPTURE);
var_dump($matches);
// invalid UTF-8: a stray \xFC byte, once leading and once trailing, for both
// preg_replace() and preg_match_all()
var_dump(preg_replace('/(?<!ක)/u', '*', "\xFCක"));
var_dump(preg_replace('/(?<!ක)/u', '*', "ක\xFC"));
var_dump(preg_match_all('/(?<!ක)/u', "\xFCම", $matches, PREG_OFFSET_CAPTURE));
var_dump(preg_match_all('/(?<!ක)/u', "ම\xFC", $matches, PREG_OFFSET_CAPTURE));
?>
--EXPECT--
string(4) "*ක"
string(5) "*ම*"
string(2) "*k"
string(3) "*m*"
array(1) {
[0]=>
array(2) {
[0]=>
array(2) {
[0]=>
string(0) ""
[1]=>
int(0)
}
[1]=>
array(2) {
[0]=>
string(0) ""
[1]=>
int(3)
}
}
}
NULL
NULL
bool(false)
bool(false)