Merge branch 'PHP-5.5' into PHP-5.6

* PHP-5.5:
  updated NEWS
  Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string)
This commit is contained in:
Christoph M. Becker 2015-06-23 19:32:57 +02:00
commit e1561c490e
3 changed files with 87 additions and 4 deletions

View file

@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
}
/* }}} */
/* {{{ static calculate_unit_length */
/* Returns the number of bytes occupied by the character that begins at `start`.
   Patterns compiled with PCRE_UTF8 are measured as UTF-8 sequences (the input
   is assumed to be valid UTF-8); for all other patterns a character is one byte. */
static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
{
	char *cursor;

	/* Non-UTF-8 patterns: every character is exactly one byte. */
	if (!(pce->compile_options & PCRE_UTF8)) {
		return 1;
	}

	/* Step over the lead byte, then over each continuation byte (10xxxxxx). */
	cursor = start + 1;
	while ((*cursor & 0xC0) == 0x80) {
		cursor++;
	}
	return cursor - start;
}
/* }}} */
/* {{{ pcre_get_compiled_regex_cache
*/
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@ -780,8 +799,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
int unit_len = calculate_unit_length(pce, subject + start_offset);
offsets[0] = start_offset;
offsets[1] = start_offset + 1;
offsets[1] = start_offset + unit_len;
} else
break;
} else {
@ -1240,10 +1261,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
int unit_len = calculate_unit_length(pce, piece);
offsets[0] = start_offset;
offsets[1] = start_offset + 1;
memcpy(&result[*result_len], piece, 1);
(*result_len)++;
offsets[1] = start_offset + unit_len;
memcpy(&result[*result_len], piece, unit_len);
*result_len += unit_len;
} else {
new_len = *result_len + subject_len - start_offset;
if (new_len + 1 > alloc_len) {

View file

@ -0,0 +1,13 @@
--TEST--
Bug #53823 - preg_replace: * qualifier on unicode replace garbles the string
--FILE--
<?php
// Regression script for bug #53823: strip everything that is not a letter or a
// mark from a multi-byte subject using a pattern that can match the empty string.
// The subject consists solely of letters, so it is expected back unchanged.
var_dump(preg_replace('/[^\pL\pM]*/iu', '', 'áéíóú'));
// invalid UTF-8
// A stray \xFC byte is placed once before and once after the text; both calls
// are expected to return NULL.
var_dump(preg_replace('/[^\pL\pM]*/iu', '', "\xFCáéíóú"));
var_dump(preg_replace('/[^\pL\pM]*/iu', '', "áéíóú\xFC"));
?>
--EXPECT--
string(10) "áéíóú"
NULL
NULL

View file

@ -0,0 +1,47 @@
--TEST--
Bug #66121 - UTF-8 lookbehinds match bytes instead of characters
--FILE--
<?php
// Regression script for bug #66121: with the /u modifier a negative lookbehind
// must be evaluated per character, not per byte, when the matcher advances
// after an empty match.

// Sinhala characters
var_dump(preg_replace('/(?<!ක)/u', '*', 'ක'));
var_dump(preg_replace('/(?<!ක)/u', '*', 'ම'));
// English characters
var_dump(preg_replace('/(?<!k)/u', '*', 'k'));
var_dump(preg_replace('/(?<!k)/u', '*', 'm'));
// Sinhala characters
// Empty matches are expected at byte offsets 0 and 3 only, i.e. on character
// boundaries of the 3-byte subject.
preg_match_all('/(?<!ක)/u', 'ම', $matches, PREG_OFFSET_CAPTURE);
var_dump($matches);
// invalid UTF-8
// Each function is exercised with the stray \xFC byte both before and after
// the valid character; preg_replace() is expected to return NULL and
// preg_match_all() false in every case.
var_dump(preg_replace('/(?<!ක)/u', '*', "\xFCක"));
var_dump(preg_replace('/(?<!ක)/u', '*', "ක\xFC"));
var_dump(preg_match_all('/(?<!ක)/u', "\xFCම", $matches, PREG_OFFSET_CAPTURE));
var_dump(preg_match_all('/(?<!ක)/u', "ම\xFC", $matches, PREG_OFFSET_CAPTURE));
?>
--EXPECT--
string(4) "*ක"
string(5) "*ම*"
string(2) "*k"
string(3) "*m*"
array(1) {
[0]=>
array(2) {
[0]=>
array(2) {
[0]=>
string(0) ""
[1]=>
int(0)
}
[1]=>
array(2) {
[0]=>
string(0) ""
[1]=>
int(3)
}
}
}
NULL
NULL
bool(false)
bool(false)