Merge branch 'PHP-5.5' into PHP-5.6

* PHP-5.5: updated NEWS Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string)
2025-08-17 14:38:49 +02:00 · 2015-06-23 19:32:57 +02:00 · 2015-06-23 19:32:57 +02:00 · e1561c490e
commit e1561c490e
parent 868b9328a0 1cbcbcbc21
3 changed files with 87 additions and 4 deletions
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
 }
 /* }}} */
 /* {{{ static calculate_unit_length */
 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
 static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
 {
 	int unit_len;
 	if (pce->compile_options & PCRE_UTF8) {
 		char *end = start;
 		/* skip continuation bytes */
 		while ((*++end & 0xC0) == 0x80);
 		unit_len = end - start;
 	} else {
 		unit_len = 1;
 	}
 	return unit_len;
 }
 /* }}} */
 /* {{{ pcre_get_compiled_regex_cache
 */
 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@ -780,8 +799,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
 			   the start offset, and continue. Fudge the offset values
 			   to achieve this, unless we're already at the end of the string. */
 			if (g_notempty != 0 && start_offset < subject_len) {
 				int unit_len = calculate_unit_length(pce, subject + start_offset);
 				offsets[0] = start_offset;
-				offsets[1] = start_offset + 1;
+				offsets[1] = start_offset + unit_len;
 			} else
 				break;
 		} else {
@ -1240,10 +1261,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
 			   the start offset, and continue. Fudge the offset values
 			   to achieve this, unless we're already at the end of the string. */
 			if (g_notempty != 0 && start_offset < subject_len) {
 				int unit_len = calculate_unit_length(pce, piece);
 				offsets[0] = start_offset;
-				offsets[1] = start_offset + 1;
+				offsets[1] = start_offset + unit_len;
-				memcpy(&result[*result_len], piece, 1);
+				memcpy(&result[*result_len], piece, unit_len);
-				(*result_len)++;
+				*result_len += unit_len;
 			} else {
 				new_len = *result_len + subject_len - start_offset;
 				if (new_len + 1 > alloc_len) {
--- a/ext/pcre/tests/bug53823.phpt
+++ b/ext/pcre/tests/bug53823.phpt
@ -0,0 +1,13 @@
 --TEST--
 Bug #53823 - preg_replace: * qualifier on unicode replace garbles the string
 --FILE--
 <?php
 var_dump(preg_replace('/[^\pL\pM]*/iu', '', 'áéíóú'));
 // invalid UTF-8
 var_dump(preg_replace('/[^\pL\pM]*/iu', '', "\xFCáéíóú"));
 var_dump(preg_replace('/[^\pL\pM]*/iu', '', "áéíóú\xFC"));
 ?>
 --EXPECT--
 string(10) "áéíóú"
 NULL
 NULL
--- a/ext/pcre/tests/bug66121.phpt
+++ b/ext/pcre/tests/bug66121.phpt
@ -0,0 +1,47 @@
 --TEST--
 Bug #66121 - UTF-8 lookbehinds match bytes instead of characters
 --FILE--
 <?php
 // Sinhala characters
 var_dump(preg_replace('/(?<!ක)/u', '*', 'ක'));
 var_dump(preg_replace('/(?<!ක)/u', '*', 'ම'));
 // English characters
 var_dump(preg_replace('/(?<!k)/u', '*', 'k'));
 var_dump(preg_replace('/(?<!k)/u', '*', 'm'));
 // Sinhala characters
 preg_match_all('/(?<!ක)/u', 'ම', $matches, PREG_OFFSET_CAPTURE);
 var_dump($matches);
 // invalid UTF-8
 var_dump(preg_replace('/(?<!ක)/u', '*', "\xFCක"));
 var_dump(preg_replace('/(?<!ක)/u', '*', "ක\xFC"));
 var_dump(preg_match_all('/(?<!ක)/u', "\xFCම", $matches, PREG_OFFSET_CAPTURE));
 var_dump(preg_match_all('/(?<!ක)/u', "\xFCම", $matches, PREG_OFFSET_CAPTURE));
 ?>
 --EXPECT--
 string(4) "*ක"
 string(5) "*ම*"
 string(2) "*k"
 string(3) "*m*"
 array(1) {
  [0]=>
  array(2) {
    [0]=>
    array(2) {
      [0]=>
      string(0) ""
      [1]=>
      int(0)
    }
    [1]=>
    array(2) {
      [0]=>
      string(0) ""
      [1]=>
      int(3)
    }
  }
 }
 NULL
 NULL
 bool(false)
 bool(false)