mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
PCRE: Only remember valid UTF-8 if start offset zero
PCRE only validates the string starting from the start offset (minus maximum look-behind, but let's ignore that), so we can only remember that the string is fully valid UTF-8 if the original start offset is zero.
This commit is contained in:
parent
c9e78e6d33
commit
cd5591a28d
3 changed files with 19 additions and 4 deletions
1
NEWS
1
NEWS
|
@ -13,6 +13,7 @@ PHP NEWS
|
||||||
- PCRE:
|
- PCRE:
|
||||||
. Fixed bug #79188 (Memory corruption in preg_replace/preg_replace_callback
|
. Fixed bug #79188 (Memory corruption in preg_replace/preg_replace_callback
|
||||||
and unicode). (Nikita)
|
and unicode). (Nikita)
|
||||||
|
. Fixed bug #79241 (Segmentation fault on preg_match()). (Nikita)
|
||||||
|
|
||||||
?? ??? ????, PHP 7.4.3
|
?? ??? ????, PHP 7.4.3
|
||||||
|
|
||||||
|
|
|
@ -1167,7 +1167,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
|
||||||
PCRE2_SPTR mark = NULL; /* Target for MARK name */
|
PCRE2_SPTR mark = NULL; /* Target for MARK name */
|
||||||
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
|
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
|
||||||
pcre2_match_data *match_data;
|
pcre2_match_data *match_data;
|
||||||
PCRE2_SIZE start_offset2;
|
PCRE2_SIZE start_offset2, orig_start_offset;
|
||||||
|
|
||||||
char *subject = ZSTR_VAL(subject_str);
|
char *subject = ZSTR_VAL(subject_str);
|
||||||
size_t subject_len = ZSTR_LEN(subject_str);
|
size_t subject_len = ZSTR_LEN(subject_str);
|
||||||
|
@ -1263,8 +1263,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
options = (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, start_offset2)
|
orig_start_offset = start_offset2;
|
||||||
? 0 : PCRE2_NO_UTF_CHECK;
|
options =
|
||||||
|
(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
|
||||||
|
? 0 : PCRE2_NO_UTF_CHECK;
|
||||||
|
|
||||||
/* Execute the regular expression. */
|
/* Execute the regular expression. */
|
||||||
#ifdef HAVE_PCRE_JIT_SUPPORT
|
#ifdef HAVE_PCRE_JIT_SUPPORT
|
||||||
|
@ -1454,7 +1456,8 @@ error:
|
||||||
|
|
||||||
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
|
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
|
||||||
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
|
/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
|
||||||
if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) {
|
if ((pce->compile_options & PCRE2_UTF)
|
||||||
|
&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
|
||||||
GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
|
GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,8 +15,19 @@ var_dump(preg_match($pattern, $text, $matches, 0, 0));
|
||||||
var_dump(preg_match($pattern, $text, $matches, 0, 1));
|
var_dump(preg_match($pattern, $text, $matches, 0, 1));
|
||||||
var_dump(preg_last_error() == PREG_BAD_UTF8_OFFSET_ERROR);
|
var_dump(preg_last_error() == PREG_BAD_UTF8_OFFSET_ERROR);
|
||||||
|
|
||||||
|
echo "\n";
|
||||||
|
|
||||||
|
$text = "VA\xff"; $text .= "LID";
|
||||||
|
var_dump(preg_match($pattern, $text, $matches, 0, 4));
|
||||||
|
var_dump(preg_match($pattern, $text, $matches, 0, 0));
|
||||||
|
var_dump(preg_last_error() == PREG_BAD_UTF8_ERROR);
|
||||||
|
|
||||||
?>
|
?>
|
||||||
--EXPECT--
|
--EXPECT--
|
||||||
int(0)
|
int(0)
|
||||||
bool(false)
|
bool(false)
|
||||||
bool(true)
|
bool(true)
|
||||||
|
|
||||||
|
int(1)
|
||||||
|
bool(false)
|
||||||
|
bool(true)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue