From 738016bd884a339009e1af371eaba0fee60bf23b Mon Sep 17 00:00:00 2001 From: Yasuo Ohgaki Date: Thu, 1 Sep 2016 19:15:32 +0900 Subject: [PATCH 1/4] Implement FR #72777 - ensure stack limits on mbstring functions. The patch creates a new INI setting, mbstring.regex_stack_limit, which defaults to 100000. --- ext/mbstring/mbstring.c | 18 +++++++++-- ext/mbstring/mbstring.h | 1 + ext/mbstring/php_mbregex.c | 34 +++++++++++++++++--- ext/mbstring/tests/mbregex_stack_limit.phpt | 24 ++++++++++++++ ext/mbstring/tests/mbregex_stack_limit2.phpt | 25 ++++++++++++++ 5 files changed, 94 insertions(+), 8 deletions(-) create mode 100644 ext/mbstring/tests/mbregex_stack_limit.phpt create mode 100644 ext/mbstring/tests/mbregex_stack_limit2.phpt diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 0e255e98d27..36b6c478b84 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1027,9 +1027,18 @@ static void *_php_mb_compile_regex(const char *pattern) /* {{{ _php_mb_match_regex */ static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) { - return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str, - (const OnigUChar*)str + str_len, (const OnigUChar *)str, - (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0; + OnigMatchParam *mp = onig_new_match_param(); + int err; + onig_initialize_match_param(mp); + if(MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) { + onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); + } + /* search */ + err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str, + (const OnigUChar*)str + str_len, (const OnigUChar *)str, + (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp); + onig_free_match_param(mp); + return err >= 0; } /* }}} */ @@ -1502,6 +1511,9 @@ PHP_INI_BEGIN() PHP_INI_ALL, OnUpdateBool, strict_detection, zend_mbstring_globals, mbstring_globals) +#if HAVE_MBREGEX + 
STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals) +#endif PHP_INI_END() /* }}} */ diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index ede7b17b247..29fe1e97e2c 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -166,6 +166,7 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) void *http_output_conv_mimetypes; #if HAVE_MBREGEX struct _zend_mb_regex_globals *mb_regex_globals; + zend_long regex_stack_limit; #endif char *last_used_encoding_name; const mbfl_encoding *last_used_encoding; diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 319ee567c63..10c7f3e272c 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -850,6 +850,23 @@ PHP_FUNCTION(mb_regex_encoding) } /* }}} */ +/* {{{ _php_mb_onig_search */ +static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, + const OnigUChar* range, OnigRegion* region, OnigOptionType option) { + OnigMatchParam *mp = onig_new_match_param(); + int err; + onig_initialize_match_param(mp); + if(MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) { + onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); + } + /* search */ + err = onig_search_with_param(reg, str, end, start, range, region, option, mp); + onig_free_match_param(mp); + return err; +} +/* }}} */ + + /* {{{ _php_mb_regex_ereg_exec */ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) { @@ -909,7 +926,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) regs = onig_region_new(); /* actually execute the regular expression */ - if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) { + if (_php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + 
string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) { RETVAL_FALSE; goto out; } @@ -1086,7 +1103,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp string_lim = (OnigUChar*)(string + string_len); regs = onig_region_new(); while (err >= 0) { - err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0); + err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0); if (err <= -2) { OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(err_str, err); @@ -1262,7 +1279,7 @@ PHP_FUNCTION(mb_split) /* churn through str, generating array entries as we go */ while (count != 0 && (size_t)(pos - (OnigUChar *)string) < string_len) { size_t beg, end; - err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0); + err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0); if (err < 0) { break; } @@ -1319,6 +1336,7 @@ PHP_FUNCTION(mb_ereg_match) OnigSyntaxType *syntax; OnigOptionType option = 0; int err; + OnigMatchParam *mp; { char *option_str = NULL; @@ -1342,8 +1360,14 @@ PHP_FUNCTION(mb_ereg_match) RETURN_FALSE; } + mp = onig_new_match_param(); + onig_initialize_match_param(mp); + if(MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) { + onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); + } /* match */ - err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0); + err = onig_match_with_param(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0, mp); + onig_free_match_param(mp); if (err >= 0) { RETVAL_TRUE; } else { @@ -1406,7 +1430,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int 
mode) } MBREX(search_regs) = onig_region_new(); - err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0); + err = _php_mb_onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0); if (err == ONIG_MISMATCH) { MBREX(search_pos) = len; RETVAL_FALSE; diff --git a/ext/mbstring/tests/mbregex_stack_limit.phpt b/ext/mbstring/tests/mbregex_stack_limit.phpt new file mode 100644 index 00000000000..9d0f3acc9d4 --- /dev/null +++ b/ext/mbstring/tests/mbregex_stack_limit.phpt @@ -0,0 +1,24 @@ +--TEST-- +Test oniguruma stack limit +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(false) +bool(false) +int(1) +OK diff --git a/ext/mbstring/tests/mbregex_stack_limit2.phpt b/ext/mbstring/tests/mbregex_stack_limit2.phpt new file mode 100644 index 00000000000..12c8c8edab2 --- /dev/null +++ b/ext/mbstring/tests/mbregex_stack_limit2.phpt @@ -0,0 +1,25 @@ +--TEST-- +Test oniguruma stack limit +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +Warning: mb_ereg_replace(): mbregex search failure in php_mbereg_replace_exec(): match-stack limit over in %s on line %d +string(0) "" +OK From bc4cb277d3ddaecee8285b98d248380b454edbfd Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 28 Mar 2019 22:57:07 -0700 Subject: [PATCH 2/4] Add mbstring.regex_stack_limit to php.ini-* --- php.ini-development | 5 +++++ php.ini-production | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/php.ini-development b/php.ini-development index fcc95fd3c50..320cf170a3b 100644 --- a/php.ini-development +++ b/php.ini-development @@ -1712,6 +1712,11 @@ zend.assertions = 1 ; Default: mbstring.http_output_conv_mimetype=^(text/|application/xhtml\+xml) ;mbstring.http_output_conv_mimetype= +; This directive specifies maximum stack depth for mbstring regular expressions. It is similar +; to the pcre.recursion_limit for PCRE. 
+; Default: 100000 +;mbstring.regex_stack_limit=100000 + [gd] ; Tell the jpeg decode to ignore warnings and try to create ; a gd image. The warning will then be displayed as notices diff --git a/php.ini-production b/php.ini-production index 909bf6d8dce..656ae673021 100644 --- a/php.ini-production +++ b/php.ini-production @@ -1719,6 +1719,11 @@ zend.assertions = -1 ; Default: mbstring.http_output_conv_mimetype=^(text/|application/xhtml\+xml) ;mbstring.http_output_conv_mimetype= +; This directive specifies maximum stack depth for mbstring regular expressions. It is similar +; to the pcre.recursion_limit for PCRE. +; Default: 100000 +;mbstring.regex_stack_limit=100000 + [gd] ; Tell the jpeg decode to ignore warnings and try to create ; a gd image. The warning will then be displayed as notices From e12c069d33fef2e0b2a8009e1cd4bc4fc2206e67 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Sat, 30 Mar 2019 14:52:04 -0700 Subject: [PATCH 3/4] Add fallbacks for older oniguruma versions --- ext/mbstring/mbstring.c | 13 ++++++++++++- ext/mbstring/php_mbregex.c | 14 +++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 36b6c478b84..28ea4bd0569 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -65,6 +65,17 @@ #include "php_onig_compat.h" #include #undef UChar +#if ONIGURUMA_VERSION_INT < 60800 +typedef void OnigMatchParam; +#define onig_new_match_param() (NULL) +#define onig_initialize_match_param(x) +#define onig_set_match_stack_limit_size_of_match_param(x, y) +#define onig_free_match_param(x) +#define onig_search_with_param(reg, str, end, start, range, region, option, mp) \ + onig_search(reg, str, end, start, range, region, option) +#define onig_match_with_param(re, str, end, at, region, option, mp) \ + onig_match(re, str, end, at, region, option) +#endif #elif HAVE_PCRE || HAVE_BUNDLED_PCRE #include "ext/pcre/php_pcre.h" #endif @@ -1030,7 +1041,7 @@ static int 
_php_mb_match_regex(void *opaque, const char *str, size_t str_len) OnigMatchParam *mp = onig_new_match_param(); int err; onig_initialize_match_param(mp); - if(MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) { + if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) { onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); } /* search */ diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 10c7f3e272c..75b00f547cf 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -34,6 +34,18 @@ #include #undef UChar +#if ONIGURUMA_VERSION_INT < 60800 +typedef void OnigMatchParam; +#define onig_new_match_param() (NULL) +#define onig_initialize_match_param(x) +#define onig_set_match_stack_limit_size_of_match_param(x, y) +#define onig_free_match_param(x) +#define onig_search_with_param(reg, str, end, start, range, region, option, mp) \ + onig_search(reg, str, end, start, range, region, option) +#define onig_match_with_param(re, str, end, at, region, option, mp) \ + onig_match(re, str, end, at, region, option) +#endif + ZEND_EXTERN_MODULE_GLOBALS(mbstring) struct _zend_mb_regex_globals { @@ -856,7 +868,7 @@ static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUCh OnigMatchParam *mp = onig_new_match_param(); int err; onig_initialize_match_param(mp); - if(MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) { + if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) { onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); } /* search */ From 3d5b6f234e3172b193856aec23fd3f6cf14e464b Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Mon, 1 Apr 2019 00:03:49 -0700 Subject: [PATCH 4/4] Update NEWS & UPGRADING --- NEWS | 4 ++++ UPGRADING | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/NEWS b/NEWS index 207338cf61f..ee101478175 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ PHP NEWS . 
Fixed bug #72175 (Impossibility of creating multiple connections to Interbase with php 7.x). (Nikita) +- mbstring: + . Implemented FR #72777 (Implement regex stack limits for mbregex functions). + (Yasuo Ohgaki, Stas) + - MySQLi: . Fixed bug #77773 (Unbuffered queries leak memory - MySQLi / mysqlnd). (Nikita) diff --git a/UPGRADING b/UPGRADING index 2220efb014b..0e44c493ae4 100644 --- a/UPGRADING +++ b/UPGRADING @@ -727,6 +727,10 @@ LDAP: . New INI to set syslog ident string which is prepended to every message. It is used only when error_log is set syslog. +- mbstring.regex_stack_limit + . New INI directive (since 7.3.6) limiting stack depth of mbstring/oniguruma + regular expressions. + ======================================== 12. Windows Support ========================================