Fix memory leak in Regexp capture groups on timeout

[Bug #20650]

Capture groups allocate memory that is leaked when the match times out.

For example:

    re = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
    str = "a" * 1000000 + "x"

    10.times do
      100.times do
        re =~ str
      rescue Regexp::TimeoutError
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    34688
    56416
    78288
    100368
    120784
    140704
    161904
    183568
    204320
    224800

After:

    16288
    16288
    16880
    16896
    16912
    16928
    16944
    17184
    17184
    17200
This commit is contained in:
Peter Zhu 2024-07-24 15:16:16 -04:00
parent 53f0d3b1f7
commit 10574857ce
Notes: git 2024-07-25 13:24:06 +00:00
3 changed files with 66 additions and 24 deletions

View file

@ -636,6 +636,7 @@ ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_TIMEOUT -23
/* general error */ /* general error */
#define ONIGERR_INVALID_ARGUMENT -30 #define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */ /* syntax error */

View file

@ -4220,7 +4220,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
xfree(xmalloc_base); xfree(xmalloc_base);
if (stk_base != stk_alloc || IS_NOT_NULL(msa->stack_p)) if (stk_base != stk_alloc || IS_NOT_NULL(msa->stack_p))
xfree(stk_base); xfree(stk_base);
HANDLE_REG_TIMEOUT_IN_MATCH_AT; return ONIGERR_TIMEOUT;
} }
@ -5212,10 +5212,15 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(upper_range) \ # define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\ switch (r) { \
if (r >= 0) {\ case ONIG_MISMATCH: \
if (! IS_FIND_LONGEST(reg->options)) {\ break; \
goto match;\ case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
if (! IS_FIND_LONGEST(reg->options)) { \
goto match; \
}\ }\
}\ }\
else goto finish; /* error */ \ else goto finish; /* error */ \
@ -5223,9 +5228,14 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
# else # else
# define MATCH_AND_RETURN_CHECK(upper_range) \ # define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\ switch (r) { \
if (r >= 0) {\ case ONIG_MISMATCH: \
goto match;\ break; \
case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
goto match; \
}\ }\
else goto finish; /* error */ \ else goto finish; /* error */ \
} }
@ -5234,21 +5244,31 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_AND_RETURN_CHECK(none) \ # define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\ r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\ switch (r) { \
if (r >= 0) {\ case ONIG_MISMATCH: \
if (! IS_FIND_LONGEST(reg->options)) {\ break; \
goto match;\ case ONIGERR_TIMEOUT: \
}\ goto timeout; \
}\ default: \
if (r >= 0) { \
if (! IS_FIND_LONGEST(reg->options)) { \
goto match; \
} \
} \
else goto finish; /* error */ \ else goto finish; /* error */ \
} }
# else # else
# define MATCH_AND_RETURN_CHECK(none) \ # define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\ r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\ switch (r) { \
if (r >= 0) {\ case ONIG_MISMATCH: \
goto match;\ break; \
}\ case ONIGERR_TIMEOUT: \
goto timeout; \
default: \
if (r >= 0) { \
goto match; \
} \
else goto finish; /* error */ \ else goto finish; /* error */ \
} }
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
@ -5552,6 +5572,11 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
match: match:
MATCH_ARG_FREE(msa); MATCH_ARG_FREE(msa);
return s - str; return s - str;
timeout:
MATCH_ARG_FREE(msa);
onig_region_free(region, false);
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
} }
extern OnigPosition extern OnigPosition

View file

@ -1902,6 +1902,22 @@ class TestRegexp < Test::Unit::TestCase
end; end;
end end
# Regression test for [Bug #20650]: repeatedly matches a long string against a
# regexp built from 10_000 "(a*)" capture groups with a 0.000001 timeout, and
# hands the scripts to assert_no_memory_leak (with rss: true).
def test_timeout_memory_leak
# Two heredocs are passed as code strings: the first (terminated by "begin;")
# builds the regexp and string, defines `code`, and runs it 10 times; the
# second (terminated by 'end;') runs `code` 1_000 times.
# NOTE(review): presumably the first script is the warm-up/preparation and the
# second is the measured workload -- confirm against assert_no_memory_leak.
# The bare `rescue` inside `code` swallows whatever StandardError the match
# raises, so the loop keeps going after each failed match.
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~'end;'}", "[Bug #20650]", timeout: 100, rss: true)
regex = Regexp.new("^#{"(a*)" * 10_000}x$", timeout: 0.000001)
str = "a" * 1_000_000 + "x"
code = proc do
regex =~ str
rescue
end
10.times(&code)
begin;
1_000.times(&code)
end;
end
def test_match_cache_exponential def test_match_cache_exponential
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
timeout = #{ EnvUtil.apply_timeout_scale(10).inspect } timeout = #{ EnvUtil.apply_timeout_scale(10).inspect }