String#gsub! Elide MatchData allocation when we know it can't escape

If gsub is used with a string replacement or a map that doesn't
have a default proc, we know for sure no code can cause the MatchData
to escape the `gsub` call.

In such a case, we still have to allocate a new MatchData because we
don't know the lifetime of the backref, but for any subsequent
match we can re-use the MatchData we allocated ourselves, reducing
allocations significantly.

This partially fixes [Misc #20652], except when a block is used,
and partially reduces the performance impact of
abc0304cb2 / [Bug #17507]

```
compare-ruby: ruby 3.5.0dev (2025-02-24T09:44:57Z master 5cf146399f) +PRISM [arm64-darwin24]
built-ruby: ruby 3.5.0dev (2025-02-24T10:58:27Z gsub-elude-match da966636e9) +PRISM [arm64-darwin24]
warming up....

|                 |compare-ruby|built-ruby|
|:----------------|-----------:|---------:|
|escape           |      3.577k|    3.697k|
|                 |           -|     1.03x|
|escape_bin       |      5.869k|    6.743k|
|                 |           -|     1.15x|
|escape_utf8      |      3.448k|    3.738k|
|                 |           -|     1.08x|
|escape_utf8_bin  |      6.361k|    7.267k|
|                 |           -|     1.14x|
```

Co-Authored-By: Étienne Barrié <etienne.barrie@gmail.com>
This commit is contained in:
Jean Boussier 2025-02-24 11:39:00 +01:00
parent 21ac0a3a64
commit 87f9c3c65e
3 changed files with 44 additions and 20 deletions

19
re.c
View file

@ -1521,7 +1521,7 @@ match_set_string(VALUE m, VALUE string, long pos, long len)
rmatch->regs.end[0] = pos + len;
}
void
VALUE
rb_backref_set_string(VALUE string, long pos, long len)
{
VALUE match = rb_backref_get();
@ -1530,6 +1530,7 @@ rb_backref_set_string(VALUE string, long pos, long len)
}
match_set_string(match, string, pos, len);
rb_backref_set(match);
return match;
}
/*
@ -1812,12 +1813,20 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return ONIG_MISMATCH;
}
VALUE match = match_alloc(rb_cMatch);
VALUE match;
if (set_match && RTEST(*set_match)) {
match = *set_match;
}
else {
match = match_alloc(rb_cMatch);
}
rb_matchext_t *rm = RMATCH_EXT(match);
rm->regs = regs;
if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
rb_obj_reveal(match, rb_cMatch);
}
else {
/* Note that a MatchData object with RMATCH(match)->str == 0 is incomplete!
@ -1835,15 +1844,15 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
}
long
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *match)
{
return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, match);
}
long
rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
{
return rb_reg_search0(re, str, pos, reverse, 1);
return rb_reg_search_set_match(re, str, pos, reverse, 1, NULL);
}
static OnigPosition