ruby/benchmark/string_gsub.yml
Jean Boussier f32d5071b7 Elide string allocation when using String#gsub in MAP mode
If the provided Hash doesn't have a default proc, we know for
sure that we'll never call into user-provided code, so the
string we allocate to look up the Hash entry can't possibly escape.

So we don't actually have to allocate it; we can use a fake_str,
AKA a stack-allocated string.

```
compare-ruby: ruby 3.5.0dev (2025-02-10T13:47:44Z master 3fb455adab) +PRISM [arm64-darwin23]
built-ruby: ruby 3.5.0dev (2025-02-10T17:09:52Z opt-gsub-alloc ea5c28958f) +PRISM [arm64-darwin23]
warming up....

|                 |compare-ruby|built-ruby|
|:----------------|-----------:|---------:|
|escape           |      3.374k|    3.722k|
|                 |           -|     1.10x|
|escape_bin       |      5.469k|    6.587k|
|                 |           -|     1.20x|
|escape_utf8      |      3.465k|    3.734k|
|                 |           -|     1.08x|
|escape_utf8_bin  |      5.752k|    7.283k|
|                 |           -|     1.27x|
```
2025-02-12 10:23:50 +01:00

43 lines
1 KiB
YAML

# Shared setup for the benchmark snippets: the input strings, the escape
# tables, and the patterns derived from the tables' keys.
prelude: |
  # frozen_string_literal: true

  # ASCII input: 31 "a"s followed by one "<", repeated 1000 times, so one
  # character in every 32 is replaced.
  STR = ((("a" * 31) + "<") * 1000).freeze

  # Same layout with a multibyte match: 30 "a"s followed by U+2028,
  # repeated 1000 times.
  STR_UNICODE = ((("a" * 30) + "\u2028") * 1000).freeze

  # Binary (ASCII-8BIT) escape table. Keys are double-quoted, so they hold the
  # real characters; values are single-quoted, so they hold the literal
  # backslash-u text. String#b returns a new, unfrozen copy, hence every value
  # is frozen explicitly. The table is a plain Hash literal with no default
  # proc.
  ESCAPED_CHARS_BINARY = {
    "\u2028".b => '\u2028'.b.freeze,
    "\u2029".b => '\u2029'.b.freeze,
    ">".b => '\u003e'.b.freeze,
    "<".b => '\u003c'.b.freeze,
    "&".b => '\u0026'.b.freeze,
  }.freeze
  BINARY_PATTERN = Regexp.union(ESCAPED_CHARS_BINARY.keys)

  # Escape table in the source encoding; same key/value quoting convention as
  # the binary table above it. Also a plain Hash literal with no default proc.
  ESCAPED_CHARS = {
    "\u2028" => '\u2028',
    "\u2029" => '\u2029',
    ">" => '\u003e',
    "<" => '\u003c',
    "&" => '\u0026',
  }.freeze
  ESCAPE_PATTERN = Regexp.union(ESCAPED_CHARS.keys)
# Ruby snippets to be measured. Each one takes a fresh copy of a prelude
# string, escapes it in place with gsub! using a Hash as the replacement,
# and evaluates to the escaped string.
benchmark:
  # ASCII input with the source-encoding pattern and table.
  escape: |
    buffer = STR.dup
    buffer.gsub!(ESCAPE_PATTERN, ESCAPED_CHARS)
    buffer
  # ASCII input copied as binary, matched with the binary pattern and table,
  # then retagged as UTF-8.
  escape_bin: |
    buffer = STR.b
    buffer.gsub!(BINARY_PATTERN, ESCAPED_CHARS_BINARY)
    buffer.force_encoding(Encoding::UTF_8)
  # Input containing U+2028 with the source-encoding pattern and table.
  escape_utf8: |
    buffer = STR_UNICODE.dup
    buffer.gsub!(ESCAPE_PATTERN, ESCAPED_CHARS)
    buffer
  # Input containing U+2028 copied as binary, matched with the binary pattern
  # and table, then retagged as UTF-8.
  escape_utf8_bin: |
    buffer = STR_UNICODE.b
    buffer.gsub!(BINARY_PATTERN, ESCAPED_CHARS_BINARY)
    buffer.force_encoding(Encoding::UTF_8)