From 36a06efdd9f0604093dccbaf96d4e2cb17874dc8 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 2 Nov 2023 11:02:43 +0100 Subject: [PATCH] Make String.new size pools aware. If the required capacity would fit in an embded string, returns one. This can reduce malloc churn for code that use string buffers. --- string.c | 93 ++++++++++++++++++++++++++++++ test/-ext-/string/test_capacity.rb | 2 +- 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/string.c b/string.c index 9f4a345726..13b63f023a 100644 --- a/string.c +++ b/string.c @@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str) return str; } +/* :nodoc: */ +static VALUE +rb_str_s_new(int argc, VALUE *argv, VALUE klass) +{ + if (klass != rb_cString) { + return rb_class_new_instance_pass_kw(argc, argv, klass); + } + + static ID keyword_ids[2]; + VALUE orig, opt, encoding = Qnil, capacity = Qnil; + VALUE kwargs[2]; + rb_encoding *enc = NULL; + + int n = rb_scan_args(argc, argv, "01:", &orig, &opt); + if (NIL_P(opt)) { + return rb_class_new_instance_pass_kw(argc, argv, klass); + } + + keyword_ids[0] = rb_id_encoding(); + CONST_ID(keyword_ids[1], "capacity"); + rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs); + encoding = kwargs[0]; + capacity = kwargs[1]; + + int termlen = 1; + + if (n == 1) { + orig = StringValue(orig); + } + else { + orig = Qnil; + } + + if (UNDEF_P(encoding)) { + if (!NIL_P(orig)) { + encoding = rb_obj_encoding(orig); + } + } + + if (!UNDEF_P(encoding)) { + enc = rb_to_encoding(encoding); + termlen = rb_enc_mbminlen(enc); + } + + // If capacity is nil, we're basically just duping `orig`. + if (UNDEF_P(capacity)) { + if (NIL_P(orig)) { + VALUE empty_str = str_new(klass, "", 0); + if (enc) { + rb_enc_associate(empty_str, enc); + } + return empty_str; + } + VALUE copy = str_duplicate(klass, orig); + rb_enc_associate(copy, enc); + ENC_CODERANGE_CLEAR(copy); + return copy; + } + + long capa = 0; + capa = NUM2LONG(capacity); + if (capa < 0) { + capa = 0; + } + + if (!NIL_P(orig)) { + long orig_capa = rb_str_capacity(orig); + if (orig_capa > capa) { + capa = orig_capa; + } + } + + long fake_len = capa - termlen; + if (fake_len < 0) { + fake_len = 0; + } + + VALUE str = str_new0(klass, NULL, fake_len, termlen); + STR_SET_LEN(str, 0); + TERM_FILL(RSTRING_PTR(str), termlen); + + if (enc) { + rb_enc_associate(str, enc); + } + + if (!NIL_P(orig)) { + rb_str_buf_append(str, orig); + } + + return str; +} + #ifdef NONASCII_MASK #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) @@ -11931,6 +12023,7 @@ Init_String(void) st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString); rb_include_module(rb_cString, rb_mComparable); rb_define_alloc_func(rb_cString, empty_str_alloc); + rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1); rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1); rb_define_method(rb_cString, "initialize", rb_str_init, -1); rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1); diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb index 71f91918e7..2c6c51fdda 100644 --- a/test/-ext-/string/test_capacity.rb +++ b/test/-ext-/string/test_capacity.rb @@ -23,7 +23,7 @@ class Test_StringCapacity < Test::Unit::TestCase def test_s_new_capacity assert_equal("", String.new(capacity: 1000)) assert_equal(String, String.new(capacity: 1000).class) - assert_equal(10000, capa(String.new(capacity: 10000))) + assert_equal(10_000 - 1, capa(String.new(capacity: 10_000))) # Real capa doesn't account for termlen assert_equal("", String.new(capacity: -1000)) assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000)))