Make String.new size pools aware.

If the required capacity would fit in an embded string,
returns one.

This can reduce malloc churn for code that use string buffers.
This commit is contained in:
Jean Boussier 2023-11-02 11:02:43 +01:00
parent 42f368ead5
commit 36a06efdd9
2 changed files with 94 additions and 1 deletions

View file

@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
return str;
}
/* :nodoc: */
static VALUE
rb_str_s_new(int argc, VALUE *argv, VALUE klass)
{
if (klass != rb_cString) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
static ID keyword_ids[2];
VALUE orig, opt, encoding = Qnil, capacity = Qnil;
VALUE kwargs[2];
rb_encoding *enc = NULL;
int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
if (NIL_P(opt)) {
return rb_class_new_instance_pass_kw(argc, argv, klass);
}
keyword_ids[0] = rb_id_encoding();
CONST_ID(keyword_ids[1], "capacity");
rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
encoding = kwargs[0];
capacity = kwargs[1];
int termlen = 1;
if (n == 1) {
orig = StringValue(orig);
}
else {
orig = Qnil;
}
if (UNDEF_P(encoding)) {
if (!NIL_P(orig)) {
encoding = rb_obj_encoding(orig);
}
}
if (!UNDEF_P(encoding)) {
enc = rb_to_encoding(encoding);
termlen = rb_enc_mbminlen(enc);
}
// If capacity is nil, we're basically just duping `orig`.
if (UNDEF_P(capacity)) {
if (NIL_P(orig)) {
VALUE empty_str = str_new(klass, "", 0);
if (enc) {
rb_enc_associate(empty_str, enc);
}
return empty_str;
}
VALUE copy = str_duplicate(klass, orig);
rb_enc_associate(copy, enc);
ENC_CODERANGE_CLEAR(copy);
return copy;
}
long capa = 0;
capa = NUM2LONG(capacity);
if (capa < 0) {
capa = 0;
}
if (!NIL_P(orig)) {
long orig_capa = rb_str_capacity(orig);
if (orig_capa > capa) {
capa = orig_capa;
}
}
long fake_len = capa - termlen;
if (fake_len < 0) {
fake_len = 0;
}
VALUE str = str_new0(klass, NULL, fake_len, termlen);
STR_SET_LEN(str, 0);
TERM_FILL(RSTRING_PTR(str), termlen);
if (enc) {
rb_enc_associate(str, enc);
}
if (!NIL_P(orig)) {
rb_str_buf_append(str, orig);
}
return str;
}
#ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
@ -11931,6 +12023,7 @@ Init_String(void)
st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString);
rb_include_module(rb_cString, rb_mComparable);
rb_define_alloc_func(rb_cString, empty_str_alloc);
rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1);
rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
rb_define_method(rb_cString, "initialize", rb_str_init, -1);
rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);

View file

@ -23,7 +23,7 @@ class Test_StringCapacity < Test::Unit::TestCase
def test_s_new_capacity
assert_equal("", String.new(capacity: 1000))
assert_equal(String, String.new(capacity: 1000).class)
assert_equal(10000, capa(String.new(capacity: 10000)))
assert_equal(10_000 - 1, capa(String.new(capacity: 10_000))) # Real capa doesn't account for termlen
assert_equal("", String.new(capacity: -1000))
assert_equal(capa(String.new(capacity: -10000)), capa(String.new(capacity: -1000)))