Struct: keep direct reference to IMEMO/fields when space allows

It's not rare for structs to have additional ivars, hence are one
of the most common, if not the most common type in the `gen_fields_tbl`.

This can cause Ractor contention, but even in single ractor mode
means having to do a hash lookup to access the ivars, and increase
GC work.

Instead, unless the struct is perfectly right sized, we can store
a reference to the associated IMEMO/fields object right after the
last struct member.

```
compare-ruby: ruby 3.5.0dev (2025-08-06T12:50:36Z struct-ivar-fields-2 9a30d141a1) +PRISM [arm64-darwin24]
built-ruby: ruby 3.5.0dev (2025-08-06T12:57:59Z struct-ivar-fields-2 2ff3ec237f) +PRISM [arm64-darwin24]
warming up.....

|                      |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|member_reader         |    590.317k|  579.246k|
|                      |       1.02x|         -|
|member_writer         |    543.963k|  527.104k|
|                      |       1.03x|         -|
|member_reader_method  |    213.540k|  213.004k|
|                      |       1.00x|         -|
|member_writer_method  |    192.657k|  191.491k|
|                      |       1.01x|         -|
|ivar_reader           |    403.993k|  569.915k|
|                      |           -|     1.41x|
```

Co-Authored-By: Étienne Barrié <etienne.barrie@gmail.com>
This commit is contained in:
Jean Boussier 2025-08-06 14:46:36 +02:00
parent 9b3ad3449b
commit f3206cc79b
8 changed files with 194 additions and 23 deletions

View file

@ -1,5 +1,12 @@
prelude: |
C = Struct.new(:x) do
def initialize(...)
super
@ivar = 42
end
attr_accessor :ivar
class_eval <<-END
def r
#{'x;'*256}
@ -15,11 +22,16 @@ prelude: |
m = method(:x=)
#{'m.call(nil);'*256}
end
def r_ivar
#{'ivar;'*256}
end
END
end
C.new(nil) # ensure common shape is known
obj = C.new(nil)
benchmark:
member_reader: "obj.r"
member_writer: "obj.w"
member_reader_method: "obj.rm"
member_writer_method: "obj.wm"
ivar_reader: "obj.r_ivar"

8
depend
View file

@ -6065,6 +6065,7 @@ hash.$(OBJEXT): $(top_srcdir)/internal/set_table.h
hash.$(OBJEXT): $(top_srcdir)/internal/st.h
hash.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
hash.$(OBJEXT): $(top_srcdir)/internal/string.h
hash.$(OBJEXT): $(top_srcdir)/internal/struct.h
hash.$(OBJEXT): $(top_srcdir)/internal/symbol.h
hash.$(OBJEXT): $(top_srcdir)/internal/thread.h
hash.$(OBJEXT): $(top_srcdir)/internal/time.h
@ -6288,6 +6289,7 @@ hash.$(OBJEXT): {$(VPATH)}symbol.h
hash.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
hash.$(OBJEXT): {$(VPATH)}thread_native.h
hash.$(OBJEXT): {$(VPATH)}util.h
hash.$(OBJEXT): {$(VPATH)}variable.h
hash.$(OBJEXT): {$(VPATH)}vm_core.h
hash.$(OBJEXT): {$(VPATH)}vm_debug.h
hash.$(OBJEXT): {$(VPATH)}vm_opts.h
@ -12926,6 +12928,7 @@ range.$(OBJEXT): $(top_srcdir)/internal/enumerator.h
range.$(OBJEXT): $(top_srcdir)/internal/error.h
range.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
range.$(OBJEXT): $(top_srcdir)/internal/gc.h
range.$(OBJEXT): $(top_srcdir)/internal/imemo.h
range.$(OBJEXT): $(top_srcdir)/internal/numeric.h
range.$(OBJEXT): $(top_srcdir)/internal/range.h
range.$(OBJEXT): $(top_srcdir)/internal/serial.h
@ -12948,6 +12951,7 @@ range.$(OBJEXT): {$(VPATH)}config.h
range.$(OBJEXT): {$(VPATH)}defines.h
range.$(OBJEXT): {$(VPATH)}encoding.h
range.$(OBJEXT): {$(VPATH)}id.h
range.$(OBJEXT): {$(VPATH)}id_table.h
range.$(OBJEXT): {$(VPATH)}intern.h
range.$(OBJEXT): {$(VPATH)}internal.h
range.$(OBJEXT): {$(VPATH)}internal/abi.h
@ -15580,6 +15584,7 @@ shape.$(OBJEXT): $(top_srcdir)/internal/serial.h
shape.$(OBJEXT): $(top_srcdir)/internal/set_table.h
shape.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
shape.$(OBJEXT): $(top_srcdir)/internal/string.h
shape.$(OBJEXT): $(top_srcdir)/internal/struct.h
shape.$(OBJEXT): $(top_srcdir)/internal/symbol.h
shape.$(OBJEXT): $(top_srcdir)/internal/variable.h
shape.$(OBJEXT): $(top_srcdir)/internal/vm.h
@ -16569,6 +16574,7 @@ string.$(OBJEXT): $(top_srcdir)/internal/serial.h
string.$(OBJEXT): $(top_srcdir)/internal/set_table.h
string.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
string.$(OBJEXT): $(top_srcdir)/internal/string.h
string.$(OBJEXT): $(top_srcdir)/internal/struct.h
string.$(OBJEXT): $(top_srcdir)/internal/transcode.h
string.$(OBJEXT): $(top_srcdir)/internal/variable.h
string.$(OBJEXT): $(top_srcdir)/internal/vm.h
@ -16766,6 +16772,7 @@ string.$(OBJEXT): {$(VPATH)}thread.h
string.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
string.$(OBJEXT): {$(VPATH)}thread_native.h
string.$(OBJEXT): {$(VPATH)}util.h
string.$(OBJEXT): {$(VPATH)}variable.h
string.$(OBJEXT): {$(VPATH)}vm_core.h
string.$(OBJEXT): {$(VPATH)}vm_debug.h
string.$(OBJEXT): {$(VPATH)}vm_opts.h
@ -18103,6 +18110,7 @@ variable.$(OBJEXT): $(top_srcdir)/internal/serial.h
variable.$(OBJEXT): $(top_srcdir)/internal/set_table.h
variable.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
variable.$(OBJEXT): $(top_srcdir)/internal/string.h
variable.$(OBJEXT): $(top_srcdir)/internal/struct.h
variable.$(OBJEXT): $(top_srcdir)/internal/symbol.h
variable.$(OBJEXT): $(top_srcdir)/internal/thread.h
variable.$(OBJEXT): $(top_srcdir)/internal/variable.h

13
gc.c
View file

@ -3260,6 +3260,10 @@ rb_gc_mark_children(void *objspace, VALUE obj)
gc_mark_internal(ptr[i]);
}
if (!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS)) {
gc_mark_internal(RSTRUCT_FIELDS_OBJ(obj));
}
break;
}
@ -4188,6 +4192,15 @@ rb_gc_update_object_references(void *objspace, VALUE obj)
for (i = 0; i < len; i++) {
UPDATE_IF_MOVED(objspace, ptr[i]);
}
if (RSTRUCT_EMBED_LEN(obj)) {
if (!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS)) {
UPDATE_IF_MOVED(objspace, ptr[len]);
}
}
else {
UPDATE_IF_MOVED(objspace, RSTRUCT(obj)->as.heap.fields_obj);
}
}
break;
default:

View file

@ -11,10 +11,23 @@
#include "ruby/internal/stdbool.h" /* for bool */
#include "ruby/ruby.h" /* for struct RBasic */
/* Flags of RStruct
*
* 1-7: RSTRUCT_EMBED_LEN
* If non-zero, the struct is embedded (its contents follow the
* header, rather than being on a separately allocated buffer) and
* these bits are the length of the Struct.
* 8: RSTRUCT_GEN_FIELDS
* The struct is embedded and has no space left to store the
* IMEMO/fields reference. Any ivar this struct may have will be in
* the generic_fields_tbl. This flag doesn't imply the struct has
* ivars.
*/
enum {
RSTRUCT_EMBED_LEN_MASK = RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 |
RUBY_FL_USER3 | RUBY_FL_USER2 | RUBY_FL_USER1,
RSTRUCT_EMBED_LEN_SHIFT = (RUBY_FL_USHIFT+1),
RSTRUCT_GEN_FIELDS = RUBY_FL_USER8,
};
struct RStruct {
@ -23,6 +36,7 @@ struct RStruct {
struct {
long len;
const VALUE *ptr;
VALUE fields_obj;
} heap;
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
@ -116,4 +130,31 @@ RSTRUCT_GET(VALUE st, long k)
return RSTRUCT_CONST_PTR(st)[k];
}
static inline VALUE
RSTRUCT_FIELDS_OBJ(VALUE st)
{
const long embed_len = RSTRUCT_EMBED_LEN(st);
VALUE fields_obj;
if (embed_len) {
RUBY_ASSERT(!FL_TEST_RAW(st, RSTRUCT_GEN_FIELDS));
fields_obj = RSTRUCT_GET(st, embed_len);
}
else {
fields_obj = RSTRUCT(st)->as.heap.fields_obj;
}
return fields_obj;
}
static inline void
RSTRUCT_SET_FIELDS_OBJ(VALUE st, VALUE fields_obj)
{
const long embed_len = RSTRUCT_EMBED_LEN(st);
if (embed_len) {
RUBY_ASSERT(!FL_TEST_RAW(st, RSTRUCT_GEN_FIELDS));
RSTRUCT_SET(st, embed_len, fields_obj);
}
else {
RB_OBJ_WRITE(st, &RSTRUCT(st)->as.heap.fields_obj, fields_obj);
}
}
#endif /* INTERNAL_STRUCT_H */

View file

@ -811,13 +811,22 @@ struct_alloc(VALUE klass)
{
long n = num_members(klass);
size_t embedded_size = offsetof(struct RStruct, as.ary) + (sizeof(VALUE) * n);
if (RCLASS_MAX_IV_COUNT(klass) > 0) {
embedded_size += sizeof(VALUE);
}
VALUE flags = T_STRUCT | (RGENGC_WB_PROTECTED_STRUCT ? FL_WB_PROTECTED : 0);
if (n > 0 && rb_gc_size_allocatable_p(embedded_size)) {
flags |= n << RSTRUCT_EMBED_LEN_SHIFT;
NEWOBJ_OF(st, struct RStruct, klass, flags, embedded_size, 0);
if (RCLASS_MAX_IV_COUNT(klass) == 0 && embedded_size == rb_gc_obj_slot_size((VALUE)st)) {
FL_SET_RAW((VALUE)st, RSTRUCT_GEN_FIELDS);
}
else {
RSTRUCT_SET_FIELDS_OBJ((VALUE)st, 0);
}
rb_mem_clear((VALUE *)st->as.ary, n);
return (VALUE)st;

View file

@ -252,3 +252,52 @@ class TestObjectIdRactor < Test::Unit::TestCase
end;
end
end
class TestObjectIdStruct < TestObjectId
EmbeddedStruct = Struct.new(:embedded_field)
def setup
@obj = EmbeddedStruct.new
end
end
class TestObjectIdStructGenIvar < TestObjectId
GenIvarStruct = Struct.new(:a, :b, :c)
def setup
@obj = GenIvarStruct.new
end
end
class TestObjectIdStructNotEmbed < TestObjectId
MANY_IVS = 80
StructNotEmbed = Struct.new(*MANY_IVS.times.map { |i| :"field_#{i}" })
def setup
@obj = StructNotEmbed.new
end
end
class TestObjectIdStructTooComplex < TestObjectId
StructTooComplex = Struct.new(:a) do
def initialize
@too_complex_obj_id_test = 1
end
end
def setup
if defined?(RubyVM::Shape::SHAPE_MAX_VARIATIONS)
assert_equal 8, RubyVM::Shape::SHAPE_MAX_VARIATIONS
end
8.times do |i|
StructTooComplex.new.instance_variable_set("@TestObjectIdStructTooComplex#{i}", 1)
end
@obj = StructTooComplex.new
@obj.instance_variable_set("@a#{rand(10_000)}", 1)
if defined?(RubyVM::Shape)
assert_predicate(RubyVM::Shape.of(@obj), :too_complex?)
end
end
end

View file

@ -99,6 +99,24 @@ class TestRactor < Test::Unit::TestCase
RUBY
end
def test_struct_instance_variables
assert_ractor(<<~'RUBY')
StructIvar = Struct.new(:member) do
def initialize(*)
super
@ivar = "ivar"
end
attr_reader :ivar
end
obj = StructIvar.new("member")
obj_copy = Ractor.new { Ractor.receive }.send(obj).value
assert_equal obj.ivar, obj_copy.ivar
refute_same obj.ivar, obj_copy.ivar
assert_equal obj.member, obj_copy.member
refute_same obj.member, obj_copy.member
RUBY
end
def test_fork_raise_isolation_error
assert_ractor(<<~'RUBY')
ractor = Ractor.new do

View file

@ -29,6 +29,7 @@
#include "internal/object.h"
#include "internal/gc.h"
#include "internal/re.h"
#include "internal/struct.h"
#include "internal/symbol.h"
#include "internal/thread.h"
#include "internal/variable.h"
@ -1228,10 +1229,19 @@ rb_obj_fields(VALUE obj, ID field_name)
ivar_ractor_check(obj, field_name);
VALUE fields_obj = 0;
if (rb_obj_exivar_p(obj)) {
RB_VM_LOCKING() {
if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) {
rb_bug("Object is missing entry in generic_fields_tbl");
if (rb_shape_obj_has_fields(obj)) {
switch (BUILTIN_TYPE(obj)) {
case T_STRUCT:
if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) {
fields_obj = RSTRUCT_FIELDS_OBJ(obj);
break;
}
// fall through
default:
RB_VM_LOCKING() {
if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) {
rb_bug("Object is missing entry in generic_fields_tbl");
}
}
}
}
@ -1243,11 +1253,19 @@ rb_free_generic_ivar(VALUE obj)
{
if (rb_obj_exivar_p(obj)) {
st_data_t key = (st_data_t)obj, value;
RB_VM_LOCKING() {
st_delete(generic_fields_tbl_no_ractor_check(), &key, &value);
RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID);
switch (BUILTIN_TYPE(obj)) {
case T_STRUCT:
if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) {
RSTRUCT_SET_FIELDS_OBJ(obj, 0);
break;
}
// fall through
default:
RB_VM_LOCKING() {
st_delete(generic_fields_tbl_no_ractor_check(), &key, &value);
}
}
RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID);
}
}
@ -1260,12 +1278,20 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie
RUBY_ASSERT(!original_fields_obj || IMEMO_TYPE_P(original_fields_obj, imemo_fields));
if (fields_obj != original_fields_obj) {
RB_VM_LOCKING() {
st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj);
switch (BUILTIN_TYPE(obj)) {
case T_STRUCT:
if (LIKELY(!FL_TEST_RAW(obj, RSTRUCT_GEN_FIELDS))) {
RSTRUCT_SET_FIELDS_OBJ(obj, fields_obj);
break;
}
// fall through
default:
RB_VM_LOCKING() {
st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj);
}
RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj);
}
RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj);
if (original_fields_obj) {
// Clear root shape to avoid triggering cleanup such as free_object_id.
rb_imemo_fields_clear(original_fields_obj);
@ -1276,11 +1302,11 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie
}
void
rb_obj_replace_fields(VALUE obj, VALUE fields_obj, ID field_name)
rb_obj_replace_fields(VALUE obj, VALUE fields_obj)
{
RB_VM_LOCKING() {
VALUE original_fields_obj = rb_obj_fields(obj, field_name);
rb_obj_set_fields(obj, fields_obj, field_name, original_fields_obj);
VALUE original_fields_obj = rb_obj_fields_no_ractor_check(obj);
rb_obj_set_fields(obj, fields_obj, 0, original_fields_obj);
}
}
@ -1608,7 +1634,7 @@ obj_transition_too_complex(VALUE obj, st_table *table)
{
VALUE fields_obj = rb_imemo_fields_new_complex_tbl(rb_obj_class(obj), table);
RBASIC_SET_SHAPE_ID(fields_obj, shape_id);
rb_obj_replace_fields(obj, fields_obj, 0);
rb_obj_replace_fields(obj, fields_obj);
}
}
@ -2299,12 +2325,7 @@ rb_copy_generic_ivar(VALUE dest, VALUE obj)
rb_shape_copy_fields(new_fields_obj, dest_buf, dest_shape_id, src_buf, src_shape_id);
RBASIC_SET_SHAPE_ID(new_fields_obj, dest_shape_id);
RB_VM_LOCKING() {
st_insert(generic_fields_tbl_no_ractor_check(), (st_data_t)dest, (st_data_t)new_fields_obj);
RB_OBJ_WRITTEN(dest, Qundef, new_fields_obj);
}
RBASIC_SET_SHAPE_ID(dest, dest_shape_id);
rb_obj_replace_fields(dest, new_fields_obj);
}
return;