From 57a0afe2090b8d05673d650b1e8bf9ae67449b1f Mon Sep 17 00:00:00 2001 From: "NARUSE, Yui" Date: Thu, 21 Mar 2024 11:23:21 +0900 Subject: [PATCH] merge revision(s) 081ee3d35509110f383cb7dd8d1205def0cdd1e8,1c97abaabae6844c861705fd07f532292dcffa74: [Backport #19907] (#10315) Add memory leak test for eval kwargs De-dup identical callinfo objects Previously every call to vm_ci_new (when the CI was not packable) would result in a different callinfo being returned this meant that every kwarg callsite had its own CI. When calling, different CIs result in different CCs. These CIs and CCs both end up persisted on the T_CLASS inside cc_tbl. So in an eval loop this resulted in a memory leak of both types of object. This also likely resulted in extra memory used, and extra time searching, in non-eval cases. For simplicity in this commit I always allocate a CI object inside rb_vm_ci_lookup, but ideally we would lazily allocate it only when needed. I hope to do that as a follow up in the future. --- gc.c | 1 + internal/class.h | 2 +- test/ruby/test_method.rb | 8 +++ vm.c | 7 +++ vm_callinfo.h | 15 ++---- vm_core.h | 1 + vm_method.c | 114 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 137 insertions(+), 11 deletions(-) diff --git a/gc.c b/gc.c index 6419f8ff25..378e4a31cb 100644 --- a/gc.c +++ b/gc.c @@ -3791,6 +3791,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) case imemo_callinfo: { const struct rb_callinfo * ci = ((const struct rb_callinfo *)obj); + rb_vm_ci_free(ci); if (ci->kwarg) { ((struct rb_callinfo_kwarg *)ci->kwarg)->references--; if (ci->kwarg->references == 0) xfree((void *)ci->kwarg); diff --git a/internal/class.h b/internal/class.h index fcdface028..74ad956e3e 100644 --- a/internal/class.h +++ b/internal/class.h @@ -44,7 +44,7 @@ struct rb_classext_struct { VALUE *iv_ptr; struct rb_id_table *const_tbl; struct rb_id_table *callable_m_tbl; - struct rb_id_table *cc_tbl; /* ID -> [[ci, cc1], cc2, ...] */ + struct rb_id_table *cc_tbl; /* ID -> [[ci1, cc1], [ci2, cc2] ...] */ struct rb_id_table *cvc_tbl; size_t superclass_depth; VALUE *superclasses; diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb index b22dd9cbee..90635bc5e5 100644 --- a/test/ruby/test_method.rb +++ b/test/ruby/test_method.rb @@ -1614,4 +1614,12 @@ class TestMethod < Test::Unit::TestCase def test_invalidating_CC_ASAN assert_ruby_status(['-e', 'using Module.new']) end + + def test_kwarg_eval_memory_leak + assert_no_memory_leak([], "", <<~RUBY, rss: true, limit: 1.2) + 100_000.times do + eval("Hash.new(foo: 123)") + end + RUBY + end end diff --git a/vm.c b/vm.c index 96f4189b14..665ffcbded 100644 --- a/vm.c +++ b/vm.c @@ -2809,6 +2809,7 @@ rb_vm_update_references(void *ptr) if (ptr) { rb_vm_t *vm = ptr; + rb_gc_update_tbl_refs(vm->ci_table); rb_gc_update_tbl_refs(vm->frozen_strings); vm->mark_object_ary = rb_gc_location(vm->mark_object_ary); vm->load_path = rb_gc_location(vm->load_path); @@ -3046,6 +3047,10 @@ ruby_vm_destruct(rb_vm_t *vm) st_free_table(vm->loading_table); vm->loading_table = 0; } + if (vm->ci_table) { + st_free_table(vm->ci_table); + vm->ci_table = NULL; + } if (vm->frozen_strings) { st_free_table(vm->frozen_strings); vm->frozen_strings = 0; @@ -3136,6 +3141,7 @@ vm_memsize(const void *ptr) rb_vm_memsize_workqueue(&vm->workqueue) + rb_st_memsize(vm->defined_module_hash) + vm_memsize_at_exit_list(vm->at_exit) + + rb_st_memsize(vm->ci_table) + rb_st_memsize(vm->frozen_strings) + vm_memsize_builtin_function_table(vm->builtin_function_table) + rb_id_table_memsize(vm->negative_cme_table) + @@ -4220,6 +4226,7 @@ Init_vm_objects(void) /* initialize mark object array, hash */ vm->mark_object_ary = rb_ary_hidden_new(128); vm->loading_table = st_init_strtable(); + vm->ci_table = st_init_table(&vm_ci_hashtype); vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000); } diff --git a/vm_callinfo.h b/vm_callinfo.h index 8437f2176c..ab7c199e3c 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -197,12 +197,13 @@ vm_ci_dump(const struct rb_callinfo *ci) (((VALUE)(argc)) << CI_EMBED_ARGC_SHFT) | \ RUBY_FIXNUM_FLAG)) +// vm_method.c +const struct rb_callinfo *rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg); +void rb_vm_ci_free(const struct rb_callinfo *); + static inline const struct rb_callinfo * vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg, const char *file, int line) { - if (kwarg) { - ((struct rb_callinfo_kwarg *)kwarg)->references++; - } if (USE_EMBED_CI && VM_CI_EMBEDDABLE_P(mid, flag, argc, kwarg)) { RB_DEBUG_COUNTER_INC(ci_packed); return vm_ci_new_id(mid, flag, argc, kwarg); @@ -211,13 +212,7 @@ vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinf const bool debug = 0; if (debug) ruby_debug_printf("%s:%d ", file, line); - // TODO: dedup - const struct rb_callinfo *ci = (const struct rb_callinfo *) - rb_imemo_new(imemo_callinfo, - (VALUE)mid, - (VALUE)flag, - (VALUE)argc, - (VALUE)kwarg); + const struct rb_callinfo *ci = rb_vm_ci_lookup(mid, flag, argc, kwarg); if (debug) rp(ci); if (kwarg) { RB_DEBUG_COUNTER_INC(ci_kw); diff --git a/vm_core.h b/vm_core.h index ef770ab441..e192ae2325 100644 --- a/vm_core.h +++ b/vm_core.h @@ -750,6 +750,7 @@ typedef struct rb_vm_struct { const struct rb_builtin_function *builtin_function_table; int builtin_inline_index; + st_table *ci_table; struct rb_id_table *negative_cme_table; st_table *overloaded_cme_table; // cme -> overloaded_cme diff --git a/vm_method.c b/vm_method.c index 65c7178cc5..6025da4dbd 100644 --- a/vm_method.c +++ b/vm_method.c @@ -337,6 +337,120 @@ invalidate_all_refinement_cc(void *vstart, void *vend, size_t stride, void *data return 0; // continue to iteration } +static st_index_t +vm_ci_hash(VALUE v) +{ + const struct rb_callinfo *ci = (const struct rb_callinfo *)v; + st_index_t h; + h = rb_hash_start(ci->mid); + h = rb_hash_uint(h, ci->flag); + h = rb_hash_uint(h, ci->argc); + if (ci->kwarg) { + for (int i = 0; i < ci->kwarg->keyword_len; i++) { + h = rb_hash_uint(h, ci->kwarg->keywords[i]); + } + } + return h; +} + +static int +vm_ci_hash_cmp(VALUE v1, VALUE v2) +{ + const struct rb_callinfo *ci1 = (const struct rb_callinfo *)v1; + const struct rb_callinfo *ci2 = (const struct rb_callinfo *)v2; + if (ci1->mid != ci2->mid) return 1; + if (ci1->flag != ci2->flag) return 1; + if (ci1->argc != ci2->argc) return 1; + if (ci1->kwarg != NULL) { + VM_ASSERT(ci2->kwarg != NULL); // implied by matching flags + + if (ci1->kwarg->keyword_len != ci2->kwarg->keyword_len) + return 1; + + for (int i = 0; i < ci1->kwarg->keyword_len; i++) { + if (ci1->kwarg->keywords[i] != ci2->kwarg->keywords[i]) { + return 1; + } + } + } else { + VM_ASSERT(ci2->kwarg == NULL); // implied by matching flags + } + return 0; +} + +static const struct st_hash_type vm_ci_hashtype = { + vm_ci_hash_cmp, + vm_ci_hash +}; + +static int +ci_lookup_i(st_data_t *key, st_data_t *value, st_data_t data, int existing) +{ + const struct rb_callinfo *ci = (const struct rb_callinfo *)*key; + st_data_t *ret = (st_data_t *)data; + + if (existing) { + if (rb_objspace_garbage_object_p((VALUE)ci)) { + *ret = (st_data_t)NULL; + return ST_DELETE; + } else { + *ret = *key; + return ST_STOP; + } + } + else { + *key = *value = *ret = (st_data_t)ci; + return ST_CONTINUE; + } +} + +const struct rb_callinfo * +rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg) +{ + rb_vm_t *vm = GET_VM(); + const struct rb_callinfo *ci = NULL; + + if (kwarg) { + ((struct rb_callinfo_kwarg *)kwarg)->references++; + } + const struct rb_callinfo *new_ci = (const struct rb_callinfo *) + rb_imemo_new( + imemo_callinfo, + (VALUE)mid, + (VALUE)flag, + (VALUE)argc, + (VALUE)kwarg); + + RB_VM_LOCK_ENTER(); + { + st_table *ci_table = vm->ci_table; + VM_ASSERT(ci_table); + + do { + st_update(ci_table, (st_data_t)new_ci, ci_lookup_i, (st_data_t)&ci); + } while (ci == NULL); + } + RB_VM_LOCK_LEAVE(); + + VM_ASSERT(ci); + VM_ASSERT(vm_ci_markable(ci)); + + return ci; +} + +void +rb_vm_ci_free(const struct rb_callinfo *ci) +{ + rb_vm_t *vm = GET_VM(); + + RB_VM_LOCK_ENTER(); + { + st_data_t key = (st_data_t)ci; + st_delete(vm->ci_table, &key, NULL); + } + RB_VM_LOCK_LEAVE(); +} + void rb_clear_all_refinement_method_cache(void) {