merge revision(s) 081ee3d35509110f383cb7dd8d1205def0cdd1e8,1c97abaabae6844c861705fd07f532292dcffa74: [Backport #19907] (#10315)

Add memory leak test for eval kwargs

	De-dup identical callinfo objects

	Previously every call to vm_ci_new (when the CI was not packable) would
	result in a different callinfo being returned this meant that every
	kwarg callsite had its own CI.

	When calling, different CIs result in different CCs. These CIs and CCs
	both end up persisted on the T_CLASS inside cc_tbl. So in an eval loop
	this resulted in a memory leak of both types of object. This also likely
	resulted in extra memory used, and extra time searching, in non-eval
	cases.

	For simplicity in this commit I always allocate a CI object inside
	rb_vm_ci_lookup, but ideally we would lazily allocate it only when
	needed. I hope to do that as a follow up in the future.
This commit is contained in:
NARUSE, Yui 2024-03-21 11:23:21 +09:00 committed by GitHub
parent 821719a505
commit 57a0afe209
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 137 additions and 11 deletions

1
gc.c
View file

@ -3791,6 +3791,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
case imemo_callinfo: case imemo_callinfo:
{ {
const struct rb_callinfo * ci = ((const struct rb_callinfo *)obj); const struct rb_callinfo * ci = ((const struct rb_callinfo *)obj);
rb_vm_ci_free(ci);
if (ci->kwarg) { if (ci->kwarg) {
((struct rb_callinfo_kwarg *)ci->kwarg)->references--; ((struct rb_callinfo_kwarg *)ci->kwarg)->references--;
if (ci->kwarg->references == 0) xfree((void *)ci->kwarg); if (ci->kwarg->references == 0) xfree((void *)ci->kwarg);

View file

@ -44,7 +44,7 @@ struct rb_classext_struct {
VALUE *iv_ptr; VALUE *iv_ptr;
struct rb_id_table *const_tbl; struct rb_id_table *const_tbl;
struct rb_id_table *callable_m_tbl; struct rb_id_table *callable_m_tbl;
struct rb_id_table *cc_tbl; /* ID -> [[ci, cc1], cc2, ...] */ struct rb_id_table *cc_tbl; /* ID -> [[ci1, cc1], [ci2, cc2] ...] */
struct rb_id_table *cvc_tbl; struct rb_id_table *cvc_tbl;
size_t superclass_depth; size_t superclass_depth;
VALUE *superclasses; VALUE *superclasses;

View file

@ -1614,4 +1614,12 @@ class TestMethod < Test::Unit::TestCase
def test_invalidating_CC_ASAN def test_invalidating_CC_ASAN
assert_ruby_status(['-e', 'using Module.new']) assert_ruby_status(['-e', 'using Module.new'])
end end
def test_kwarg_eval_memory_leak
assert_no_memory_leak([], "", <<~RUBY, rss: true, limit: 1.2)
100_000.times do
eval("Hash.new(foo: 123)")
end
RUBY
end
end end

7
vm.c
View file

@ -2809,6 +2809,7 @@ rb_vm_update_references(void *ptr)
if (ptr) { if (ptr) {
rb_vm_t *vm = ptr; rb_vm_t *vm = ptr;
rb_gc_update_tbl_refs(vm->ci_table);
rb_gc_update_tbl_refs(vm->frozen_strings); rb_gc_update_tbl_refs(vm->frozen_strings);
vm->mark_object_ary = rb_gc_location(vm->mark_object_ary); vm->mark_object_ary = rb_gc_location(vm->mark_object_ary);
vm->load_path = rb_gc_location(vm->load_path); vm->load_path = rb_gc_location(vm->load_path);
@ -3046,6 +3047,10 @@ ruby_vm_destruct(rb_vm_t *vm)
st_free_table(vm->loading_table); st_free_table(vm->loading_table);
vm->loading_table = 0; vm->loading_table = 0;
} }
if (vm->ci_table) {
st_free_table(vm->ci_table);
vm->ci_table = NULL;
}
if (vm->frozen_strings) { if (vm->frozen_strings) {
st_free_table(vm->frozen_strings); st_free_table(vm->frozen_strings);
vm->frozen_strings = 0; vm->frozen_strings = 0;
@ -3136,6 +3141,7 @@ vm_memsize(const void *ptr)
rb_vm_memsize_workqueue(&vm->workqueue) + rb_vm_memsize_workqueue(&vm->workqueue) +
rb_st_memsize(vm->defined_module_hash) + rb_st_memsize(vm->defined_module_hash) +
vm_memsize_at_exit_list(vm->at_exit) + vm_memsize_at_exit_list(vm->at_exit) +
rb_st_memsize(vm->ci_table) +
rb_st_memsize(vm->frozen_strings) + rb_st_memsize(vm->frozen_strings) +
vm_memsize_builtin_function_table(vm->builtin_function_table) + vm_memsize_builtin_function_table(vm->builtin_function_table) +
rb_id_table_memsize(vm->negative_cme_table) + rb_id_table_memsize(vm->negative_cme_table) +
@ -4220,6 +4226,7 @@ Init_vm_objects(void)
/* initialize mark object array, hash */ /* initialize mark object array, hash */
vm->mark_object_ary = rb_ary_hidden_new(128); vm->mark_object_ary = rb_ary_hidden_new(128);
vm->loading_table = st_init_strtable(); vm->loading_table = st_init_strtable();
vm->ci_table = st_init_table(&vm_ci_hashtype);
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000); vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000);
} }

View file

@ -197,12 +197,13 @@ vm_ci_dump(const struct rb_callinfo *ci)
(((VALUE)(argc)) << CI_EMBED_ARGC_SHFT) | \ (((VALUE)(argc)) << CI_EMBED_ARGC_SHFT) | \
RUBY_FIXNUM_FLAG)) RUBY_FIXNUM_FLAG))
// vm_method.c
const struct rb_callinfo *rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg);
void rb_vm_ci_free(const struct rb_callinfo *);
static inline const struct rb_callinfo * static inline const struct rb_callinfo *
vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg, const char *file, int line) vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg, const char *file, int line)
{ {
if (kwarg) {
((struct rb_callinfo_kwarg *)kwarg)->references++;
}
if (USE_EMBED_CI && VM_CI_EMBEDDABLE_P(mid, flag, argc, kwarg)) { if (USE_EMBED_CI && VM_CI_EMBEDDABLE_P(mid, flag, argc, kwarg)) {
RB_DEBUG_COUNTER_INC(ci_packed); RB_DEBUG_COUNTER_INC(ci_packed);
return vm_ci_new_id(mid, flag, argc, kwarg); return vm_ci_new_id(mid, flag, argc, kwarg);
@ -211,13 +212,7 @@ vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinf
const bool debug = 0; const bool debug = 0;
if (debug) ruby_debug_printf("%s:%d ", file, line); if (debug) ruby_debug_printf("%s:%d ", file, line);
// TODO: dedup const struct rb_callinfo *ci = rb_vm_ci_lookup(mid, flag, argc, kwarg);
const struct rb_callinfo *ci = (const struct rb_callinfo *)
rb_imemo_new(imemo_callinfo,
(VALUE)mid,
(VALUE)flag,
(VALUE)argc,
(VALUE)kwarg);
if (debug) rp(ci); if (debug) rp(ci);
if (kwarg) { if (kwarg) {
RB_DEBUG_COUNTER_INC(ci_kw); RB_DEBUG_COUNTER_INC(ci_kw);

View file

@ -750,6 +750,7 @@ typedef struct rb_vm_struct {
const struct rb_builtin_function *builtin_function_table; const struct rb_builtin_function *builtin_function_table;
int builtin_inline_index; int builtin_inline_index;
st_table *ci_table;
struct rb_id_table *negative_cme_table; struct rb_id_table *negative_cme_table;
st_table *overloaded_cme_table; // cme -> overloaded_cme st_table *overloaded_cme_table; // cme -> overloaded_cme

View file

@ -337,6 +337,120 @@ invalidate_all_refinement_cc(void *vstart, void *vend, size_t stride, void *data
return 0; // continue to iteration return 0; // continue to iteration
} }
static st_index_t
vm_ci_hash(VALUE v)
{
const struct rb_callinfo *ci = (const struct rb_callinfo *)v;
st_index_t h;
h = rb_hash_start(ci->mid);
h = rb_hash_uint(h, ci->flag);
h = rb_hash_uint(h, ci->argc);
if (ci->kwarg) {
for (int i = 0; i < ci->kwarg->keyword_len; i++) {
h = rb_hash_uint(h, ci->kwarg->keywords[i]);
}
}
return h;
}
static int
vm_ci_hash_cmp(VALUE v1, VALUE v2)
{
const struct rb_callinfo *ci1 = (const struct rb_callinfo *)v1;
const struct rb_callinfo *ci2 = (const struct rb_callinfo *)v2;
if (ci1->mid != ci2->mid) return 1;
if (ci1->flag != ci2->flag) return 1;
if (ci1->argc != ci2->argc) return 1;
if (ci1->kwarg != NULL) {
VM_ASSERT(ci2->kwarg != NULL); // implied by matching flags
if (ci1->kwarg->keyword_len != ci2->kwarg->keyword_len)
return 1;
for (int i = 0; i < ci1->kwarg->keyword_len; i++) {
if (ci1->kwarg->keywords[i] != ci2->kwarg->keywords[i]) {
return 1;
}
}
} else {
VM_ASSERT(ci2->kwarg == NULL); // implied by matching flags
}
return 0;
}
static const struct st_hash_type vm_ci_hashtype = {
vm_ci_hash_cmp,
vm_ci_hash
};
static int
ci_lookup_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
{
const struct rb_callinfo *ci = (const struct rb_callinfo *)*key;
st_data_t *ret = (st_data_t *)data;
if (existing) {
if (rb_objspace_garbage_object_p((VALUE)ci)) {
*ret = (st_data_t)NULL;
return ST_DELETE;
} else {
*ret = *key;
return ST_STOP;
}
}
else {
*key = *value = *ret = (st_data_t)ci;
return ST_CONTINUE;
}
}
const struct rb_callinfo *
rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg)
{
rb_vm_t *vm = GET_VM();
const struct rb_callinfo *ci = NULL;
if (kwarg) {
((struct rb_callinfo_kwarg *)kwarg)->references++;
}
const struct rb_callinfo *new_ci = (const struct rb_callinfo *)
rb_imemo_new(
imemo_callinfo,
(VALUE)mid,
(VALUE)flag,
(VALUE)argc,
(VALUE)kwarg);
RB_VM_LOCK_ENTER();
{
st_table *ci_table = vm->ci_table;
VM_ASSERT(ci_table);
do {
st_update(ci_table, (st_data_t)new_ci, ci_lookup_i, (st_data_t)&ci);
} while (ci == NULL);
}
RB_VM_LOCK_LEAVE();
VM_ASSERT(ci);
VM_ASSERT(vm_ci_markable(ci));
return ci;
}
void
rb_vm_ci_free(const struct rb_callinfo *ci)
{
rb_vm_t *vm = GET_VM();
RB_VM_LOCK_ENTER();
{
st_data_t key = (st_data_t)ci;
st_delete(vm->ci_table, &key, NULL);
}
RB_VM_LOCK_LEAVE();
}
void void
rb_clear_all_refinement_method_cache(void) rb_clear_all_refinement_method_cache(void)
{ {