extend rb_call_cache

Prior to this changeset, the majority of inline cache mishits resulted
in the same method entry once rb_callable_method_entry() resolved
the method search.  Let's not call the function in the first place in
such situations.

In doing so we extend the struct rb_call_cache from 44 bytes (on
a 64-bit machine) to 64 bytes, and fill the gap with
secondary class serial(s).  The call cache's class serials now behave
as an LRU cache.

Calculating -------------------------------------
                           ours         2.7         2.6
vm2_poly_same_method     2.339M      1.744M      1.369M i/s - 6.000M times in 2.565086s 3.441329s 4.381386s

Comparison:
             vm2_poly_same_method
                ours:   2339103.0 i/s
                 2.7:   1743512.3 i/s - 1.34x  slower
                 2.6:   1369429.8 i/s - 1.71x  slower
This commit is contained in:
卜部昌平 2019-10-07 12:59:57 +09:00
parent 3c252651e1
commit d45a013a1a
Notes: git 2019-11-07 17:41:56 +09:00
6 changed files with 75 additions and 19 deletions

View file

@ -2357,10 +2357,32 @@ struct rb_execution_context_struct;
struct rb_control_frame_struct;
struct rb_calling_info;
struct rb_call_data;
/* Assumed size, in bytes, of one cache line; struct rb_call_cache is sized
* so that it does not exceed this.
*
* I have several reasons to choose 64 here:
*
* - A cache line must be a power-of-two size.
* - Setting this to anything less than or equal to 32 boosts nothing.
* - I have never seen an architecture that has a 128-byte L1 cache line.
* - I know Intel Core and Sparc T4 at least use 64.
* - I know jemalloc internally has this exact same `#define CACHE_LINE 64`.
* https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h
*/
#define CACHELINE 64
struct rb_call_cache {
/* inline cache: keys */
rb_serial_t method_state;
rb_serial_t class_serial;
rb_serial_t class_serial[
(CACHELINE
- sizeof(rb_serial_t) /* method_state */
- sizeof(struct rb_callable_method_entry_struct *) /* me */
- sizeof(struct rb_callable_method_definition_struct *) /* def */
- sizeof(enum method_missing_reason) /* aux */
- sizeof(VALUE (*)( /* call */
struct rb_execution_context_struct *e,
struct rb_control_frame_struct *,
struct rb_calling_info *,
const struct rb_call_data *)))
/ sizeof(rb_serial_t)
];
/* inline cache: values */
const struct rb_callable_method_entry_struct *me;
@ -2377,6 +2399,7 @@ struct rb_call_cache {
int inc_sp; /* used by cfunc */
} aux;
};
/* Compile-time guard: fail the build if struct rb_call_cache grows larger
* than CACHELINE bytes. */
STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE);
struct rb_call_info {
/* fixed at compile time */
ID mid;