mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
Introduce BOP_CMP for optimized comparison
Prior to this commit the `OPTIMIZED_CMP` macro relied on a method lookup to determine whether `<=>` was overridden. The result of the lookup was cached, but only for the duration of the specific method that initialized the cmp_opt_data cache structure.

With this method lookup, `[x,y].max` is slower than doing `x > y ? x : y` even though there's an optimized instruction for "new array max". (John noticed that somebody had proposed a micro-optimization based on this fact in https://github.com/mastodon/mastodon/pull/19903.)

```rb
a, b = 1, 2
Benchmark.ips do |bm|
  bm.report('conditional') { a > b ? a : b }
  bm.report('method') { [a, b].max }
  bm.compare!
end
```

Before:

```
Comparison:
conditional: 22603733.2 i/s
method: 19820412.7 i/s - 1.14x (± 0.00) slower
```

This commit replaces the method lookup with a new CMP basic op, which gives the examples above equivalent performance.

After:

```
Comparison:
method: 24022466.5 i/s
conditional: 23851094.2 i/s - same-ish: difference falls within error
```

Relevant benchmarks show an improvement to Array#max and Array#min when not using the optimized newarray_max instruction as well. They are noticeably faster for small arrays with the relevant types, and the same or maybe a touch faster on larger arrays.

```
$ make benchmark COMPARE_RUBY=<master@5958c305> ITEM=array_min
$ make benchmark COMPARE_RUBY=<master@5958c305> ITEM=array_max
```

The benchmarks added in this commit also look generally improved.

Co-authored-by: John Hawthorn <jhawthorn@github.com>
This commit is contained in:
parent
c43951e60e
commit
e69b91fae4
12 changed files with 96 additions and 82 deletions
49
enum.c
49
enum.c
|
@ -1373,7 +1373,6 @@ sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _data))
|
|||
static int
|
||||
sort_by_cmp(const void *ap, const void *bp, void *data)
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
VALUE a;
|
||||
VALUE b;
|
||||
VALUE ary = (VALUE)data;
|
||||
|
@ -1385,7 +1384,7 @@ sort_by_cmp(const void *ap, const void *bp, void *data)
|
|||
a = *(VALUE *)ap;
|
||||
b = *(VALUE *)bp;
|
||||
|
||||
return OPTIMIZED_CMP(a, b, cmp_opt);
|
||||
return OPTIMIZED_CMP(a, b);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1713,11 +1712,10 @@ cmpint_reenter_check(struct nmin_data *data, VALUE val)
|
|||
static int
|
||||
nmin_cmp(const void *ap, const void *bp, void *_data)
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
struct nmin_data *data = (struct nmin_data *)_data;
|
||||
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
||||
#define rb_cmpint(cmp, a, b) rb_cmpint(cmpint_reenter_check(data, (cmp)), a, b)
|
||||
return OPTIMIZED_CMP(a, b, cmp_opt);
|
||||
return OPTIMIZED_CMP(a, b);
|
||||
#undef rb_cmpint
|
||||
}
|
||||
|
||||
|
@ -2027,7 +2025,6 @@ enum_none(int argc, VALUE *argv, VALUE obj)
|
|||
|
||||
struct min_t {
|
||||
VALUE min;
|
||||
struct cmp_opt_data cmp_opt;
|
||||
};
|
||||
|
||||
static VALUE
|
||||
|
@ -2041,7 +2038,7 @@ min_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
|||
memo->min = i;
|
||||
}
|
||||
else {
|
||||
if (OPTIMIZED_CMP(i, memo->min, memo->cmp_opt) < 0) {
|
||||
if (OPTIMIZED_CMP(i, memo->min) < 0) {
|
||||
memo->min = i;
|
||||
}
|
||||
}
|
||||
|
@ -2130,7 +2127,7 @@ static VALUE
|
|||
enum_min(int argc, VALUE *argv, VALUE obj)
|
||||
{
|
||||
VALUE memo;
|
||||
struct min_t *m = NEW_CMP_OPT_MEMO(struct min_t, memo);
|
||||
struct min_t *m = NEW_MEMO_FOR(struct min_t, memo);
|
||||
VALUE result;
|
||||
VALUE num;
|
||||
|
||||
|
@ -2138,8 +2135,6 @@ enum_min(int argc, VALUE *argv, VALUE obj)
|
|||
return rb_nmin_run(obj, num, 0, 0, 0);
|
||||
|
||||
m->min = Qundef;
|
||||
m->cmp_opt.opt_methods = 0;
|
||||
m->cmp_opt.opt_inited = 0;
|
||||
if (rb_block_given_p()) {
|
||||
rb_block_call(obj, id_each, 0, 0, min_ii, memo);
|
||||
}
|
||||
|
@ -2153,7 +2148,6 @@ enum_min(int argc, VALUE *argv, VALUE obj)
|
|||
|
||||
struct max_t {
|
||||
VALUE max;
|
||||
struct cmp_opt_data cmp_opt;
|
||||
};
|
||||
|
||||
static VALUE
|
||||
|
@ -2167,7 +2161,7 @@ max_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
|||
memo->max = i;
|
||||
}
|
||||
else {
|
||||
if (OPTIMIZED_CMP(i, memo->max, memo->cmp_opt) > 0) {
|
||||
if (OPTIMIZED_CMP(i, memo->max) > 0) {
|
||||
memo->max = i;
|
||||
}
|
||||
}
|
||||
|
@ -2255,7 +2249,7 @@ static VALUE
|
|||
enum_max(int argc, VALUE *argv, VALUE obj)
|
||||
{
|
||||
VALUE memo;
|
||||
struct max_t *m = NEW_CMP_OPT_MEMO(struct max_t, memo);
|
||||
struct max_t *m = NEW_MEMO_FOR(struct max_t, memo);
|
||||
VALUE result;
|
||||
VALUE num;
|
||||
|
||||
|
@ -2263,8 +2257,6 @@ enum_max(int argc, VALUE *argv, VALUE obj)
|
|||
return rb_nmin_run(obj, num, 0, 1, 0);
|
||||
|
||||
m->max = Qundef;
|
||||
m->cmp_opt.opt_methods = 0;
|
||||
m->cmp_opt.opt_inited = 0;
|
||||
if (rb_block_given_p()) {
|
||||
rb_block_call(obj, id_each, 0, 0, max_ii, (VALUE)memo);
|
||||
}
|
||||
|
@ -2280,7 +2272,6 @@ struct minmax_t {
|
|||
VALUE min;
|
||||
VALUE max;
|
||||
VALUE last;
|
||||
struct cmp_opt_data cmp_opt;
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -2293,11 +2284,11 @@ minmax_i_update(VALUE i, VALUE j, struct minmax_t *memo)
|
|||
memo->max = j;
|
||||
}
|
||||
else {
|
||||
n = OPTIMIZED_CMP(i, memo->min, memo->cmp_opt);
|
||||
n = OPTIMIZED_CMP(i, memo->min);
|
||||
if (n < 0) {
|
||||
memo->min = i;
|
||||
}
|
||||
n = OPTIMIZED_CMP(j, memo->max, memo->cmp_opt);
|
||||
n = OPTIMIZED_CMP(j, memo->max);
|
||||
if (n > 0) {
|
||||
memo->max = j;
|
||||
}
|
||||
|
@ -2320,7 +2311,7 @@ minmax_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
|
|||
j = memo->last;
|
||||
memo->last = Qundef;
|
||||
|
||||
n = OPTIMIZED_CMP(j, i, memo->cmp_opt);
|
||||
n = OPTIMIZED_CMP(j, i);
|
||||
if (n == 0)
|
||||
i = j;
|
||||
else if (n < 0) {
|
||||
|
@ -2422,12 +2413,10 @@ static VALUE
|
|||
enum_minmax(VALUE obj)
|
||||
{
|
||||
VALUE memo;
|
||||
struct minmax_t *m = NEW_CMP_OPT_MEMO(struct minmax_t, memo);
|
||||
struct minmax_t *m = NEW_MEMO_FOR(struct minmax_t, memo);
|
||||
|
||||
m->min = Qundef;
|
||||
m->last = Qundef;
|
||||
m->cmp_opt.opt_methods = 0;
|
||||
m->cmp_opt.opt_inited = 0;
|
||||
if (rb_block_given_p()) {
|
||||
rb_block_call(obj, id_each, 0, 0, minmax_ii, memo);
|
||||
if (!UNDEF_P(m->last))
|
||||
|
@ -2447,7 +2436,6 @@ enum_minmax(VALUE obj)
|
|||
static VALUE
|
||||
min_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
struct MEMO *memo = MEMO_CAST(args);
|
||||
VALUE v;
|
||||
|
||||
|
@ -2458,7 +2446,7 @@ min_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
|||
MEMO_V1_SET(memo, v);
|
||||
MEMO_V2_SET(memo, i);
|
||||
}
|
||||
else if (OPTIMIZED_CMP(v, memo->v1, cmp_opt) < 0) {
|
||||
else if (OPTIMIZED_CMP(v, memo->v1) < 0) {
|
||||
MEMO_V1_SET(memo, v);
|
||||
MEMO_V2_SET(memo, i);
|
||||
}
|
||||
|
@ -2522,7 +2510,6 @@ enum_min_by(int argc, VALUE *argv, VALUE obj)
|
|||
static VALUE
|
||||
max_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
struct MEMO *memo = MEMO_CAST(args);
|
||||
VALUE v;
|
||||
|
||||
|
@ -2533,7 +2520,7 @@ max_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
|||
MEMO_V1_SET(memo, v);
|
||||
MEMO_V2_SET(memo, i);
|
||||
}
|
||||
else if (OPTIMIZED_CMP(v, memo->v1, cmp_opt) > 0) {
|
||||
else if (OPTIMIZED_CMP(v, memo->v1) > 0) {
|
||||
MEMO_V1_SET(memo, v);
|
||||
MEMO_V2_SET(memo, i);
|
||||
}
|
||||
|
@ -2606,8 +2593,6 @@ struct minmax_by_t {
|
|||
static void
|
||||
minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *memo)
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
|
||||
if (UNDEF_P(memo->min_bv)) {
|
||||
memo->min_bv = v1;
|
||||
memo->max_bv = v2;
|
||||
|
@ -2615,11 +2600,11 @@ minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *m
|
|||
memo->max = i2;
|
||||
}
|
||||
else {
|
||||
if (OPTIMIZED_CMP(v1, memo->min_bv, cmp_opt) < 0) {
|
||||
if (OPTIMIZED_CMP(v1, memo->min_bv) < 0) {
|
||||
memo->min_bv = v1;
|
||||
memo->min = i1;
|
||||
}
|
||||
if (OPTIMIZED_CMP(v2, memo->max_bv, cmp_opt) > 0) {
|
||||
if (OPTIMIZED_CMP(v2, memo->max_bv) > 0) {
|
||||
memo->max_bv = v2;
|
||||
memo->max = i2;
|
||||
}
|
||||
|
@ -2629,7 +2614,6 @@ minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *m
|
|||
static VALUE
|
||||
minmax_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
struct minmax_by_t *memo = MEMO_FOR(struct minmax_by_t, _memo);
|
||||
VALUE vi, vj, j;
|
||||
int n;
|
||||
|
@ -2647,7 +2631,7 @@ minmax_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
|
|||
j = memo->last;
|
||||
memo->last_bv = Qundef;
|
||||
|
||||
n = OPTIMIZED_CMP(vj, vi, cmp_opt);
|
||||
n = OPTIMIZED_CMP(vj, vi);
|
||||
if (n == 0) {
|
||||
i = j;
|
||||
vi = vj;
|
||||
|
@ -3033,7 +3017,6 @@ each_cons_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args))
|
|||
static VALUE
|
||||
enum_each_cons_size(VALUE obj, VALUE args, VALUE eobj)
|
||||
{
|
||||
struct cmp_opt_data cmp_opt = { 0, 0 };
|
||||
const VALUE zero = LONG2FIX(0);
|
||||
VALUE n, size;
|
||||
long cons_size = NUM2LONG(RARRAY_AREF(args, 0));
|
||||
|
@ -3043,7 +3026,7 @@ enum_each_cons_size(VALUE obj, VALUE args, VALUE eobj)
|
|||
if (NIL_P(size)) return Qnil;
|
||||
|
||||
n = add_int(size, 1 - cons_size);
|
||||
return (OPTIMIZED_CMP(n, zero, cmp_opt) == -1) ? zero : n;
|
||||
return (OPTIMIZED_CMP(n, zero) == -1) ? zero : n;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue