From 4763193567c314d463863f00fea2593c0e3024fb Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Fri, 10 Jan 2025 00:34:58 +0300 Subject: [PATCH] Update IR IR commit: e445f57f3a936584db28489a49098d52f03388a7 --- ext/opcache/jit/ir/ir.c | 143 ++++++++++++++++++++++++----- ext/opcache/jit/ir/ir.h | 15 ++- ext/opcache/jit/ir/ir_fold.h | 23 +++-- ext/opcache/jit/ir/ir_private.h | 29 ++++-- ext/opcache/jit/ir/ir_sccp.c | 2 +- ext/opcache/jit/ir/ir_x86.dasc | 158 ++++++++++++++++++++++++-------- 6 files changed, 290 insertions(+), 80 deletions(-) diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 17c39545916..08d1d505921 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -1283,9 +1283,11 @@ void ir_build_def_use_lists(ir_ctx *ctx) void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref ref) { ir_ref j, n, *p, *q, use; - ir_use_list *use_list = &ctx->use_lists[from]; + ir_use_list *use_list; ir_ref skip = 0; + IR_ASSERT(from > 0); + use_list = &ctx->use_lists[from]; n = use_list->count; for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { use = *p; @@ -1310,8 +1312,10 @@ void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref ref) void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref) { ir_ref j, n, *p; - ir_use_list *use_list = &ctx->use_lists[from]; + ir_use_list *use_list; + IR_ASSERT(from > 0); + use_list = &ctx->use_lists[from]; n = use_list->count; j = 0; p = &ctx->use_edges[use_list->refs]; @@ -1334,9 +1338,11 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref) void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) { - ir_use_list *use_list = &ctx->use_lists[ref]; + ir_use_list *use_list; ir_ref i, n, *p; + IR_ASSERT(ref > 0); + use_list = &ctx->use_lists[ref]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { if (*p == use) { @@ -1348,9 +1354,11 @@ void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) { - ir_use_list *use_list = &ctx->use_lists[ref]; + ir_use_list *use_list; ir_ref i, n, *p; + IR_ASSERT(ref > 0); + use_list = &ctx->use_lists[ref]; n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { if (*p == use) { @@ -1361,9 +1369,12 @@ void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref) { - ir_use_list *use_list = &ctx->use_lists[to]; - ir_ref n = use_list->refs + use_list->count; + ir_use_list *use_list; + ir_ref n; + IR_ASSERT(to > 0); + use_list = &ctx->use_lists[to]; + n = use_list->refs + use_list->count; if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) { ctx->use_edges[n] = ref; use_list->count++; @@ -1385,6 +1396,59 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref) } } +static int ir_ref_cmp(const void *p1, const void *p2) +{ + return *(ir_ref*)p1 - *(ir_ref*)p2; +} + +void ir_use_list_sort(ir_ctx *ctx, ir_ref ref) +{ + ir_use_list *use_list; + uint32_t n; + + IR_ASSERT(ref > 0); + use_list = &ctx->use_lists[ref]; + n = use_list->count; + if (n > 1) { + qsort(ctx->use_edges + use_list->refs, n, sizeof(ir_ref), ir_ref_cmp); + } +} + +void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref) +{ + int i, j, n, use; + ir_insn *insn; + + IR_ASSERT(ref != new_ref); + n = ctx->use_lists[ref].count; + for (i = 0; i < n; i++) { + use = ctx->use_edges[ctx->use_lists[ref].refs + i]; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + j = ir_insn_find_op(insn, ref); + IR_ASSERT(j > 0); + ir_insn_set_op(insn, j, new_ref); + if (!IR_IS_CONST_REF(new_ref)) { + ir_use_list_add(ctx, new_ref, use); + } + } +} + +void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val) +{ + ir_insn *insn = &ctx->ir_base[ref]; + ir_ref old_val = ir_insn_op(insn, idx); + + IR_ASSERT(old_val != new_val); + if (new_val > 0) { + ir_use_list_add(ctx, new_val, ref); + } + ir_insn_set_op(insn, idx, new_val); + if (old_val > 0) { + ir_use_list_remove_one(ctx, old_val, ref); + } +} + /* Helper Data Types */ void ir_array_grow(ir_array *a, uint32_t size) { @@ -1428,16 +1492,16 @@ void ir_list_remove(ir_list *l, uint32_t i) l->len--; } -bool ir_list_contains(const ir_list *l, ir_ref val) +uint32_t ir_list_find(const ir_list *l, ir_ref val) { uint32_t i; for (i = 0; i < l->len; i++) { if (ir_array_at(&l->a, i) == val) { - return 1; + return i; } } - return 0; + return (uint32_t)-1; } static uint32_t ir_hashtab_hash_size(uint32_t size) @@ -2010,18 +2074,22 @@ ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs) return inputs[0]; } else { ir_ref i; - ir_ref ref = inputs[0]; + ir_ref ref; - IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN); - if (ref != IR_UNUSED) { - for (i = 1; i < n; i++) { - if (inputs[i] != ref) { - break; + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE + || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN); + ref = inputs[0]; + if (ref != IR_UNUSED) { + for (i = 1; i < n; i++) { + if (inputs[i] != ref) { + break; + } + } + if (i == n) { + /* all the same */ + return ref; } - } - if (i == n) { - /* all the same */ - return ref; } } @@ -2066,7 +2134,8 @@ void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num) void _ir_BEGIN(ir_ctx *ctx, ir_ref src) { IR_ASSERT(!ctx->control); - if (src + if (EXPECTED(ctx->flags & IR_OPT_FOLDING) + && src && src + 1 == ctx->insns_count && ctx->ir_base[src].op == IR_END) { /* merge with the last END */ @@ -2095,8 +2164,14 @@ ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition) { ir_ref if_ref; - condition = _ir_fold_condition(ctx, condition); IR_ASSERT(ctx->control); + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + if_ref = ir_emit2(ctx, IR_IF, ctx->control, condition); + ctx->control = IR_UNUSED; + return if_ref; + } + + condition = _ir_fold_condition(ctx, condition); if (IR_IS_CONST_REF(condition)) { condition = ir_ref_is_true(ctx, condition) ? IR_TRUE : IR_FALSE; } else { @@ -2649,7 +2724,7 @@ void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr) return; } condition = IR_FALSE; - } else { + } else if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) { ir_insn *prev = NULL; ir_ref ref = ctx->control; ir_insn *insn; @@ -2695,7 +2770,7 @@ void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr) return; } condition = IR_TRUE; - } else { + } else if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) { ir_insn *prev = NULL; ir_ref ref = ctx->control; ir_insn *insn; @@ -2779,6 +2854,10 @@ ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var) ir_ref ref = ctx->control; ir_insn *insn; + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD, type), ctx->control, var); + } while (ref > var) { insn = &ctx->ir_base[ref]; if (insn->op == IR_VLOAD) { @@ -2825,6 +2904,12 @@ void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val) ir_insn *insn; bool guarded = 0; + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val); + return; + } + if (!IR_IS_CONST_REF(val)) { insn = &ctx->ir_base[val]; if (insn->op == IR_BITCAST @@ -2893,9 +2978,12 @@ void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val) ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr) { - ir_ref ref = ir_find_aliasing_load(ctx, ctx->control, type, addr); + ir_ref ref = IR_UNUSED; IR_ASSERT(ctx->control); + if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) { + ref = ir_find_aliasing_load(ctx, ctx->control, type, addr); + } if (!ref) { ctx->control = ref = ir_emit2(ctx, IR_OPT(IR_LOAD, type), ctx->control, addr); } @@ -2912,6 +3000,12 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val) ir_type type2; bool guarded = 0; + IR_ASSERT(ctx->control); + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val); + return; + } + if (!IR_IS_CONST_REF(val)) { insn = &ctx->ir_base[val]; if (insn->op == IR_BITCAST @@ -2922,7 +3016,6 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val) } } - IR_ASSERT(ctx->control); while (ref > limit) { insn = &ctx->ir_base[ref]; if (insn->op == IR_STORE) { diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 433c59472f5..5d22e0f874c 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -705,6 +705,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count); void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val); +ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n); IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val) { @@ -721,8 +722,6 @@ IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val) ctx->ir_base[ref].op3 = val; } -ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n); - IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n) { const ir_ref *p = insn->ops + n; @@ -735,6 +734,18 @@ IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val) *p = val; } +IR_ALWAYS_INLINE uint32_t ir_insn_find_op(const ir_insn *insn, ir_ref val) +{ + int i, n = insn->inputs_count; + + for (i = 1; i <= n; i++) { + if (ir_insn_op(insn, i) == val) { + return i; + } + } + return 0; +} + ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt); diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index b23ea832df9..38ea059ac5a 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -1808,10 +1808,6 @@ IR_FOLD(MUL(_, C_ADDR)) IR_FOLD_COPY(op2); } else if (op2_insn->val.u64 == 1) { IR_FOLD_COPY(op1); - } else if (op2_insn->val.u64 == 2 && IR_OPT_TYPE(opt) != IR_ADDR) { - opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); - op2 = op1; - IR_FOLD_RESTART; } IR_FOLD_NEXT; } @@ -1827,11 +1823,6 @@ IR_FOLD(MUL(_, C_I64)) } else if (op2_insn->val.i64 == 1) { /* a * 1 => a */ IR_FOLD_COPY(op1); - } else if (op2_insn->val.i64 == 2) { - /* a * 2 => a + a */ - opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); - op2 = op1; - IR_FOLD_RESTART; } else if (op2_insn->val.i64 == -1) { /* a * -1 => -a */ opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); @@ -2907,7 +2898,6 @@ IR_FOLD(ADD(SHR, SHL)) /* Swap operands (move lower ref to op2) for better CSE */ -IR_FOLD(ADD(_, _)) IR_FOLD(MUL(_, _)) IR_FOLD_NAMED(swap_ops) { @@ -2929,6 +2919,19 @@ IR_FOLD(MUL_OV(_, _)) IR_FOLD_EMIT; } +IR_FOLD(ADD(_, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) { + /* a + a => a * 2 */ + IR_ASSERT(!IR_IS_CONST_REF(op1)); + val.u64 = 2; + opt = IR_MUL | (opt & IR_OPT_TYPE_MASK); + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_DO_NAMED(swap_ops); +} + IR_FOLD(SUB(_, _)) { if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) { diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index f88ba754969..4d1e8dd32bb 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -701,7 +701,7 @@ typedef struct _ir_list { uint32_t len; } ir_list; -bool ir_list_contains(const ir_list *l, ir_ref val); +uint32_t ir_list_find(const ir_list *l, ir_ref val); void ir_list_insert(ir_list *l, uint32_t i, ir_ref val); void ir_list_remove(ir_list *l, uint32_t i); @@ -766,6 +766,19 @@ IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val) ir_array_set_unchecked(&l->a, i, val); } +/* Doesn't preserve order */ +IR_ALWAYS_INLINE void ir_list_del(ir_list *l, uint32_t i) +{ + IR_ASSERT(i < l->len); + l->len--; + ir_array_set_unchecked(&l->a, i, ir_array_at(&l->a, l->len)); +} + +IR_ALWAYS_INLINE bool ir_list_contains(const ir_list *l, ir_ref val) +{ + return ir_list_find(l, val) != (uint32_t)-1; +} + /* Worklist (unique list) */ typedef struct _ir_worklist { ir_list l; @@ -1019,11 +1032,12 @@ struct _ir_use_list { ir_ref count; }; -void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref use); -void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref use); -void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use); -void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use); -bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use); +void ir_use_list_remove_all(ir_ctx *ctx, ir_ref def, ir_ref use); +void ir_use_list_remove_one(ir_ctx *ctx, ir_ref def, ir_ref use); +void ir_use_list_replace_all(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use); +void ir_use_list_replace_one(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use); +bool ir_use_list_add(ir_ctx *ctx, ir_ref def, ir_ref use); +void ir_use_list_sort(ir_ctx *ctx, ir_ref def); IR_ALWAYS_INLINE ir_ref ir_next_control(const ir_ctx *ctx, ir_ref ref) { @@ -1068,6 +1082,9 @@ IR_ALWAYS_INLINE ir_ref ir_next_control(const ir_ctx *ctx, ir_ref ref) _insn2 = _tmp; \ } while (0) +void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref); +void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val); + /*** IR Basic Blocks info ***/ #define IR_IS_BB_START(op) \ ((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0) diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 05577f05b31..50f78d66d9c 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -1238,7 +1238,7 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue * /* connect their predecessor and successor */ ctx->ir_base[next].op1 = prev; - ir_use_list_replace_all(ctx, prev, end, next); + ir_use_list_replace_one(ctx, prev, end, next); if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) { ir_bitqueue_add(worklist2, prev); diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index bc90dc5966a..58c6ed40f7d 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1001,6 +1001,8 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(LEA_SI_OB) \ _(LEA_B_SI) \ _(LEA_SI_B) \ + _(LEA_B_SI_O) \ + _(LEA_SI_B_O) \ _(INC) \ _(DEC) \ _(MUL_PWR2) \ @@ -1581,7 +1583,7 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) if (!rule) { ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref); } - if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) { + if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B_O) { ir_use_list *use_list; ir_ref j; @@ -1943,7 +1945,8 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) } else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && - (insn->op == IR_EQ || insn->op == IR_NE))) { + (insn->op == IR_EQ || insn->op == IR_NE || + insn->op == IR_LT || insn->op == IR_GE))) { /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); @@ -1990,14 +1993,20 @@ lea: /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; return IR_LEA_SI_O; - } else if (rule == IR_LEA_SIB) { + } else if (rule == IR_LEA_SIB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SIB)) { /* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB; return IR_LEA_SIB_O; - } else if (rule == IR_LEA_IB) { + } else if (rule == IR_LEA_IB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_IB)) { /* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... LEA [X+Y+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; return IR_LEA_IB_O; + } else if (rule == IR_LEA_B_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_B_SI)) { + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_B_SI; + return IR_LEA_B_SI_O; + } else if (rule == IR_LEA_SI_B || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI_B)) { + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI_B; + return IR_LEA_SI_B_O; } } /* ADD(X, imm32) => LEA [X+imm32] */ @@ -2050,7 +2059,7 @@ lea: if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } - if (rule == IR_LEA_OB) { + if (rule == IR_LEA_OB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_OB)) { /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; return IR_LEA_SI_OB; @@ -2065,7 +2074,7 @@ lea: if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } - if (rule == IR_LEA_OB) { + if (rule == IR_LEA_OB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_OB)) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */ return IR_LEA_I_OB; @@ -2602,7 +2611,8 @@ store_int: ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && - (op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) { + (op2_insn->op == IR_EQ || op2_insn->op == IR_NE || + op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) { /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); @@ -2744,7 +2754,8 @@ store_int: if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && - (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { + (op2_insn->op == IR_EQ || op2_insn->op == IR_NE || + op2_insn->op == IR_LT || op2_insn->op == IR_GE))) { if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; @@ -2768,7 +2779,8 @@ store_int: if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op_insn->op == IR_ADD || op_insn->op == IR_SUB) && - (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { + (op2_insn->op == IR_EQ || op2_insn->op == IR_NE || + op2_insn->op == IR_LT || op2_insn->op == IR_GE))) { if (ctx->ir_base[op_insn->op1].op == IR_LOAD && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { if (ir_in_same_block(ctx, op_insn->op1) @@ -3286,7 +3298,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) int32_t offset = 0, scale; IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB && - (rule & IR_RULE_MASK) <= IR_LEA_SI_B) || + (rule & IR_RULE_MASK) <= IR_LEA_SI_B_O) || rule == IR_STATIC_ALLOCA); switch (rule & IR_RULE_MASK) { default: @@ -3456,6 +3468,22 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) scale = ctx->ir_base[op1_insn->op2].val.i32; offset_insn = NULL; break; + case IR_LEA_B_SI_O: + offset_insn = insn; + op1_insn = &ctx->ir_base[insn->op1]; + base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; + index_reg_ref = op1_insn->op2 * sizeof(ir_ref) + 1; + op2_insn = &ctx->ir_base[op1_insn->op2]; + scale = ctx->ir_base[op2_insn->op2].val.i32; + break; + case IR_LEA_SI_B_O: + offset_insn = insn; + op1_insn = &ctx->ir_base[insn->op1]; + index_reg_ref = op1_insn->op1 * sizeof(ir_ref) + 1; + base_reg_ref = insn->op1 * sizeof(ir_ref) + 2; + op1_insn = &ctx->ir_base[op1_insn->op1]; + scale = ctx->ir_base[op1_insn->op2].val.i32; + break; case IR_ALLOCA: offset = IR_SPILL_POS_TO_OFFSET(insn->op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; @@ -5583,7 +5611,7 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2); } -static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) +static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg, bool after_op) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; @@ -5598,10 +5626,18 @@ static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) | setne Rb(def_reg) break; case IR_LT: - | setl Rb(def_reg) + if (after_op) { + | sets Rb(def_reg) + } else { + | setl Rb(def_reg) + } break; case IR_GE: - | setge Rb(def_reg) + if (after_op) { + | setns Rb(def_reg) + } else { + | setge Rb(def_reg) + } break; case IR_LE: | setle Rb(def_reg) @@ -5711,7 +5747,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2); - _ir_emit_setcc_int(ctx, op, def_reg); + _ir_emit_setcc_int(ctx, op, def_reg, 0); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } @@ -5808,7 +5844,7 @@ static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(def_reg != IR_REG_NONE); ir_emit_test_int_common(ctx, def, insn->op1, insn->op); - _ir_emit_setcc_int(ctx, insn->op, def_reg); + _ir_emit_setcc_int(ctx, insn->op, def_reg, 0); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } @@ -5819,7 +5855,7 @@ static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(def_reg != IR_REG_NONE); - _ir_emit_setcc_int(ctx, insn->op, def_reg); + _ir_emit_setcc_int(ctx, insn->op, def_reg, 1); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } @@ -5963,7 +5999,7 @@ static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next } } -static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp) +static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp, bool after_op) { uint32_t true_block, false_block; ir_backend_data *data = ctx->data; @@ -5994,10 +6030,18 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint | jne =>true_block break; case IR_LT: - | jl =>true_block + if (after_op) { + | js =>true_block + } else { + | jl =>true_block + } break; case IR_GE: - | jge =>true_block + if (after_op) { + | jns =>true_block + } else { + | jge =>true_block + } break; case IR_LE: | jle =>true_block @@ -6133,7 +6177,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i if (!same_comparison) { ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); } - ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); + ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0); } static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) @@ -6149,13 +6193,13 @@ static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_ } ir_emit_test_int_common(ctx, def, op2, op); - ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); + ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0); } static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); - ir_emit_jcc(ctx, b, def, insn, next_block, op, 0); + ir_emit_jcc(ctx, b, def, insn, next_block, op, 0, 0); } static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) @@ -6203,7 +6247,7 @@ static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, u } | ASM_MEM_IMM_OP cmp, type, mem, 0 } - ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1); + ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1, 0); } static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) @@ -9078,7 +9122,7 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } -static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp) +static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp, bool after_op) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; @@ -9112,10 +9156,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | je =>target break; case IR_LT: - | jge =>target + if (after_op) { + | jns =>target + } else { + | jge =>target + } break; case IR_GE: - | jl =>target + if (after_op) { + | js =>target + } else { + | jl =>target + } break; case IR_LE: | jg =>target @@ -9183,10 +9235,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | je &target_addr break; case IR_LT: - | jge &target_addr + if (after_op) { + | jns &target_addr + } else { + | jge &target_addr + } break; case IR_GE: - | jl &target_addr + if (after_op) { + | js &target_addr + } else { + | jl &target_addr + } break; case IR_LE: | jg &target_addr @@ -9251,10 +9311,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | jne &addr break; case IR_LT: - | jl &addr + if (after_op) { + | js &addr + } else { + | jl &addr + } break; case IR_GE: - | jge &addr + if (after_op) { + | jns &addr + } else { + | jge &addr + } break; case IR_LE: | jle &addr @@ -9369,7 +9437,7 @@ static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, ui } else { op = IR_NE; } - return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); + return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0); } else { |.if X64 if (insn->op == IR_GUARD) { @@ -9447,7 +9515,7 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn * op ^= 1; // reverse } - return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); + return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0); } static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) @@ -9458,7 +9526,7 @@ static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i if (insn->op == IR_GUARD) { op ^= 1; // reverse } - return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0); + return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0, 0); } static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) @@ -9467,7 +9535,7 @@ static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn ir_op op = (insn->op == IR_GUARD) ? IR_EQ : IR_NE; ir_emit_test_int_common(ctx, def, insn->op2, op); - return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); + return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0); } static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) @@ -9478,7 +9546,7 @@ static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn * if (insn->op == IR_GUARD) { op ^= 1; // reverse } - return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); + return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 1); } static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) @@ -9528,7 +9596,15 @@ static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type) && IR_MEM_BASE(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_BASE(mem); | add Rd(def_reg), Rd(reg) + } else if (IR_MEM_INDEX(mem) == def_reg + && IR_MEM_OFFSET(mem) == 0 + && IR_MEM_SCALE(mem) == 2 + && IR_MEM_BASE(mem) == IR_REG_NONE) { + | add Rd(def_reg), Rd(def_reg) } else { + if (IR_MEM_SCALE(mem) == 2 && IR_MEM_BASE(mem) == IR_REG_NONE) { + mem = IR_MEM(IR_MEM_INDEX(mem), IR_MEM_OFFSET(mem), IR_MEM_INDEX(mem), 1); + } | ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem } } else { @@ -9544,7 +9620,15 @@ static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type) && IR_MEM_BASE(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_BASE(mem); | add Ra(def_reg), Ra(reg) + } else if (IR_MEM_INDEX(mem) == def_reg + && IR_MEM_OFFSET(mem) == 0 + && IR_MEM_SCALE(mem) == 2 + && IR_MEM_BASE(mem) == IR_REG_NONE) { + | add Ra(def_reg), Ra(def_reg) } else { + if (IR_MEM_SCALE(mem) == 2 && IR_MEM_BASE(mem) == IR_REG_NONE) { + mem = IR_MEM(IR_MEM_INDEX(mem), IR_MEM_OFFSET(mem), IR_MEM_INDEX(mem), 1); + } | ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem } } @@ -10453,6 +10537,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_LEA_SI_OB: case IR_LEA_B_SI: case IR_LEA_SI_B: + case IR_LEA_B_SI_O: + case IR_LEA_SI_B_O: ir_emit_lea(ctx, i, insn->type); break; case IR_MUL_PWR2: @@ -10575,7 +10661,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) } else { IR_ASSERT(op >= IR_EQ && op <= IR_UGT); } - ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1); + ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1, 1); } break; case IR_GUARD_CMP_INT: