diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 1aa887f6a94..4d414b852a5 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -1009,6 +1009,12 @@ ir_fold_cse:
 			ctx->prev_insn_chain[op] = ref;
 			return ref;
+		} else {
+			ctx->fold_insn.optx = opt;
+			ctx->fold_insn.op1 = op1;
+			ctx->fold_insn.op2 = op2;
+			ctx->fold_insn.op3 = op3;
+			return IR_FOLD_DO_CSE;
 		}
 
 ir_fold_emit:
 	if (!(ctx->flags2 & IR_OPT_IN_SCCP)) {
@@ -1031,7 +1037,7 @@ ir_fold_const:
 	if (!(ctx->flags2 & IR_OPT_IN_SCCP)) {
 		return ir_const(ctx, val, IR_OPT_TYPE(opt));
 	} else {
-		ctx->fold_insn.type = IR_OPT_TYPE(opt);
+		ctx->fold_insn.opt = IR_OPT(IR_OPT_TYPE(opt), IR_OPT_TYPE(opt));
 		ctx->fold_insn.val.u64 = val.u64;
 		return IR_FOLD_DO_CONST;
 	}
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index 0e0bf3ca4c5..40a475ae22f 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -106,7 +106,10 @@ bool ir_check(const ir_ctx *ctx)
 				use = *p;
 				if (use != IR_UNUSED) {
 					if (IR_IS_CONST_REF(use)) {
-						if (use >= ctx->consts_count) {
+						if (IR_OPND_KIND(flags, j) != IR_OPND_DATA) {
+							fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must not be constant\n", i, j, use);
+							ok = 0;
+						} else if (use >= ctx->consts_count) {
 							fprintf(stderr, "ir_base[%d].ops[%d] constant reference (%d) is out of range\n", i, j, use);
 							ok = 0;
 						}
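Note on the ir.c change above (reader's commentary, not part of the patch): when folding runs inside SCCP, ir_fold_cse no longer hash-conses the instruction itself; it hands the normalized candidate back through ctx->fold_insn and returns the new IR_FOLD_DO_CSE action code. A hedged sketch of how a caller dispatches on the action codes, using only names visible in this patch (fold_action is a placeholder):

    /* Sketch only; the real dispatch lives in ir_sccp_fold() below. */
    switch (fold_action) {
        case IR_FOLD_DO_CSE:   /* candidate is in ctx->fold_insn; try a value-table lookup */
        case IR_FOLD_DO_EMIT:  /* nothing folded; the result stays a real instruction */
        case IR_FOLD_DO_COPY:  /* result equals ctx->fold_insn.op1 */
        case IR_FOLD_DO_CONST: /* result is the constant ctx->fold_insn.val */
            break;
    }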
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 996847d58a7..a93dce11aaf 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -13,6 +13,7 @@
 #include "ir_private.h"
 
 #define IR_COMBO_COPY_PROPAGATION 1
+#define IR_COMBO_GVN 1
 
 #define IR_TOP IR_UNUSED
 #define IR_BOTTOM IR_LAST_OP
@@ -32,39 +33,311 @@ IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_re
 	return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
 }
 
+IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+{
+	ir_use_list *use_list;
+	ir_ref n, *p, use;
+
+	IR_ASSERT(!IR_IS_CONST_REF(ref));
+	use_list = &ctx->use_lists[ref];
+	n = use_list->count;
+	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+		use = *p;
+		if (_values[use].op != IR_BOTTOM) {
+			ir_bitqueue_add(worklist, use);
+		}
+	}
+}
+
+IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+{
+	IR_ASSERT(!IR_IS_CONST_REF(ref));
+	IR_ASSERT(_values[ref].op == IR_TOP);
+	/* do backward propagation only once */
+	if (!_values[ref].op1) {
+		_values[ref].op1 = 1;
+		ir_bitqueue_add(worklist, ref);
+	}
+}
+
 #if IR_COMBO_COPY_PROPAGATION
 IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
 {
 	if (a > 0 && _values[a].op == IR_COPY) {
 		do {
 			a = _values[a].op1;
-		} while (a > 0 && _values[a].op == IR_COPY);
-		IR_ASSERT(a < 0 || _values[a].op == IR_BOTTOM);
-		IR_ASSERT(a > 0 || IR_IS_SYM_CONST(ctx->ir_base[a].op));
+			IR_ASSERT(a > 0);
+		} while (_values[a].op == IR_COPY);
+		IR_ASSERT(_values[a].op == IR_BOTTOM);
 	}
 	return a;
 }
 
-static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst, ir_type type)
+#if 0
+static void CHECK_LIST(ir_insn *_values, ir_ref ref)
+{
+	ir_ref member = _values[ref].op2;
+	while (member != ref) {
+		IR_ASSERT(_values[_values[member].op2].op3 == member);
+		member = _values[member].op2;
+	}
+	IR_ASSERT(_values[_values[ref].op2].op3 == ref);
+}
+#else
+# define CHECK_LIST(_values, ref)
+#endif
+
+static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
 {
 	IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
-	IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY))
-		|| (src < 0 && IR_IS_SYM_CONST(ctx->ir_base[src].op)));
+	IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
 	IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
-	_values[dst].optx = IR_OPT(IR_COPY, type);
+
+	_values[dst].optx = IR_COPY;
 	_values[dst].op1 = src;
+
+	if (_values[src].op == IR_BOTTOM) {
+		/* initialize empty double-linked list */
+		if (_values[src].op1 != src) {
+			_values[src].op1 = src;
+			_values[src].op2 = src;
+			_values[src].op3 = src;
+		}
+	} else {
+		src = ir_sccp_identity(ctx, _values, src);
+	}
+
+	/* insert into circular double-linked list */
+	ir_ref prev = _values[src].op3;
+	_values[dst].op2 = src;
+	_values[dst].op3 = prev;
+	_values[src].op3 = dst;
+	_values[prev].op2 = dst;
+	CHECK_LIST(_values, dst);
+}
+
+static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+{
+	ir_ref member, head, tail, next, prev;
+
+	CHECK_LIST(_values, ref);
+	IR_MAKE_BOTTOM(ref);
+	_values[ref].op1 = ref;
+
+	member = _values[ref].op2;
+	head = tail = IR_UNUSED;
+	while (member != ref) {
+		if (_values[member].op != IR_BOTTOM) {
+			ir_bitqueue_add(worklist, member);
+		}
+		ir_sccp_add_uses(ctx, _values, worklist, member);
+
+		next = _values[member].op2;
+		if (ir_sccp_identity(ctx, _values, member) == ref) {
+			/* remove "member" from the old circular double-linked list */
+			prev = _values[member].op3;
+			_values[prev].op2 = next;
+			_values[next].op3 = prev;
+
+			/* insert "member" into the new double-linked list */
+			if (!head) {
+				head = tail = member;
+			} else {
+				_values[tail].op2 = member;
+				_values[member].op3 = tail;
+				tail = member;
+			}
+		}
+		member = next;
+	}
+
+	/* remove "ref" from the old circular double-linked list */
+	next = _values[ref].op2;
+	prev = _values[ref].op3;
+	_values[prev].op2 = next;
+	_values[next].op3 = prev;
+	CHECK_LIST(_values, next);
+
+	/* close the new circle */
+	if (head) {
+		_values[ref].op2 = head;
+		_values[ref].op3 = tail;
+		_values[tail].op2 = ref;
+		_values[head].op3 = ref;
+	} else {
+		_values[ref].op2 = ref;
+		_values[ref].op3 = ref;
+	}
+	CHECK_LIST(_values, ref);
+}
+
+IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
+{
+	if (_values[ref].op == IR_COPY) {
+		ir_sccp_split_partition(ctx, _values, worklist, ref);
+	} else {
+		IR_MAKE_BOTTOM(ref);
+	}
+}
+
+# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
+#else
+# define ir_sccp_identity(_ctx, _values, ref) (ref)
+# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
+#endif
+
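Reader's sketch (not part of the patch): ir_sccp_add_identity() and ir_sccp_split_partition() above reuse the op1/op2/op3 fields of a lattice cell as the leader reference and the next/prev links of a circular double-linked list tying every COPY to its partition leader. The same two operations on an ordinary struct, for clarity (names are illustrative):

    typedef struct node { struct node *next, *prev; } node; /* op2 = next, op3 = prev */

    /* what ir_sccp_add_identity() does: link n in just before pos */
    static void list_insert_before(node *pos, node *n)
    {
        n->next = pos;
        n->prev = pos->prev;
        pos->prev->next = n;
        pos->prev = n;
    }

    /* what ir_sccp_split_partition() does for each migrating member */
    static void list_unlink(node *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }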
+typedef struct _ir_gvn_hash {
+	uint32_t mask;
+	ir_ref *hash;
+	ir_ref *chain;
+} ir_gvn_hash;
+
+#if IR_COMBO_GVN
+static void ir_gvn_hash_init(ir_ctx *ctx, ir_gvn_hash *gvn_hash)
+{
+	uint32_t mask = ctx->insns_limit - 1;
+
+	mask |= (mask >> 1);
+	mask |= (mask >> 2);
+	mask |= (mask >> 4);
+	mask |= (mask >> 8);
+	mask |= (mask >> 16);
+	gvn_hash->mask = mask;
+	gvn_hash->hash = ir_mem_calloc(mask + 1, sizeof(ir_ref));
+	gvn_hash->chain = ir_mem_calloc(ctx->insns_limit, sizeof(ir_ref));
+}
+
+static void ir_gvn_hash_free(ir_gvn_hash *gvn_hash)
+{
+	ir_mem_free(gvn_hash->hash);
+	ir_mem_free(gvn_hash->chain);
+}
+
+static ir_ref ir_gvn_lookup(ir_ctx *ctx, ir_gvn_hash *gvn_hash, ir_ref ref)
+{
+	ir_ref old;
+	ir_insn *insn = &ctx->ir_base[ref];
+	uint32_t hash;
+
+	hash = insn->opt;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ insn->op1;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ insn->op2;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ insn->op3;
+	hash &= gvn_hash->mask;
+
+	old = gvn_hash->hash[hash];
+	while (old) {
+		ir_insn *old_insn = &ctx->ir_base[old];
+
+		if (old == ref) {
+			return IR_UNUSED;
+		} else if (insn->opt == old_insn->opt
+		 && insn->op1 == old_insn->op1
+		 && insn->op2 == old_insn->op2
+		 && insn->op3 == old_insn->op3) {
+			return old;
+		} else {
+			old = gvn_hash->chain[old];
+		}
+	}
+
+	gvn_hash->chain[ref] = gvn_hash->hash[hash];
+	gvn_hash->hash[hash] = ref;
+
+	if (insn->opt == ctx->fold_insn.opt
+	 && insn->op1 == ctx->fold_insn.op1
+	 && insn->op2 == ctx->fold_insn.op2
+	 && insn->op3 == ctx->fold_insn.op3) {
+		return IR_UNUSED;
+	}
+
+	hash = ctx->fold_insn.opt;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ ctx->fold_insn.op1;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ ctx->fold_insn.op2;
+	hash = hash ^ (hash << 17) ^ (hash >> 13) ^ ctx->fold_insn.op3;
+	hash &= gvn_hash->mask;
+
+	old = gvn_hash->hash[hash];
+	while (old) {
+		ir_insn *old_insn = &ctx->ir_base[old];
+
+		if (ctx->fold_insn.opt == old_insn->opt
+		 && ctx->fold_insn.op1 == old_insn->op1
+		 && ctx->fold_insn.op2 == old_insn->op2
+		 && ctx->fold_insn.op3 == old_insn->op3) {
+			return old;
+		} else {
+			old = gvn_hash->chain[old];
+		}
+	}
+
+	return IR_UNUSED;
 }
 #endif
 
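Reader's note (not part of the patch): ir_gvn_hash_init() sizes the table to a power-of-two mask covering insns_limit by smearing the top set bit of (insns_limit - 1) rightward, and ir_gvn_lookup() mixes an instruction tuple with a simple xorshift. Pulled out as standalone helpers under those assumptions:

    /* mask = 2^k - 1 covering n - 1; e.g. n = 1000 -> mask = 1023 */
    static uint32_t pow2_mask(uint32_t n)
    {
        uint32_t mask = n - 1;
        mask |= mask >> 1;
        mask |= mask >> 2;
        mask |= mask >> 4;
        mask |= mask >> 8;
        mask |= mask >> 16;
        return mask;
    }

    /* the same mixing ir_gvn_lookup() applies to (opt, op1, op2, op3) */
    static uint32_t gvn_hash_tuple(uint32_t opt, int32_t op1, int32_t op2, int32_t op3, uint32_t mask)
    {
        uint32_t h = opt;
        h = h ^ (h << 17) ^ (h >> 13) ^ (uint32_t)op1;
        h = h ^ (h << 17) ^ (h >> 13) ^ (uint32_t)op2;
        h = h ^ (h << 17) ^ (h >> 13) ^ (uint32_t)op3;
        return h & mask;
    }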
-static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
+IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
 {
-	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
+	IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
+	if (_values[ref].op == IR_TOP) {
+		/* TOP meet NEW_CONST => NEW_CONST */
+		_values[ref].optx = val_insn->opt;
+		_values[ref].val.u64 = val_insn->val.u64;
+		return 1;
+	} else if (_values[ref].opt == val_insn->opt) {
+		/* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
+		if (_values[ref].val.u64 == val_insn->val.u64) {
+			return 0;
+		}
+	}
+
+	IR_MAKE_BOTTOM_EX(ref);
+	return 1;
+}
+
+IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
+{
+	ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
+	ir_insn *val_insn;
+
+	if (IR_IS_CONST_REF(val_identity)) {
+		val_insn = &ctx->ir_base[val_identity];
+	} else {
+		val_insn = &_values[val_identity];
+
+		if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
 #if IR_COMBO_COPY_PROPAGATION
+			if (_values[ref].op == IR_COPY) {
+				/* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
+				 * (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
+				if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
+					return 0; /* not changed */
+				}
+				ir_sccp_split_partition(ctx, _values, worklist, ref);
+				return 1;
+			} else {
+				IR_ASSERT(_values[ref].op != IR_BOTTOM);
+				/* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+				/* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
+				ir_sccp_add_identity(ctx, _values, val, ref);
+				return 1;
+			}
+#endif
+
+			IR_MAKE_BOTTOM(ref);
+			return 1;
+		}
+	}
+
+	return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
+}
+
+static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_gvn_hash *gvn_hash, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
+{
+	ir_insn *op1_insn, *op2_insn, *op3_insn;
+
 	op1 = ir_sccp_identity(ctx, _values, op1);
 	op2 = ir_sccp_identity(ctx, _values, op2);
 	op3 = ir_sccp_identity(ctx, _values, op3);
-#endif
 
 restart:
 	op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
@@ -78,57 +351,37 @@ restart:
 			op2 = ctx->fold_insn.op2;
 			op3 = ctx->fold_insn.op3;
 			goto restart;
+		case IR_FOLD_DO_CSE:
+#if IR_COMBO_GVN
+			if (gvn_hash) {
+				op1 = ir_gvn_lookup(ctx, gvn_hash, res);
+				if (op1) {
+					if (op1 == res) {
+						return 0; /* not changed */
+					}
+					goto ir_fold_copy;
+				}
+			}
+			IR_FALLTHROUGH;
+#endif
 		case IR_FOLD_DO_EMIT:
-			goto make_bottom;
+			IR_MAKE_BOTTOM_EX(res);
+			return 1;
 		case IR_FOLD_DO_COPY:
 			op1 = ctx->fold_insn.op1;
-			insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
-			if (IR_IS_CONST_OP(insn->op)) {
-				/* pass */
-			} else {
-#if IR_COMBO_COPY_PROPAGATION
-				if (_values[res].op == IR_TOP) {
-					/* pass to new copy */
-				} else if (_values[res].op == IR_COPY) {
-					if (ir_sccp_identity(ctx, _values, _values[res].op1) == ir_sccp_identity(ctx, _values, op1)) {
-						return 0; /* not changed */
-					} else {
-						goto make_bottom;
-					}
-				} else {
-					IR_ASSERT(_values[res].op != IR_BOTTOM);
-					/* we don't check for widening */
-				}
-				/* create new COPY */
-				ir_sccp_add_identity(ctx, _values, op1, res, insn->type);
-				return 1;
-#else
-				goto make_bottom;
+#if IR_COMBO_GVN
+ir_fold_copy:
 #endif
-			}
-			break;
+			return ir_sccp_meet(ctx, _values, worklist, res, op1);
 		case IR_FOLD_DO_CONST:
-			insn = &ctx->fold_insn;
-			break;
+			return ir_sccp_meet_const(ctx, _values, worklist, res, &ctx->fold_insn);
 		default:
 			IR_ASSERT(0);
 			return 0;
 	}
-
-	if (IR_IS_TOP(res)) {
-		_values[res].optx = IR_OPT(insn->type, insn->type);
-		_values[res].val.u64 = insn->val.u64;
-		return 1;
-	} else if (_values[res].opt == IR_OPT(insn->type, insn->type) && _values[res].val.u64 == insn->val.u64) {
-		return 0; /* not changed */
-	}
-
-make_bottom:
-	IR_MAKE_BOTTOM(res);
-	return 1;
 }
 
-static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist)
+static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
 {
 	ir_ref j, n, input, *merge_input, *p;
 	ir_insn *v, *new_const = NULL;
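Reader's summary of the meet rules implemented by ir_sccp_meet_const() and ir_sccp_meet() above (commentary, not part of the patch):

    /*
     * TOP       meet CONST c          -> CONST c
     * CONST c   meet CONST c          -> CONST c   (not changed)
     * CONST c   meet CONST d, c != d  -> BOTTOM
     * TOP/CONST meet COPY v           -> COPY v    (joins v's partition)
     * COPY v    meet COPY w           -> not changed if identity(v) == identity(w),
     *                                    otherwise the partition is split to BOTTOM
     * anything  meet BOTTOM           -> BOTTOM
     */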
@@ -164,11 +417,7 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i
 		} else {
 			v = &_values[input];
 			if (v->op == IR_TOP) {
-				/* do backward propagaton only once */
-				if (!v->op1) {
-					v->op1 = 1;
-					ir_bitqueue_add(worklist, input);
-				}
+				ir_sccp_add_input(ctx, _values, worklist, input);
 				continue;
 #if IR_COMBO_COPY_PROPAGATION
 			} else if (v->op == IR_COPY) {
@@ -222,11 +471,7 @@ next:
 		} else {
 			v = &_values[input];
 			if (v->op == IR_TOP) {
-				/* do backward propagaton only once */
-				if (!v->op1) {
-					v->op1 = 1;
-					ir_bitqueue_add(worklist, input);
-				}
+				ir_sccp_add_input(ctx, _values, worklist, input);
 				continue;
 #if IR_COMBO_COPY_PROPAGATION
 			} else if (v->op == IR_COPY) {
@@ -253,31 +498,14 @@ next:
 
 #if IR_COMBO_COPY_PROPAGATION
 	if (new_copy) {
-		if (_values[i].op == IR_COPY) {
-			if (phi_identity == new_copy_identity) {
-				return 0; /* not changed */
-			} else {
-				goto make_bottom;
-			}
-		} else {
-			IR_ASSERT(_values[i].op != IR_BOTTOM);
-			/* we don't check for widening */
-			ir_sccp_add_identity(ctx, _values, new_copy, i, insn->type);
-			return 1;
-		}
+		return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
 	}
 #endif
 
-	if (_values[i].op == IR_TOP) {
-		_values[i].optx = new_const->opt;
-		_values[i].val.u64 = new_const->val.u64;
-		return 1;
-	} else if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) {
-		return 0;
-	}
+	return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);
 
 make_bottom:
-	IR_MAKE_BOTTOM(i);
+	IR_MAKE_BOTTOM_EX(i);
 	return 1;
 }
@@ -373,7 +601,7 @@ static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
 # define ir_sccp_trace_end(c, v, i)
 #endif
 
-static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_gvn_hash *gvn_hash, ir_bitqueue *iter_worklist)
 {
 	ir_ref i, j, n, *p, use;
 	ir_use_list *use_list;
@@ -393,7 +621,7 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 			/* dead code */
 			continue;
 		} else if (insn->op == IR_PHI) {
-			if (!ir_sccp_meet_phi(ctx, _values, i, insn, worklist)) {
+			if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
 				continue;
 			}
 		} else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
@@ -411,11 +639,7 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 				if (input > 0) {
 					if (_values[input].op == IR_TOP) {
 						has_top = 1;
-						/* do backward propagaton only once */
-						if (!_values[input].op1) {
-							_values[input].op1 = 1;
-							ir_bitqueue_add(worklist, input);
-						}
+						ir_sccp_add_input(ctx, _values, worklist, input);
 					} else if (_values[input].op != IR_BOTTOM) {
 						/* Perform folding only if some of direct inputs
 						 * is going to be replaced by a constant or copy.
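Reader's note (not part of the patch): the repeated five-line pattern that ir_sccp_add_input() replaces throughout this file relies on a field reuse: while a cell is still TOP, its op1 slot serves as an "already queued" flag, so each unprocessed input enters the worklist at most once:

    if (_values[ref].op == IR_TOP && !_values[ref].op1) {
        _values[ref].op1 = 1;           /* mark as queued */
        ir_bitqueue_add(worklist, ref); /* schedule exactly once */
    }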
@@ -430,12 +654,12 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 					continue;
 				}
 				if (!may_benefit) {
-					IR_MAKE_BOTTOM(i);
+					IR_MAKE_BOTTOM_EX(i);
 					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC || insn->op == IR_ZEXT
 					 || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) {
 						ir_bitqueue_add(iter_worklist, i);
 					}
-				} else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
+				} else if (!ir_sccp_fold(ctx, _values, worklist, gvn_hash, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
 					/* not changed */
 					continue;
 				} else if (_values[i].op == IR_BOTTOM) {
@@ -446,7 +670,7 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 				}
 			}
 		} else {
-			IR_MAKE_BOTTOM(i);
+			IR_MAKE_BOTTOM_EX(i);
 		}
 	} else if (flags & IR_OP_FLAG_BB_START) {
 		if (insn->op == IR_MERGE || insn->op == IR_BEGIN) {
@@ -488,11 +712,7 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 			}
 			if (insn->op == IR_IF) {
 				if (IR_IS_TOP(insn->op2)) {
-					/* do backward propagaton only once */
-					if (!_values[insn->op2].op1) {
-						_values[insn->op2].op1 = 1;
-						ir_bitqueue_add(worklist, insn->op2);
-					}
+					ir_sccp_add_input(ctx, _values, worklist, insn->op2);
 					continue;
 				}
 				if (IR_IS_CONST(insn->op2)) {
@@ -519,11 +739,7 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 			IR_MAKE_BOTTOM(i);
 		} else if (insn->op == IR_SWITCH) {
 			if (IR_IS_TOP(insn->op2)) {
-				/* do backward propagaton only once */
-				if (!_values[insn->op2].op1) {
-					_values[insn->op2].op1 = 1;
-					ir_bitqueue_add(worklist, insn->op2);
-				}
+				ir_sccp_add_input(ctx, _values, worklist, insn->op2);
 				continue;
 			}
 			if (IR_IS_CONST(insn->op2)) {
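Reader's note (not part of the patch): IR_MAKE_BOTTOM_EX, used in the hunks above, differs from plain IR_MAKE_BOTTOM only when the cell currently holds a COPY. Lowering one partition member to BOTTOM invalidates the identity shared by the other members, so the whole circular list must be split and its members re-queued, which is exactly what the macro expands to:

    if (_values[ref].op == IR_COPY) {
        ir_sccp_split_partition(ctx, _values, worklist, ref); /* re-queues members */
    } else {
        IR_MAKE_BOTTOM(ref);
    }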
@@ -562,46 +778,59 @@ static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist
 				ir_bitqueue_add(iter_worklist, i);
 				IR_MAKE_BOTTOM(i);
 			} else {
-				IR_MAKE_BOTTOM(i);
+				if (_values[i].op == IR_TOP) {
+					bool has_top = 0;
 
-				/* control, call, load and store instructions may have unprocessed inputs */
-				n = IR_INPUT_EDGES_COUNT(flags);
-				if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
-					for (j = 0; j < (n>>2); j++) {
-						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
-					}
-					for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
-						IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
-						use = *p;
-						if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) {
-							ir_bitqueue_add(worklist, use);
+					/* control, call, load and store instructions may have unprocessed inputs */
+					n = IR_INPUT_EDGES_COUNT(flags);
+					if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
+						for (j = 0; j < (n>>2); j++) {
+							_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
+						}
+						for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
+							IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
+							use = *p;
+							if (use > 0 && _values[use].op == IR_TOP) {
+								has_top = 1;
+								ir_sccp_add_input(ctx, _values, worklist, use);
+							}
+						}
+					} else if (n >= 2) {
+						IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
+						use = insn->op2;
+						if (use > 0 && _values[use].op == IR_TOP) {
+							has_top = 1;
+							ir_sccp_add_input(ctx, _values, worklist, use);
+						}
+						if (n > 2) {
+							IR_ASSERT(n == 3);
+							IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
+							use = insn->op3;
+							if (use > 0 && _values[use].op == IR_TOP) {
+								has_top = 1;
+								ir_sccp_add_input(ctx, _values, worklist, use);
+							}
 						}
 					}
-				} else if (n >= 2) {
-					IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
-					use = insn->op2;
-					if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) {
-						ir_bitqueue_add(worklist, use);
-					}
-					if (n > 2) {
-						IR_ASSERT(n == 3);
-						IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
-						use = insn->op3;
-						if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) {
-							ir_bitqueue_add(worklist, use);
+
+					if (has_top && !(flags & IR_OP_FLAG_BB_END)) {
+						use = ir_next_control(ctx, i);
+						if (_values[use].op == IR_TOP) {
+							has_top = 1;
+							/* do forward control propagation only once */
+							if (!_values[use].op1) {
+								_values[use].op1 = 1;
+								ir_bitqueue_add(worklist, use);
+							}
 						}
+						continue;
 					}
 				}
+
+				IR_MAKE_BOTTOM(i);
 			}
 		}
-		use_list = &ctx->use_lists[i];
-		n = use_list->count;
-		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
-			use = *p;
-			if (_values[use].op != IR_BOTTOM) {
-				ir_bitqueue_add(worklist, use);
-			}
-		}
+		ir_sccp_add_uses(ctx, _values, worklist, i);
 	}
 
 #ifdef IR_DEBUG
@@ -917,7 +1146,7 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values
 		}
 	}
 }
-static void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
+static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
 {
 	ir_ref i, j;
 	ir_insn *value;
@@ -935,7 +1164,7 @@ static void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *workli
 			ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
 #if IR_COMBO_COPY_PROPAGATION
 		} else if (value->op == IR_COPY) {
-			ir_sccp_replace_insn(ctx, _values, i, value->op1, iter_worklist);
+			ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
 #endif
 		} else if (value->op == IR_TOP) {
 			/* remove unreachable instruction */
@@ -1114,6 +1343,7 @@ restart:
 			op2 = ctx->fold_insn.op2;
 			op3 = ctx->fold_insn.op3;
 			goto restart;
+		case IR_FOLD_DO_CSE:
 		case IR_FOLD_DO_EMIT:
 			insn = &ctx->ir_base[ref];
 			if (insn->opt != ctx->fold_insn.opt
@@ -2638,7 +2868,7 @@ static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_
 	}
 }
 
-static void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
+static IR_NEVER_INLINE void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
 {
 	ir_ref i;
 	ir_insn *insn;
@@ -2731,17 +2961,32 @@ int ir_sccp(ir_ctx *ctx)
 {
 	ir_bitqueue sccp_worklist, iter_worklist;
 	ir_insn *_values;
+	ir_gvn_hash *gvn_hash = NULL;
+#if IR_COMBO_GVN
+	ir_gvn_hash gvn_hash_holder;
+#endif
 
 	ctx->flags2 |= IR_OPT_IN_SCCP;
+#if IR_COMBO_GVN
+	if (ctx->flags2 & IR_MEM2SSA_VARS) {
+		ir_gvn_hash_init(ctx, &gvn_hash_holder);
+		gvn_hash = &gvn_hash_holder;
+	}
+#endif
 	ir_bitqueue_init(&iter_worklist, ctx->insns_count);
 	ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
 	_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
 
-	ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
+	ir_sccp_analyze(ctx, _values, &sccp_worklist, gvn_hash, &iter_worklist);
 	ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
 
 	ir_mem_free(_values);
 	ir_bitqueue_free(&sccp_worklist);
+#if IR_COMBO_GVN
+	if (gvn_hash) {
+		ir_gvn_hash_free(gvn_hash);
+	}
+#endif
 
 	ctx->flags2 |= IR_CFG_REACHABLE;
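Taken together (reader's sketch of the observable effect, not part of the patch): classic folding already hash-conses instructions as they are built, so the new GVN table matters for pure instructions that only become identical after SCCP propagates constants and copies through their operands:

    /* before SCCP:  t1 = ADD(x, y);  t2 = ADD(x, z);  where z turns out to be a COPY of y
     * during SCCP:  folding t2 normalizes it to ADD(x, y), ir_fold_cse returns
     *               IR_FOLD_DO_CSE, and ir_gvn_lookup() finds t1 in the table
     * after  SCCP:  t2 becomes a COPY of t1; ir_sccp_transform() rewrites all
     *               of t2's uses to t1 via ir_sccp_replace_insn() */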