Update IR

IR commit: 8bb0acca45a7b0f12691f4258e41462599efbd74
This commit is contained in:
Dmitry Stogov 2025-03-13 03:07:43 +03:00
parent 7f771e1e87
commit ae67eb0490
No known key found for this signature in database
2 changed files with 540 additions and 205 deletions

View file

@ -413,10 +413,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
n = ctx->use_lists[ref].refs; n = ctx->use_lists[ref].refs;
for (i = 0; i < clones_count; i++) { for (i = 0; i < clones_count; i++) {
clone = clones[i].ref; clone = clones[i].ref;
if (clones[i].use_count == 1) { if (clones[i].use_count == 1
&& ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) {
/* TOTALLY_USEFUL block may be a head of a diamond above the real usage. /* TOTALLY_USEFUL block may be a head of a diamond above the real usage.
* Sink it down to the real usage block. * Sink it down to the real usage block.
* Clones with few uses we be sunk into the LCA block. * Clones with few uses will be sunk into the LCA block.
*/ */
clones[i].block = uses[clones[i].use].block; clones[i].block = uses[clones[i].use].block;
} }

View file

@ -553,8 +553,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
} }
if (!may_benefit) { if (!may_benefit) {
IR_MAKE_BOTTOM_EX(i); IR_MAKE_BOTTOM_EX(i);
if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
|| insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) {
ir_bitqueue_add(iter_worklist, i); ir_bitqueue_add(iter_worklist, i);
} }
} else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) { } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
@ -562,8 +561,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
continue; continue;
} else if (_values[i].op == IR_BOTTOM) { } else if (_values[i].op == IR_BOTTOM) {
insn = &ctx->ir_base[i]; insn = &ctx->ir_base[i];
if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
|| insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) {
ir_bitqueue_add(iter_worklist, i); ir_bitqueue_add(iter_worklist, i);
} }
} }
@ -571,7 +569,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
IR_MAKE_BOTTOM_EX(i); IR_MAKE_BOTTOM_EX(i);
} }
} else if (flags & IR_OP_FLAG_BB_START) { } else if (flags & IR_OP_FLAG_BB_START) {
if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
ir_bitqueue_add(iter_worklist, i); ir_bitqueue_add(iter_worklist, i);
} }
if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
@ -919,163 +917,128 @@ static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref
} }
} }
static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs) static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{ {
ir_ref i, j, n, k, *p, use; ir_ref old_merge_inputs, new_merge_inputs, i, *p;
ir_insn *insn, *use_insn;
ir_use_list *use_list; ir_use_list *use_list;
ir_bitset life_inputs; ir_bitset life_inputs;
ir_bitset_base_t holder = 0;
insn = &ctx->ir_base[ref];
IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
n = insn->inputs_count; old_merge_inputs = insn->inputs_count;
if (n - unfeasible_inputs == 1) { new_merge_inputs = 0;
/* remove MERGE completely */ life_inputs = (old_merge_inputs - IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);
for (j = 1; j <= n; j++) {
ir_ref input = ir_insn_op(insn, j);
if (input && IR_IS_REACHABLE(input)) {
ir_insn *input_insn = &ctx->ir_base[input];
IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_LOOP_END|| for (i = 1; i <= old_merge_inputs; i++) {
input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); ir_ref input = ir_insn_op(insn, i);
if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) {
ir_ref prev, next = IR_UNUSED;
ir_insn *next_insn = NULL;
prev = input_insn->op1; if (input) {
use_list = &ctx->use_lists[ref]; new_merge_inputs++;
if (use_list->count == 1) { if (new_merge_inputs != i) {
next = ctx->use_edges[use_list->refs]; ir_insn_set_op(insn, new_merge_inputs, input);
next_insn = &ctx->ir_base[next]; }
} else { ir_bitset_incl(life_inputs, i);
k = 0; }
p = &ctx->use_edges[use_list->refs]; }
while (k < use_list->count) {
use = *p;
use_insn = &ctx->ir_base[use];
#if IR_COMBO_COPY_PROPAGATION
IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed");
#else
if (use_insn->op == IR_PHI) {
/* Convert PHI into COPY */
ir_ref i, n = use_insn->inputs_count;
for (i = 2; i <= n; i++) { if (new_merge_inputs == old_merge_inputs) {
if (i != j + 1) { /* All inputs are feasible */
ir_ref from = ir_insn_op(use_insn, i); if (life_inputs != &holder) {
if (from > 0) { ir_mem_free(life_inputs);
ir_use_list_remove_one(ctx, from, use); }
} return 0;
ir_insn_set_op(use_insn, i, IR_UNUSED); }
}
} for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1); ir_insn_set_op(insn, i, IR_UNUSED);
use_insn->op1 = ir_insn_op(use_insn, j + 1); }
ir_insn_set_op(use_insn, j + 1, IR_UNUSED);
ir_use_list_remove_one(ctx, ref, use); if (new_merge_inputs <= 1) {
p = &ctx->use_edges[use_list->refs + k]; #if 0
continue; if (new_merge_inputs == 1
} && insn->op == IR_LOOP_BEGIN
&& insn->op1 > ref) { // TODO: check dominance instead of order
/* dead loop */
ir_use_list_remove_one(ctx, insn->op1, ref);
insn->op1 = IR_UNUSED;
new_merge_inputs = 0;
}
#endif #endif
if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
IR_ASSERT(!next); ir_bitqueue_add(worklist, ref);
next = use;
next_insn = use_insn;
} else if (use_insn->op != IR_NOP) {
IR_ASSERT(use_insn->op1 == ref);
IR_ASSERT(use_insn->op == IR_VAR);
ir_ref region = prev;
while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
region = ctx->ir_base[region].op1;
}
use_insn->op1 = region;
ir_use_list_add(ctx, region, use);
p = &ctx->use_edges[use_list->refs + k];
}
k++;
p++;
}
}
IR_ASSERT(prev && next);
if (prev < next) {
/* remove MERGE and input END from double linked control list */
next_insn->op1 = prev;
ir_use_list_replace_one(ctx, prev, input, next);
/* remove MERGE and input END instructions */
ir_sccp_make_nop(ctx, ref);
ir_sccp_make_nop(ctx, input);
} else {
for (i = 2; i <= n; i++) {
ir_insn_set_op(insn, i, IR_UNUSED);
}
insn->op = IR_BEGIN;
insn->op1 = input;
input_insn->op = IR_END;
}
break;
} else {
for (i = 2; i <= n; i++) {
ir_insn_set_op(insn, i, IR_UNUSED);
}
insn->op = IR_BEGIN;
insn->op1 = input;
}
}
}
} else { } else {
n = insn->inputs_count; insn->inputs_count = new_merge_inputs;
i = 1; }
life_inputs = ir_bitset_malloc(n + 1);
for (j = 1; j <= n; j++) {
ir_ref input = ir_insn_op(insn, j);
if (input) { /* Update PHIs */
if (i != j) { use_list = &ctx->use_lists[ref];
ir_insn_set_op(insn, i, input); if (use_list->count > 1) {
} ir_ref use_count = 0;
ir_bitset_incl(life_inputs, j); ir_ref *q;
i++;
}
}
j = i;
while (j <= n) {
ir_insn_set_op(insn, j, IR_UNUSED);
j++;
}
i--;
insn->inputs_count = i;
n++; for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
use_list = &ctx->use_lists[ref]; ir_ref use = *p;
if (use_list->count > 1) { ir_insn *use_insn = &ctx->ir_base[use];
for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
use = *p;
use_insn = &ctx->ir_base[use];
if (use_insn->op == IR_PHI) {
i = 2;
for (j = 2; j <= n; j++) {
ir_ref input = ir_insn_op(use_insn, j);
if (ir_bitset_in(life_inputs, j - 1)) { if (use_insn->op == IR_PHI) {
IR_ASSERT(input); ir_ref j, k;
if (i != j) {
ir_insn_set_op(use_insn, i, input); /* compress PHI */
} for (j = k = 1; j <= old_merge_inputs; j++) {
i++; ir_ref input = ir_insn_op(use_insn, j + 1);
} else if (!IR_IS_CONST_REF(input)) {
ir_use_list_remove_one(ctx, input, use); if (ir_bitset_in(life_inputs, j)) {
IR_ASSERT(input);
if (k != j) {
ir_insn_set_op(use_insn, k + 1, input);
} }
k++;
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, use);
} }
while (i <= n) { }
ir_insn_set_op(use_insn, i, IR_UNUSED); while (k <= old_merge_inputs) {
i++; k++;
} ir_insn_set_op(use_insn, k, IR_UNUSED);
use_insn->inputs_count = insn->inputs_count + 1; }
if (new_merge_inputs == 0) {
/* remove PHI */
#if 0
use_insn->op1 = IR_UNUSED;
ir_iter_remove_insn(ctx, use, worklist);
#else
IR_ASSERT(0);
#endif
continue;
} else if (new_merge_inputs == 1) {
/* replace PHI by COPY */
use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
use_insn->op1 = use_insn->op2;
use_insn->op2 = IR_UNUSED;
ir_bitqueue_add(worklist, use);
continue;
} else {
use_insn->inputs_count = new_merge_inputs + 1;
} }
} }
if (p != q) {
*q = use;
}
q++;
use_count++;
} }
for (i = use_count; i < use_list->count; q++, i++) {
*q = IR_UNUSED;
}
use_list->count = use_count;
}
if (life_inputs != &holder) {
ir_mem_free(life_inputs); ir_mem_free(life_inputs);
} }
return 1;
} }
static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
@ -1105,7 +1068,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
if (insn->op == IR_NOP) { if (insn->op == IR_NOP) {
/* already removed */ /* already removed */
} else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { if (insn->op != IR_PARAM) {
ir_sccp_remove_insn(ctx, _values, i, iter_worklist); ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
} }
} else { } else {
@ -1137,7 +1100,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
while ((i = ir_bitqueue_pop(worklist)) >= 0) { while ((i = ir_bitqueue_pop(worklist)) >= 0) {
IR_ASSERT(_values[i].op == IR_MERGE); IR_ASSERT(_values[i].op == IR_MERGE);
ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
} }
} }
@ -1261,7 +1224,7 @@ static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bit
ir_bitqueue_add(worklist, input); ir_bitqueue_add(worklist, input);
} else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
/* try to optimize PHI into ABS/MIN/MAX/COND */ /* try to optimize PHI into ABS/MIN/MAX/COND */
ir_bitqueue_add(worklist, input); ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
} }
} }
} }
@ -1690,9 +1653,10 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use)
return ref; return ref;
} }
static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
{ {
ir_insn *insn = &ctx->ir_base[ref]; ir_insn *insn = &ctx->ir_base[ref];
ir_ref *p, n, input;
if (IR_IS_CONST_REF(ref)) { if (IR_IS_CONST_REF(ref)) {
return !IR_IS_SYM_CONST(insn->op); return !IR_IS_SYM_CONST(insn->op);
@ -1700,16 +1664,16 @@ static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref)
switch (insn->op) { switch (insn->op) {
case IR_ZEXT: case IR_ZEXT:
case IR_SEXT: case IR_SEXT:
return ctx->ir_base[insn->op1].type == type; case IR_TRUNC:
return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
case IR_NEG: case IR_NEG:
case IR_ABS: case IR_ABS:
case IR_NOT: case IR_NOT:
return ctx->use_lists[ref].count == 1 && return ctx->use_lists[ref].count == 1 &&
ir_may_promote_i2i(ctx, type, insn->op1); ir_may_promote_trunc(ctx, type, insn->op1);
case IR_ADD: case IR_ADD:
case IR_SUB: case IR_SUB:
case IR_MUL: case IR_MUL:
// case IR_DIV:
case IR_MIN: case IR_MIN:
case IR_MAX: case IR_MAX:
case IR_OR: case IR_OR:
@ -1717,8 +1681,41 @@ static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref)
case IR_XOR: case IR_XOR:
case IR_SHL: case IR_SHL:
return ctx->use_lists[ref].count == 1 && return ctx->use_lists[ref].count == 1 &&
ir_may_promote_i2i(ctx, type, insn->op1) && ir_may_promote_trunc(ctx, type, insn->op1) &&
ir_may_promote_i2i(ctx, type, insn->op2); ir_may_promote_trunc(ctx, type, insn->op2);
// case IR_SHR:
// case IR_SAR:
// case IR_DIV:
// case IR_MOD:
// case IR_FP2INT:
// TODO: ???
case IR_COND:
return ctx->use_lists[ref].count == 1 &&
ir_may_promote_trunc(ctx, type, insn->op2) &&
ir_may_promote_trunc(ctx, type, insn->op3);
case IR_PHI:
if (ctx->use_lists[ref].count != 1) {
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref count = 0;
for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
if (*p != ref) {
if (count) {
return 0;
}
count = 1;
}
}
}
for (p = insn->ops + 1, n = insn->inputs_count - 1; n > 0; p++, n--) {
input = *p;
if (input != ref) {
if (!ir_may_promote_trunc(ctx, type, input)) {
return 0;
}
}
}
return 1;
default: default:
break; break;
} }
@ -1730,6 +1727,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
{ {
ir_insn *insn = &ctx->ir_base[ref]; ir_insn *insn = &ctx->ir_base[ref];
uint32_t count; uint32_t count;
ir_ref *p, n, input;
if (IR_IS_CONST_REF(ref)) { if (IR_IS_CONST_REF(ref)) {
return ir_const(ctx, insn->val, type); return ir_const(ctx, insn->val, type);
@ -1737,6 +1735,22 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
switch (insn->op) { switch (insn->op) {
case IR_ZEXT: case IR_ZEXT:
case IR_SEXT: case IR_SEXT:
case IR_TRUNC:
if (ctx->ir_base[insn->op1].type != type) {
ir_type src_type = ctx->ir_base[insn->op1].type;
if (ir_type_size[src_type] == ir_type_size[type]) {
insn->op = IR_BITCAST;
} else if (ir_type_size[src_type] > ir_type_size[type]) {
insn->op = IR_TRUNC;
} else {
if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
}
}
insn->type = type;
return ref;
}
count = ctx->use_lists[ref].count; count = ctx->use_lists[ref].count;
ir_use_list_remove_all(ctx, ref, use); ir_use_list_remove_all(ctx, ref, use);
if (ctx->use_lists[ref].count == 0) { if (ctx->use_lists[ref].count == 0) {
@ -1768,7 +1782,6 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
case IR_ADD: case IR_ADD:
case IR_SUB: case IR_SUB:
case IR_MUL: case IR_MUL:
// case IR_DIV:
case IR_MIN: case IR_MIN:
case IR_MAX: case IR_MAX:
case IR_OR: case IR_OR:
@ -1783,6 +1796,30 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
} }
insn->type = type; insn->type = type;
return ref; return ref;
// case IR_DIV:
// case IR_MOD:
// case IR_SHR:
// case IR_SAR:
// case IR_FP2INT:
// TODO: ???
case IR_COND:
if (insn->op2 == insn->op3) {
insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref);
} else {
insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref);
insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref);
}
insn->type = type;
return ref;
case IR_PHI:
for (p = insn->ops + 1, n = insn->inputs_count - 1; n > 0; p++, n--) {
input = *p;
if (input != ref) {
*p = ir_promote_i2i(ctx, type, input, ref);
}
}
insn->type = type;
return ref;
default: default:
break; break;
} }
@ -1854,58 +1891,163 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
return ref; return ref;
} }
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist) static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val)
{ {
ir_type type = insn->type; ir_insn *insn;
ir_op op = insn->op; ir_ref n, *p, input, result, ctrl;
ir_ref ref = insn->op1;
ir_insn *phi_insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(val)) {
ir_insn *op_insn; return 1; /* IR_START */
}
insn = &ctx->ir_base[val];
if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
return val;
}
IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA);
if (IR_OPND_KIND(ir_op_flags[insn->op], 1) & IR_OPND_CONTROL_DEP) {
return insn->op1;
}
n = insn->inputs_count;
p = insn->ops + 1;
result = 1;
for (; n > 0; p++, n--) {
input = *p;
ctrl = _ir_estimated_control(ctx, input);
if (ctrl > result) { // TODO: check dominance depth instead of order
result = ctrl;
}
}
return result;
}
/* Check whether "ref" is computed outside the loop headed by "loop".
 * A value is treated as invariant when its estimated controlling node
 * precedes the loop header in instruction order. */
static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
{
	ir_ref ctrl = _ir_estimated_control(ctx, ref);

	return ctrl < loop; // TODO: check dominance instead of order
}
/* Decide whether extending "ref" (with SEXT/ZEXT "op") is cheap enough to
 * allow widening an induction variable. Constants and loop-invariant values
 * are always cheap; an in-loop LOAD is cheap only when the extension can be
 * folded into its (nearly) sole use. */
static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
{
	ir_insn *insn;

	if (IR_IS_CONST_REF(ref)) {
		return 1;
	}
	insn = &ctx->ir_base[ref];
	if (insn->op != IR_LOAD) {
		return ir_is_loop_invariant(ctx, ref, loop);
	}
	if (ir_is_loop_invariant(ctx, ref, loop)) {
		return 1;
	}
	/* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
	if (ctx->use_lists[ref].count == 2) {
		return 1;
	}
	if (ctx->use_lists[ref].count == 3) {
		ir_use_list *use_list = &ctx->use_lists[ref];
		ir_ref *p, n, use;

		/* The extra use must be the same kind of extension, or a
		 * control/memory user chained directly on the LOAD. */
		for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
			use = *p;
			if (use == ext_ref) {
				continue;
			} else {
				ir_insn *use_insn = &ctx->ir_base[use];

				if (use_insn->op != op
				 && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
				  || use_insn->op1 != ref)) {
					return 0;
				}
			}
		}
		return 1;
	}
	return 0;
}
static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
{
ir_op op = ctx->ir_base[ext_ref].op;
ir_type type = ctx->ir_base[ext_ref].type;
ir_insn *phi_insn;
ir_use_list *use_list; ir_use_list *use_list;
ir_ref n, *p, use, op_ref; ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;
/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
if (phi_insn->op != IR_PHI
|| phi_insn->inputs_count != 3 /* (2 values) */
|| ctx->ir_base[phi_insn->op1].op != IR_LOOP_BEGIN) {
return 0;
}
op_ref = phi_insn->op3;
op_insn = &ctx->ir_base[op_ref];
if ((op_insn->op != IR_ADD && op_insn->op != IR_SUB && op_insn->op != IR_MUL)
|| (op_insn->op1 != ref && op_insn->op2 != ref)
|| ctx->use_lists[op_ref].count != 1) {
return 0;
}
/* Check if we may change the type of the induction variable */ /* Check if we may change the type of the induction variable */
use_list = &ctx->use_lists[ref]; use_list = &ctx->use_lists[phi_ref];
n = use_list->count; n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { if (n > 1) {
use = *p; for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
if (use == op_ref || use == ext_ref) { use = *p;
continue; if (use == op_ref || use == ext_ref) {
} else {
ir_insn *use_insn = &ctx->ir_base[use];
if ((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
&& (use_insn->op1 == ref || use_insn->op2 == ref)) {
continue;
} else if (use_insn->op == IR_IF) {
continue; continue;
} else { } else {
return 0; ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
continue;
}
} else if (use_insn->op2 == phi_ref) {
if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
continue;
}
}
return 0;
} else if (use_insn->op == IR_IF) {
continue;
} else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
ext_ref_2 = use;
continue;
} else {
return 0;
}
} }
} }
} }
phi_insn->type = insn->type; use_list = &ctx->use_lists[op_ref];
op_insn->type = insn->type; n = use_list->count;
if (n > 1) {
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
use = *p;
if (use == phi_ref || use == ext_ref) {
continue;
} else {
ir_insn *use_insn = &ctx->ir_base[use];
for (n = 0; n < ctx->use_lists[ref].count; n++) { if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
continue;
}
} else if (use_insn->op2 == phi_ref) {
if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
continue;
}
}
return 0;
} else if (use_insn->op == IR_IF) {
continue;
} else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
ext_ref_2 = use;
continue;
} else {
return 0;
}
}
}
}
for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
/* "use_lists" may be reallocated by ir_ext_ref() */ /* "use_lists" may be reallocated by ir_ext_ref() */
use = ctx->use_edges[ctx->use_lists[ref].refs + n]; use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
if (use == ext_ref) { if (use == ext_ref) {
continue; continue;
} else { } else {
@ -1913,11 +2055,14 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi
if (use_insn->op == IR_IF) { if (use_insn->op == IR_IF) {
continue; continue;
} else if (use_insn->op == op) {
IR_ASSERT(ext_ref_2 == use);
continue;
} }
IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
|| use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL) || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
&& (use_insn->op1 == ref || use_insn->op2 == ref)); && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
if (use_insn->op1 != ref) { if (use_insn->op1 != phi_ref) {
if (IR_IS_CONST_REF(use_insn->op1) if (IR_IS_CONST_REF(use_insn->op1)
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
@ -1926,7 +2071,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi
} }
ir_bitqueue_add(worklist, use); ir_bitqueue_add(worklist, use);
} }
if (use_insn->op2 != ref) { if (use_insn->op2 != phi_ref) {
if (IR_IS_CONST_REF(use_insn->op2) if (IR_IS_CONST_REF(use_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
@ -1938,19 +2083,108 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi
} }
} }
ir_iter_replace_insn(ctx, ext_ref, ref, worklist); if (ctx->use_lists[op_ref].count > 1) {
for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
/* "use_lists" may be reallocated by ir_ext_ref() */
use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
if (use == ext_ref || use == phi_ref) {
continue;
} else {
ir_insn *use_insn = &ctx->ir_base[use];
phi_insn = &ctx->ir_base[ref]; if (use_insn->op == IR_IF) {
continue;
} else if (use_insn->op == op) {
IR_ASSERT(ext_ref_2 == use);
continue;
}
IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
if (use_insn->op1 != op_ref) {
if (IR_IS_CONST_REF(use_insn->op1)
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
} else {
ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
}
ir_bitqueue_add(worklist, use);
}
if (use_insn->op2 != op_ref) {
if (IR_IS_CONST_REF(use_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
} else {
ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
}
ir_bitqueue_add(worklist, use);
}
}
}
}
ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);
if (ext_ref_2) {
ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
}
ctx->ir_base[op_ref].type = type;
phi_insn = &ctx->ir_base[phi_ref];
phi_insn->type = type;
if (IR_IS_CONST_REF(phi_insn->op2) if (IR_IS_CONST_REF(phi_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) { && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type); ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
} else { } else {
ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type, worklist); ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
} }
return 1; return 1;
} }
/* Try to eliminate the extension "ext_ref" by widening a simple induction
 * variable of the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, invariant).
 * The extended value may be either the PHI itself or the update operation. */
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
{
	ir_ref ref = insn->op1;
	ir_insn *def = &ctx->ir_base[ref];

	if (def->op == IR_PHI
	 && def->inputs_count == 3 /* (2 values) */
	 && ctx->ir_base[def->op1].op == IR_LOOP_BEGIN) {
		/* The extension is applied to the PHI */
		ir_ref op_ref = def->op3;
		ir_insn *op_insn = &ctx->ir_base[op_ref];

		if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
			/* The non-PHI operand of the update op must be loop-invariant */
			if ((op_insn->op1 == ref && ir_is_loop_invariant(ctx, op_insn->op2, def->op1))
			 || (op_insn->op1 != ref && op_insn->op2 == ref
			  && ir_is_loop_invariant(ctx, op_insn->op1, def->op1))) {
				return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
			}
		}
	} else if (def->op == IR_ADD || def->op == IR_SUB || def->op == IR_MUL) {
		/* The extension is applied to the update op; look for the PHI operand */
		if (!IR_IS_CONST_REF(def->op1)) {
			ir_insn *phi = &ctx->ir_base[def->op1];

			if (phi->op == IR_PHI
			 && phi->inputs_count == 3 /* (2 values) */
			 && phi->op3 == ref
			 && ctx->ir_base[phi->op1].op == IR_LOOP_BEGIN
			 && ir_is_loop_invariant(ctx, def->op2, phi->op1)) {
				return ir_try_promote_induction_var_ext(ctx, ext_ref, def->op1, ref, worklist);
			}
		}
		if (!IR_IS_CONST_REF(def->op2)) {
			ir_insn *phi = &ctx->ir_base[def->op2];

			if (phi->op == IR_PHI
			 && phi->inputs_count == 3 /* (2 values) */
			 && phi->op3 == ref
			 && ctx->ir_base[phi->op1].op == IR_LOOP_BEGIN
			 && ir_is_loop_invariant(ctx, def->op1, phi->op1)) {
				return ir_try_promote_induction_var_ext(ctx, ext_ref, def->op2, ref, worklist);
			}
		}
	}
	return 0;
}
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref) static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
{ {
ir_use_list *use_list = &ctx->use_lists[if_ref]; ir_use_list *use_list = &ctx->use_lists[if_ref];
@ -1969,11 +2203,47 @@ static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_
} }
} }
static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist2) static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
{ {
ir_ref prev, next; ir_ref prev, next;
ir_use_list *use_list; ir_use_list *use_list;
if (ctx->use_lists[begin].count > 1) {
ir_ref *p, n, i, use;
ir_insn *use_insn;
ir_ref region = end;
ir_ref next = IR_UNUSED;
while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
region = ctx->ir_base[region].op1;
}
use_list = &ctx->use_lists[begin];
n = use_list->count;
for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) {
use = *p;
use_insn = &ctx->ir_base[use];
if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
IR_ASSERT(!next);
next = use;
} else {
IR_ASSERT(use_insn->op == IR_VAR);
IR_ASSERT(use_insn->op1 == begin);
use_insn->op1 = region;
if (ir_use_list_add(ctx, region, use)) {
/* restore after reallocation */
use_list = &ctx->use_lists[begin];
n = use_list->count;
p = &ctx->use_edges[use_list->refs + i];
}
}
}
IR_ASSERT(next);
ctx->use_edges[use_list->refs] = next;
use_list->count = 1;
}
IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN); IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
IR_ASSERT(ctx->ir_base[end].op == IR_END); IR_ASSERT(ctx->ir_base[end].op == IR_END);
IR_ASSERT(ctx->ir_base[begin].op1 == end); IR_ASSERT(ctx->ir_base[begin].op1 == end);
@ -1994,7 +2264,7 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *
ir_use_list_replace_one(ctx, prev, end, next); ir_use_list_replace_one(ctx, prev, end, next);
if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) { if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
ir_bitqueue_add(worklist2, prev); ir_bitqueue_add(worklist, prev);
} }
} }
@ -2950,6 +3220,68 @@ static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge
} }
} }
/* Return the first SEXT/ZEXT user of "ref", or IR_UNUSED if none exists. */
static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[ref];
	ir_ref *p = &ctx->use_edges[use_list->refs];
	ir_ref n = use_list->count;

	while (n > 0) {
		ir_ref use = *p;
		ir_insn *use_insn = &ctx->ir_base[use];

		if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
			return use;
		}
		p++;
		n--;
	}
	return IR_UNUSED;
}
/* Widen the induction variable (phi_ref, op_ref) when one of its results is
 * extended: look for a SEXT/ZEXT user first on the PHI, then on the update op. */
static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
{
	ir_ref ext_ref = ir_find_ext_use(ctx, phi_ref);

	if (!ext_ref) {
		ext_ref = ir_find_ext_use(ctx, op_ref);
		if (!ext_ref) {
			return;
		}
	}
	ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
}
/* Scan the PHIs of a 2-input LOOP_BEGIN for simple induction variables in the
 * form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, invariant); and try to widen them. */
static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
{
	ir_ref n;

	if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
		return;
	}

	for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
		/* re-read each iteration: "use_lists" may be reallocated by ir_ext_ref() */
		ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
		ir_insn *use_insn = &ctx->ir_base[use];
		ir_ref op_ref, invariant_op;
		ir_insn *op_insn;

		if (use_insn->op != IR_PHI) {
			continue;
		}
		op_ref = use_insn->op3;
		op_insn = &ctx->ir_base[op_ref];
		if (op_insn->op != IR_ADD && op_insn->op != IR_SUB && op_insn->op != IR_MUL) {
			continue;
		}
		/* the update op must combine the PHI with a loop-invariant operand */
		if (op_insn->op1 == use) {
			invariant_op = op_insn->op2;
		} else if (op_insn->op2 == use) {
			invariant_op = op_insn->op1;
		} else {
			continue;
		}
		if (ir_is_loop_invariant(ctx, invariant_op, loop_ref)) {
			ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
		}
	}
}
static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap) static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
{ {
ir_insn *condition_insn = &ctx->ir_base[condition]; ir_insn *condition_insn = &ctx->ir_base[condition];
@ -3042,6 +3374,7 @@ static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu
insn->optx = IR_OPTX(IR_END, IR_VOID, 1); insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
if (!IR_IS_CONST_REF(insn->op2)) { if (!IR_IS_CONST_REF(insn->op2)) {
ir_use_list_remove_one(ctx, insn->op2, ref); ir_use_list_remove_one(ctx, insn->op2, ref);
ir_bitqueue_add(worklist, insn->op2);
} }
insn->op2 = IR_UNUSED; insn->op2 = IR_UNUSED;
@ -3168,7 +3501,7 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
} }
goto folding; goto folding;
case IR_TRUNC: case IR_TRUNC:
if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) { if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i);
insn->op1 = ref; insn->op1 = ref;
ir_iter_replace_insn(ctx, i, ref, worklist); ir_iter_replace_insn(ctx, i, ref, worklist);
@ -3193,12 +3526,13 @@ folding:
if (!(ctx->flags & IR_OPT_CFG)) { if (!(ctx->flags & IR_OPT_CFG)) {
/* pass */ /* pass */
} else if (insn->op == IR_BEGIN) { } else if (insn->op == IR_BEGIN) {
if (ctx->ir_base[insn->op1].op == IR_END if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) {
&& ctx->use_lists[i].count == 1) {
ir_merge_blocks(ctx, insn->op1, i, worklist); ir_merge_blocks(ctx, insn->op1, i, worklist);
} }
} else if (insn->op == IR_MERGE) { } else if (insn->op == IR_MERGE) {
ir_iter_optimize_merge(ctx, i, insn, worklist); ir_iter_optimize_merge(ctx, i, insn, worklist);
} else if (insn->op == IR_LOOP_BEGIN) {
ir_iter_optimize_loop(ctx, i, insn, worklist);
} }
} else if (ir_is_dead_load(ctx, i)) { } else if (ir_is_dead_load(ctx, i)) {
ir_ref next; ir_ref next;