Update IR

IR commit: e445f57f3a936584db28489a49098d52f03388a7
This commit is contained in:
Dmitry Stogov 2025-01-10 00:34:58 +03:00
parent 28b448ac20
commit 4763193567
No known key found for this signature in database
6 changed files with 290 additions and 80 deletions

View file

@ -1283,9 +1283,11 @@ void ir_build_def_use_lists(ir_ctx *ctx)
void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref ref)
{
ir_ref j, n, *p, *q, use;
ir_use_list *use_list = &ctx->use_lists[from];
ir_use_list *use_list;
ir_ref skip = 0;
IR_ASSERT(from > 0);
use_list = &ctx->use_lists[from];
n = use_list->count;
for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
use = *p;
@ -1310,8 +1312,10 @@ void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref ref)
void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref)
{
ir_ref j, n, *p;
ir_use_list *use_list = &ctx->use_lists[from];
ir_use_list *use_list;
IR_ASSERT(from > 0);
use_list = &ctx->use_lists[from];
n = use_list->count;
j = 0;
p = &ctx->use_edges[use_list->refs];
@ -1334,9 +1338,11 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref)
void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use)
{
ir_use_list *use_list = &ctx->use_lists[ref];
ir_use_list *use_list;
ir_ref i, n, *p;
IR_ASSERT(ref > 0);
use_list = &ctx->use_lists[ref];
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
if (*p == use) {
@ -1348,9 +1354,11 @@ void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use
void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use)
{
ir_use_list *use_list = &ctx->use_lists[ref];
ir_use_list *use_list;
ir_ref i, n, *p;
IR_ASSERT(ref > 0);
use_list = &ctx->use_lists[ref];
n = use_list->count;
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
if (*p == use) {
@ -1361,9 +1369,12 @@ void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use
bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref)
{
ir_use_list *use_list = &ctx->use_lists[to];
ir_ref n = use_list->refs + use_list->count;
ir_use_list *use_list;
ir_ref n;
IR_ASSERT(to > 0);
use_list = &ctx->use_lists[to];
n = use_list->refs + use_list->count;
if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) {
ctx->use_edges[n] = ref;
use_list->count++;
@ -1385,6 +1396,59 @@ bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref)
}
}
static int ir_ref_cmp(const void *p1, const void *p2)
{
return *(ir_ref*)p1 - *(ir_ref*)p2;
}
/* Sort the use-edge slice of instruction "ref" into ascending ref order.
 * Lists with fewer than two entries are already sorted and left untouched.
 */
void ir_use_list_sort(ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *list;
	uint32_t count;

	IR_ASSERT(ref > 0);
	list = &ctx->use_lists[ref];
	count = list->count;
	if (count <= 1) {
		return;
	}
	qsort(&ctx->use_edges[list->refs], count, sizeof(ir_ref), ir_ref_cmp);
}
/* Replace every use of "ref" by "new_ref": for each user instruction the
 * matching operand is rewritten, and the user is appended to new_ref's
 * use list (unless new_ref is a constant — constants carry no use lists).
 * ref's own (now stale) use list is deliberately left alone; presumably
 * the caller discards/kills ref afterwards — TODO confirm against callers.
 */
void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref)
{
	int i, j, n, use;
	ir_insn *insn;

	IR_ASSERT(ref != new_ref);
	n = ctx->use_lists[ref].count;
	for (i = 0; i < n; i++) {
		/* NOTE(review): use_lists[ref].refs and use_edges are re-read on
		 * every iteration rather than hoisted — ir_use_list_add() below can
		 * grow/relocate the edge storage, so caching them would be unsafe;
		 * keep the fresh reads. */
		use = ctx->use_edges[ctx->use_lists[ref].refs + i];
		IR_ASSERT(use != ref);
		insn = &ctx->ir_base[use];
		/* locate which operand of the user refers back to "ref" */
		j = ir_insn_find_op(insn, ref);
		IR_ASSERT(j > 0);
		ir_insn_set_op(insn, j, new_ref);
		if (!IR_IS_CONST_REF(new_ref)) {
			ir_use_list_add(ctx, new_ref, use);
		}
	}
}
/* Rewrite operand "idx" of instruction "ref" from its current value to
 * "new_val", keeping the def->use lists consistent. Only positive refs
 * (real instructions) participate in use lists, hence the "> 0" guards.
 * Ordering is deliberate: the new use is registered first, then the
 * operand is swapped, then the old use is dropped — so the use-list
 * bookkeeping never observes an operand it doesn't account for.
 */
void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val)
{
	ir_insn *insn = &ctx->ir_base[ref];
	ir_ref old_val = ir_insn_op(insn, idx);

	IR_ASSERT(old_val != new_val);
	if (new_val > 0) {
		ir_use_list_add(ctx, new_val, ref);
	}
	ir_insn_set_op(insn, idx, new_val);
	if (old_val > 0) {
		/* remove a single matching entry; other uses of old_val by this
		 * instruction (via different operands) must survive */
		ir_use_list_remove_one(ctx, old_val, ref);
	}
}
/* Helper Data Types */
void ir_array_grow(ir_array *a, uint32_t size)
{
@ -1428,16 +1492,16 @@ void ir_list_remove(ir_list *l, uint32_t i)
l->len--;
}
bool ir_list_contains(const ir_list *l, ir_ref val)
uint32_t ir_list_find(const ir_list *l, ir_ref val)
{
uint32_t i;
for (i = 0; i < l->len; i++) {
if (ir_array_at(&l->a, i) == val) {
return 1;
return i;
}
}
return 0;
return (uint32_t)-1;
}
static uint32_t ir_hashtab_hash_size(uint32_t size)
@ -2010,9 +2074,12 @@ ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs)
return inputs[0];
} else {
ir_ref i;
ir_ref ref = inputs[0];
ir_ref ref;
IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN);
if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) {
IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE
|| ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN);
ref = inputs[0];
if (ref != IR_UNUSED) {
for (i = 1; i < n; i++) {
if (inputs[i] != ref) {
@ -2024,6 +2091,7 @@ ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs)
return ref;
}
}
}
ref = ir_emit_N(ctx, IR_OPT(IR_PHI, type), n + 1);
ir_set_op(ctx, ref, 1, ctx->control);
@ -2066,7 +2134,8 @@ void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num)
void _ir_BEGIN(ir_ctx *ctx, ir_ref src)
{
IR_ASSERT(!ctx->control);
if (src
if (EXPECTED(ctx->flags & IR_OPT_FOLDING)
&& src
&& src + 1 == ctx->insns_count
&& ctx->ir_base[src].op == IR_END) {
/* merge with the last END */
@ -2095,8 +2164,14 @@ ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition)
{
ir_ref if_ref;
condition = _ir_fold_condition(ctx, condition);
IR_ASSERT(ctx->control);
if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) {
if_ref = ir_emit2(ctx, IR_IF, ctx->control, condition);
ctx->control = IR_UNUSED;
return if_ref;
}
condition = _ir_fold_condition(ctx, condition);
if (IR_IS_CONST_REF(condition)) {
condition = ir_ref_is_true(ctx, condition) ? IR_TRUE : IR_FALSE;
} else {
@ -2649,7 +2724,7 @@ void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr)
return;
}
condition = IR_FALSE;
} else {
} else if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
ir_insn *prev = NULL;
ir_ref ref = ctx->control;
ir_insn *insn;
@ -2695,7 +2770,7 @@ void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr)
return;
}
condition = IR_TRUE;
} else {
} else if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
ir_insn *prev = NULL;
ir_ref ref = ctx->control;
ir_insn *insn;
@ -2779,6 +2854,10 @@ ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var)
ir_ref ref = ctx->control;
ir_insn *insn;
if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) {
IR_ASSERT(ctx->control);
return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD, type), ctx->control, var);
}
while (ref > var) {
insn = &ctx->ir_base[ref];
if (insn->op == IR_VLOAD) {
@ -2825,6 +2904,12 @@ void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val)
ir_insn *insn;
bool guarded = 0;
if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) {
IR_ASSERT(ctx->control);
ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val);
return;
}
if (!IR_IS_CONST_REF(val)) {
insn = &ctx->ir_base[val];
if (insn->op == IR_BITCAST
@ -2893,9 +2978,12 @@ void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val)
ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr)
{
ir_ref ref = ir_find_aliasing_load(ctx, ctx->control, type, addr);
ir_ref ref = IR_UNUSED;
IR_ASSERT(ctx->control);
if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
ref = ir_find_aliasing_load(ctx, ctx->control, type, addr);
}
if (!ref) {
ctx->control = ref = ir_emit2(ctx, IR_OPT(IR_LOAD, type), ctx->control, addr);
}
@ -2912,6 +3000,12 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val)
ir_type type2;
bool guarded = 0;
IR_ASSERT(ctx->control);
if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) {
ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val);
return;
}
if (!IR_IS_CONST_REF(val)) {
insn = &ctx->ir_base[val];
if (insn->op == IR_BITCAST
@ -2922,7 +3016,6 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val)
}
}
IR_ASSERT(ctx->control);
while (ref > limit) {
insn = &ctx->ir_base[ref];
if (insn->op == IR_STORE) {

View file

@ -705,6 +705,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);
IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
@ -721,8 +722,6 @@ IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val)
ctx->ir_base[ref].op3 = val;
}
ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);
IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n)
{
const ir_ref *p = insn->ops + n;
@ -735,6 +734,18 @@ IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val)
*p = val;
}
/* Return the 1-based index of the first operand of "insn" equal to "val",
 * or 0 if no operand matches. */
IR_ALWAYS_INLINE uint32_t ir_insn_find_op(const ir_insn *insn, ir_ref val)
{
	int idx = 1;
	int count = insn->inputs_count;

	while (idx <= count) {
		if (ir_insn_op(insn, idx) == val) {
			return idx;
		}
		idx++;
	}
	return 0;
}
ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt);

View file

@ -1808,10 +1808,6 @@ IR_FOLD(MUL(_, C_ADDR))
IR_FOLD_COPY(op2);
} else if (op2_insn->val.u64 == 1) {
IR_FOLD_COPY(op1);
} else if (op2_insn->val.u64 == 2 && IR_OPT_TYPE(opt) != IR_ADDR) {
opt = IR_ADD | (opt & IR_OPT_TYPE_MASK);
op2 = op1;
IR_FOLD_RESTART;
}
IR_FOLD_NEXT;
}
@ -1827,11 +1823,6 @@ IR_FOLD(MUL(_, C_I64))
} else if (op2_insn->val.i64 == 1) {
/* a * 1 => a */
IR_FOLD_COPY(op1);
} else if (op2_insn->val.i64 == 2) {
/* a * 2 => a + a */
opt = IR_ADD | (opt & IR_OPT_TYPE_MASK);
op2 = op1;
IR_FOLD_RESTART;
} else if (op2_insn->val.i64 == -1) {
/* a * -1 => -a */
opt = IR_NEG | (opt & IR_OPT_TYPE_MASK);
@ -2907,7 +2898,6 @@ IR_FOLD(ADD(SHR, SHL))
/* Swap operands (move lower ref to op2) for better CSE */
IR_FOLD(ADD(_, _))
IR_FOLD(MUL(_, _))
IR_FOLD_NAMED(swap_ops)
{
@ -2929,6 +2919,19 @@ IR_FOLD(MUL_OV(_, _))
IR_FOLD_EMIT;
}
IR_FOLD(ADD(_, _))
{
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) {
/* a + a => a * 2 */
IR_ASSERT(!IR_IS_CONST_REF(op1));
val.u64 = 2;
opt = IR_MUL | (opt & IR_OPT_TYPE_MASK);
op2 = ir_const(ctx, val, IR_OPT_TYPE(opt));
IR_FOLD_RESTART;
}
IR_FOLD_DO_NAMED(swap_ops);
}
IR_FOLD(SUB(_, _))
{
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) {

View file

@ -701,7 +701,7 @@ typedef struct _ir_list {
uint32_t len;
} ir_list;
bool ir_list_contains(const ir_list *l, ir_ref val);
uint32_t ir_list_find(const ir_list *l, ir_ref val);
void ir_list_insert(ir_list *l, uint32_t i, ir_ref val);
void ir_list_remove(ir_list *l, uint32_t i);
@ -766,6 +766,19 @@ IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val)
ir_array_set_unchecked(&l->a, i, val);
}
/* Doesn't preserve order */
/* O(1) removal of slot "i": the last element is moved into the vacated
 * slot and the list is shortened by one. Element order is NOT preserved
 * (self-move is harmless when i is already the last slot). */
IR_ALWAYS_INLINE void ir_list_del(ir_list *l, uint32_t i)
{
	ir_ref last;

	IR_ASSERT(i < l->len);
	last = ir_array_at(&l->a, --l->len);
	ir_array_set_unchecked(&l->a, i, last);
}
/* Membership test: "val" is present iff ir_list_find() reports a valid
 * index ((uint32_t)-1 is its not-found sentinel). */
IR_ALWAYS_INLINE bool ir_list_contains(const ir_list *l, ir_ref val)
{
	uint32_t pos = ir_list_find(l, val);

	return pos != (uint32_t)-1;
}
/* Worklist (unique list) */
typedef struct _ir_worklist {
ir_list l;
@ -1019,11 +1032,12 @@ struct _ir_use_list {
ir_ref count;
};
void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use);
void ir_use_list_remove_all(ir_ctx *ctx, ir_ref def, ir_ref use);
void ir_use_list_remove_one(ir_ctx *ctx, ir_ref def, ir_ref use);
void ir_use_list_replace_all(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use);
void ir_use_list_replace_one(ir_ctx *ctx, ir_ref def, ir_ref use, ir_ref new_use);
bool ir_use_list_add(ir_ctx *ctx, ir_ref def, ir_ref use);
void ir_use_list_sort(ir_ctx *ctx, ir_ref def);
IR_ALWAYS_INLINE ir_ref ir_next_control(const ir_ctx *ctx, ir_ref ref)
{
@ -1068,6 +1082,9 @@ IR_ALWAYS_INLINE ir_ref ir_next_control(const ir_ctx *ctx, ir_ref ref)
_insn2 = _tmp; \
} while (0)
void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref);
void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val);
/*** IR Basic Blocks info ***/
#define IR_IS_BB_START(op) \
((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0)

View file

@ -1238,7 +1238,7 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *
/* connect their predecessor and successor */
ctx->ir_base[next].op1 = prev;
ir_use_list_replace_all(ctx, prev, end, next);
ir_use_list_replace_one(ctx, prev, end, next);
if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
ir_bitqueue_add(worklist2, prev);

View file

@ -1001,6 +1001,8 @@ const char *ir_reg_name(int8_t reg, ir_type type)
_(LEA_SI_OB) \
_(LEA_B_SI) \
_(LEA_SI_B) \
_(LEA_B_SI_O) \
_(LEA_SI_B_O) \
_(INC) \
_(DEC) \
_(MUL_PWR2) \
@ -1581,7 +1583,7 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref)
if (!rule) {
ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref);
}
if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) {
if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B_O) {
ir_use_list *use_list;
ir_ref j;
@ -1943,7 +1945,8 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
} else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(insn->op == IR_EQ || insn->op == IR_NE))) {
(insn->op == IR_EQ || insn->op == IR_NE ||
insn->op == IR_LT || insn->op == IR_GE))) {
/* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
@ -1990,14 +1993,20 @@ lea:
/* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
return IR_LEA_SI_O;
} else if (rule == IR_LEA_SIB) {
} else if (rule == IR_LEA_SIB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SIB)) {
/* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB;
return IR_LEA_SIB_O;
} else if (rule == IR_LEA_IB) {
} else if (rule == IR_LEA_IB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_IB)) {
/* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... LEA [X+Y+im32] */
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB;
return IR_LEA_IB_O;
} else if (rule == IR_LEA_B_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_B_SI)) {
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_B_SI;
return IR_LEA_B_SI_O;
} else if (rule == IR_LEA_SI_B || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI_B)) {
ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI_B;
return IR_LEA_SI_B_O;
}
}
/* ADD(X, imm32) => LEA [X+imm32] */
@ -2050,7 +2059,7 @@ lea:
if (!rule) {
ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
}
if (rule == IR_LEA_OB) {
if (rule == IR_LEA_OB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_OB)) {
/* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
return IR_LEA_SI_OB;
@ -2065,7 +2074,7 @@ lea:
if (!rule) {
ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
}
if (rule == IR_LEA_OB) {
if (rule == IR_LEA_OB || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_OB)) {
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
/* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */
return IR_LEA_I_OB;
@ -2602,7 +2611,8 @@ store_int:
((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) {
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
@ -2744,7 +2754,8 @@ store_int:
if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
@ -2768,7 +2779,8 @@ store_int:
if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) ||
/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
((op_insn->op == IR_ADD || op_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ctx->ir_base[op_insn->op1].op == IR_LOAD
&& ctx->ir_base[op_insn->op1].op2 == store_insn->op2) {
if (ir_in_same_block(ctx, op_insn->op1)
@ -3286,7 +3298,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
int32_t offset = 0, scale;
IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB &&
(rule & IR_RULE_MASK) <= IR_LEA_SI_B) ||
(rule & IR_RULE_MASK) <= IR_LEA_SI_B_O) ||
rule == IR_STATIC_ALLOCA);
switch (rule & IR_RULE_MASK) {
default:
@ -3456,6 +3468,22 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
scale = ctx->ir_base[op1_insn->op2].val.i32;
offset_insn = NULL;
break;
case IR_LEA_B_SI_O:
offset_insn = insn;
op1_insn = &ctx->ir_base[insn->op1];
base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
index_reg_ref = op1_insn->op2 * sizeof(ir_ref) + 1;
op2_insn = &ctx->ir_base[op1_insn->op2];
scale = ctx->ir_base[op2_insn->op2].val.i32;
break;
case IR_LEA_SI_B_O:
offset_insn = insn;
op1_insn = &ctx->ir_base[insn->op1];
index_reg_ref = op1_insn->op1 * sizeof(ir_ref) + 1;
base_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
op1_insn = &ctx->ir_base[op1_insn->op1];
scale = ctx->ir_base[op1_insn->op2].val.i32;
break;
case IR_ALLOCA:
offset = IR_SPILL_POS_TO_OFFSET(insn->op3);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
@ -5583,7 +5611,7 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins
ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2);
}
static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg, bool after_op)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@ -5598,10 +5626,18 @@ static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
| setne Rb(def_reg)
break;
case IR_LT:
if (after_op) {
| sets Rb(def_reg)
} else {
| setl Rb(def_reg)
}
break;
case IR_GE:
if (after_op) {
| setns Rb(def_reg)
} else {
| setge Rb(def_reg)
}
break;
case IR_LE:
| setle Rb(def_reg)
@ -5711,7 +5747,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2);
_ir_emit_setcc_int(ctx, op, def_reg);
_ir_emit_setcc_int(ctx, op, def_reg, 0);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
@ -5808,7 +5844,7 @@ static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
IR_ASSERT(def_reg != IR_REG_NONE);
ir_emit_test_int_common(ctx, def, insn->op1, insn->op);
_ir_emit_setcc_int(ctx, insn->op, def_reg);
_ir_emit_setcc_int(ctx, insn->op, def_reg, 0);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
@ -5819,7 +5855,7 @@ static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
IR_ASSERT(def_reg != IR_REG_NONE);
_ir_emit_setcc_int(ctx, insn->op, def_reg);
_ir_emit_setcc_int(ctx, insn->op, def_reg, 1);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
@ -5963,7 +5999,7 @@ static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
}
}
static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp)
static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp, bool after_op)
{
uint32_t true_block, false_block;
ir_backend_data *data = ctx->data;
@ -5994,10 +6030,18 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint
| jne =>true_block
break;
case IR_LT:
if (after_op) {
| js =>true_block
} else {
| jl =>true_block
}
break;
case IR_GE:
if (after_op) {
| jns =>true_block
} else {
| jge =>true_block
}
break;
case IR_LE:
| jle =>true_block
@ -6133,7 +6177,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
if (!same_comparison) {
ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2);
}
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0);
}
static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
@ -6149,13 +6193,13 @@ static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_
}
ir_emit_test_int_common(ctx, def, op2, op);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0);
}
static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0, 0);
}
static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
@ -6203,7 +6247,7 @@ static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, u
}
| ASM_MEM_IMM_OP cmp, type, mem, 0
}
ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1, 0);
}
static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
@ -9078,7 +9122,7 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp)
static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp, bool after_op)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@ -9112,10 +9156,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| je =>target
break;
case IR_LT:
if (after_op) {
| jns =>target
} else {
| jge =>target
}
break;
case IR_GE:
if (after_op) {
| js =>target
} else {
| jl =>target
}
break;
case IR_LE:
| jg =>target
@ -9183,10 +9235,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| je &target_addr
break;
case IR_LT:
if (after_op) {
| jns &target_addr
} else {
| jge &target_addr
}
break;
case IR_GE:
if (after_op) {
| js &target_addr
} else {
| jl &target_addr
}
break;
case IR_LE:
| jg &target_addr
@ -9251,10 +9311,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| jne &addr
break;
case IR_LT:
if (after_op) {
| js &addr
} else {
| jl &addr
}
break;
case IR_GE:
if (after_op) {
| jns &addr
} else {
| jge &addr
}
break;
case IR_LE:
| jle &addr
@ -9369,7 +9437,7 @@ static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, ui
} else {
op = IR_NE;
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
} else {
|.if X64
if (insn->op == IR_GUARD) {
@ -9447,7 +9515,7 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
op ^= 1; // reverse
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
}
static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
@ -9458,7 +9526,7 @@ static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i
if (insn->op == IR_GUARD) {
op ^= 1; // reverse
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0, 0);
}
static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
@ -9467,7 +9535,7 @@ static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn
ir_op op = (insn->op == IR_GUARD) ? IR_EQ : IR_NE;
ir_emit_test_int_common(ctx, def, insn->op2, op);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
}
static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
@ -9478,7 +9546,7 @@ static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
if (insn->op == IR_GUARD) {
op ^= 1; // reverse
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 1);
}
static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
@ -9528,7 +9596,15 @@ static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type)
&& IR_MEM_BASE(mem) != IR_REG_NONE) {
ir_reg reg = IR_MEM_BASE(mem);
| add Rd(def_reg), Rd(reg)
} else if (IR_MEM_INDEX(mem) == def_reg
&& IR_MEM_OFFSET(mem) == 0
&& IR_MEM_SCALE(mem) == 2
&& IR_MEM_BASE(mem) == IR_REG_NONE) {
| add Rd(def_reg), Rd(def_reg)
} else {
if (IR_MEM_SCALE(mem) == 2 && IR_MEM_BASE(mem) == IR_REG_NONE) {
mem = IR_MEM(IR_MEM_INDEX(mem), IR_MEM_OFFSET(mem), IR_MEM_INDEX(mem), 1);
}
| ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem
}
} else {
@ -9544,7 +9620,15 @@ static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type)
&& IR_MEM_BASE(mem) != IR_REG_NONE) {
ir_reg reg = IR_MEM_BASE(mem);
| add Ra(def_reg), Ra(reg)
} else if (IR_MEM_INDEX(mem) == def_reg
&& IR_MEM_OFFSET(mem) == 0
&& IR_MEM_SCALE(mem) == 2
&& IR_MEM_BASE(mem) == IR_REG_NONE) {
| add Ra(def_reg), Ra(def_reg)
} else {
if (IR_MEM_SCALE(mem) == 2 && IR_MEM_BASE(mem) == IR_REG_NONE) {
mem = IR_MEM(IR_MEM_INDEX(mem), IR_MEM_OFFSET(mem), IR_MEM_INDEX(mem), 1);
}
| ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem
}
}
@ -10453,6 +10537,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_LEA_SI_OB:
case IR_LEA_B_SI:
case IR_LEA_SI_B:
case IR_LEA_B_SI_O:
case IR_LEA_SI_B_O:
ir_emit_lea(ctx, i, insn->type);
break;
case IR_MUL_PWR2:
@ -10575,7 +10661,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
} else {
IR_ASSERT(op >= IR_EQ && op <= IR_UGT);
}
ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1);
ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1, 1);
}
break;
case IR_GUARD_CMP_INT: