Update IR

IR commit: eee484977acfe97b81b338a16390e218a852ec1c
Dmitry Stogov 2024-04-04 01:11:28 +03:00
parent bb1688d732
commit d2b54dc53e
9 changed files with 252 additions and 27 deletions

View file

@@ -267,6 +267,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_x3 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS)
#define ir_op_flag_a1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_a2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_kind____ IR_OPND_UNUSED
@@ -2771,3 +2772,9 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list)
IR_ASSERT(ctx->control);
return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list);
}
ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx)
{
IR_ASSERT(ctx->control);
return ctx->control = ir_emit1(ctx, IR_OPT(IR_BLOCK_BEGIN, IR_ADDR), ctx->control);
}

View file

@@ -316,6 +316,8 @@ typedef enum _ir_type {
/* memory reference and load/store ops */ \
_(ALLOCA, a2, src, def, ___) /* alloca(def) */ \
_(AFREE, a2, src, def, ___) /* revert alloca(def) */ \
_(BLOCK_BEGIN, a1, src, ___, ___) /* stacksave */ \
_(BLOCK_END, a2, src, def, ___) /* stackrestore */ \
_(VADDR, d1, var, ___, ___) /* load address of local var */ \
_(VLOAD, l2, src, var, ___) /* load value of local var */ \
_(VSTORE, s3, src, var, def) /* store value to local var */ \
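
The two new opcodes are annotated as stacksave/stackrestore: BLOCK_BEGIN captures the current stack pointer and BLOCK_END restores it, so ALLOCA space taken inside the bracketed region is released on exit. A rough source-level analogy (illustration only, assuming Clang's stack-save builtins; not code from this commit):

#include <stddef.h>

/* Analogy for BLOCK_BEGIN/BLOCK_END; assumes Clang's
 * __builtin_stack_save()/__builtin_stack_restore(). */
static void scoped_scratch(size_t n)
{
	void *saved = __builtin_stack_save();   /* BLOCK_BEGIN: remember sp  */
	char *buf = __builtin_alloca(n);        /* ALLOCA inside the region  */
	buf[0] = 0;                             /* ... use the scratch space */
	__builtin_stack_restore(saved);         /* BLOCK_END: restore sp     */
}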

View file

@@ -4063,6 +4063,34 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
| mov Rx(def_reg), sp
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, IR_ADDR, def, def_reg);
}
}
static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| mov sp, Rx(op2_reg)
}
static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
{
ir_backend_data *data = ctx->data;
@@ -5965,6 +5993,12 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_AFREE:
ir_emit_afree(ctx, i, insn);
break;
case IR_BLOCK_BEGIN:
ir_emit_block_begin(ctx, i, insn);
break;
case IR_BLOCK_END:
ir_emit_block_end(ctx, i, insn);
break;
case IR_FRAME_ADDR:
ir_emit_frame_addr(ctx, i);
break;

View file

@@ -577,6 +577,9 @@ extern "C" {
#define ir_FRAME_ADDR() ir_fold0(_ir_CTX, IR_OPT(IR_FRAME_ADDR, IR_ADDR))
#define ir_BLOCK_BEGIN() _ir_BLOCK_BEGIN(_ir_CTX)
#define ir_BLOCK_END(_val) do {_ir_CTX->control = ir_emit2(_ir_CTX, IR_BLOCK_END, _ir_CTX->control, (_val));} while (0)
#define ir_VA_START(_list) _ir_VA_START(_ir_CTX, _list)
#define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list)
#define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src)
@@ -680,6 +683,7 @@ void _ir_RETURN(ir_ctx *ctx, ir_ref val);
void _ir_IJMP(ir_ctx *ctx, ir_ref addr);
void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr);
void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr);
ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx);
ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n);
void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val);
ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func);
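
A minimal usage sketch for the new builder macros (assumes the pre-existing ir_ALLOCA() and ir_CONST_ADDR() builder macros, with _ir_CTX bound to the ctx parameter; not code from this commit):

#include "ir.h"
#include "ir_builder.h"

/* Hypothetical helper: reserve a 128-byte scratch buffer whose stack space
 * is reclaimed at the end of the region. */
static void emit_scoped_scratch(ir_ctx *ctx)
{
	ir_ref saved = ir_BLOCK_BEGIN();              /* BLOCK_BEGIN (stacksave)     */
	ir_ref buf = ir_ALLOCA(ir_CONST_ADDR(128));   /* scratch space in the region */
	(void)buf;                                    /* ... emit code that uses buf ... */
	ir_BLOCK_END(saved);                          /* BLOCK_END (stackrestore)    */
}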

View file

@@ -1511,12 +1511,17 @@ int ir_build_dominators_tree(ir_ctx *ctx)
}
#else
/* A single pass modification of "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy, that relies on IR block ordering */
* Cooper, Harvey and Kennedy, that relies on IR block ordering.
* It may fall back to the general slow fixed-point algorithm. */
static int ir_build_dominators_tree_iterative(ir_ctx *ctx);
int ir_build_dominators_tree(ir_ctx *ctx)
{
uint32_t blocks_count, b;
ir_block *blocks, *bb;
uint32_t *edges;
ir_list worklist;
ir_list_init(&worklist, ctx->cfg_blocks_count / 2);
ctx->flags2 |= IR_NO_LOOPS;
@@ -1539,6 +1544,9 @@ int ir_build_dominators_tree(ir_ctx *ctx)
if (UNEXPECTED(idom > b)) {
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
ctx->flags2 &= ~IR_NO_LOOPS;
IR_ASSERT(k > 1);
IR_ASSERT(blocks[idom].successors_count == 1);
ir_list_push(&worklist, idom);
while (1) {
k--;
p++;
@@ -1547,9 +1555,12 @@ int ir_build_dominators_tree(ir_ctx *ctx)
break;
}
IR_ASSERT(k > 0);
IR_ASSERT(blocks[idom].successors_count == 1);
ir_list_push(&worklist, idom);
}
}
IR_ASSERT(blocks[idom].idom > 0);
IR_ASSERT(k != 0);
while (--k > 0) {
uint32_t pred_b = *(++p);
@@ -1566,6 +1577,9 @@ int ir_build_dominators_tree(ir_ctx *ctx)
}
} else {
ctx->flags2 &= ~IR_NO_LOOPS;
IR_ASSERT(bb->predecessors_count > 1);
IR_ASSERT(blocks[pred_b].successors_count == 1);
ir_list_push(&worklist, pred_b);
}
}
bb->idom = idom;
@@ -1593,6 +1607,131 @@ int ir_build_dominators_tree(ir_ctx *ctx)
blocks[1].idom = 0;
if (ir_list_len(&worklist) != 0) {
uint32_t dom_depth;
uint32_t succ_b;
bool complete = 1;
/* Check if all the back-edges lead to the loop headers */
do {
b = ir_list_pop(&worklist);
bb = &blocks[b];
IR_ASSERT(bb->successors_count == 1);
succ_b = ctx->cfg_edges[bb->successors];
dom_depth = blocks[succ_b].dom_depth;
while (bb->dom_depth > dom_depth) {
b = bb->dom_parent;
bb = &blocks[b];
}
if (UNEXPECTED(b != succ_b)) {
complete = 0;
break;
}
} while (ir_list_len(&worklist) != 0);
if (UNEXPECTED(!complete)) {
ir_list_free(&worklist);
return ir_build_dominators_tree_iterative(ctx);
}
}
ir_list_free(&worklist);
return 1;
}
static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
{
bool changed;
uint32_t blocks_count, b;
ir_block *blocks, *bb;
uint32_t *edges;
/* Find immediate dominators */
blocks = ctx->cfg_blocks;
edges = ctx->cfg_edges;
blocks_count = ctx->cfg_blocks_count;
/* Clear the dominators tree, but keep already found dominators */
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
bb->dom_depth = 0;
bb->dom_child = 0;
bb->dom_next_child = 0;
}
/* Find immediate dominators by iterative fixed-point algorithm */
blocks[1].idom = 1;
do {
changed = 0;
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
IR_ASSERT(bb->predecessors_count > 0);
uint32_t k = bb->predecessors_count;
uint32_t *p = edges + bb->predecessors;
uint32_t idom = *p;
if (blocks[idom].idom == 0) {
while (1) {
k--;
p++;
idom = *p;
if (blocks[idom].idom > 0) {
break;
}
IR_ASSERT(k > 0);
}
}
IR_ASSERT(k != 0);
while (--k > 0) {
uint32_t pred_b = *(++p);
if (blocks[pred_b].idom > 0) {
IR_ASSERT(blocks[pred_b].idom > 0);
while (idom != pred_b) {
while (pred_b > idom) {
pred_b = blocks[pred_b].idom;
}
while (idom > pred_b) {
idom = blocks[idom].idom;
}
}
}
}
if (bb->idom != idom) {
bb->idom = idom;
changed = 1;
}
}
} while (changed);
/* Build dominators tree */
blocks[1].idom = 0;
blocks[1].dom_depth = 0;
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
uint32_t idom = bb->idom;
ir_block *idom_bb = &blocks[idom];
bb->dom_depth = idom_bb->dom_depth + 1;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
}
}
return 1;
}
#endif
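
The nested "while (pred_b > idom)" / "while (idom > pred_b)" loops above are the intersection step of the Cooper, Harvey and Kennedy algorithm: both candidates climb the dominator tree until they meet. A standalone version of that step (a hypothetical helper for illustration, not part of this patch):

#include <stdint.h>

/* Nearest common dominator of b1 and b2, given the current idom[] guesses.
 * Relies on the block numbering used by IR, where a block can only be
 * dominated by a lower-numbered block, so climbing idom[] always moves down. */
static uint32_t intersect(const uint32_t *idom, uint32_t b1, uint32_t b2)
{
	while (b1 != b2) {
		while (b1 > b2) b1 = idom[b1];   /* climb from the deeper block */
		while (b2 > b1) b2 = idom[b2];
	}
	return b1;
}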

View file

@@ -662,14 +662,16 @@ static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from
}
}
IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, ir_bitset todo, ir_type type, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
ir_ref from;
ir_mem tmp_spill_slot;
ir_type type;
IR_MEM_VAL(tmp_spill_slot) = 0;
IR_ASSERT(!IR_IS_CONST_REF(to));
from = pred[to];
type = types[from];
IR_ASSERT(!IR_IS_CONST_REF(from));
IR_ASSERT(from != to);
IR_ASSERT(loc[from] == from);
@@ -721,6 +723,7 @@ IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t
from = pred[to];
r = loc[from];
type = types[to];
if (from == r && ir_bitset_in(todo, from)) {
/* Memory to memory move inside an isolated or "blocked" cycle requires an additional temporary register */
@@ -743,6 +746,8 @@ IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t
break;
}
}
type = types[to];
if (IR_MEM_VAL(tmp_spill_slot)) {
ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
}
@@ -830,11 +835,12 @@ static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count,
to = pred[to];
while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
to = pred[to];
if (!IR_IS_CONST_REF(to) && ir_bitset_in(visited, to)) {
if (IR_IS_CONST_REF(to)) {
break;
} else if (ir_bitset_in(visited, to)) {
/* We found a cycle. Resolve it. */
ir_bitset_incl(visited, to);
type = types[to];
ir_dessa_resolve_cycle(ctx, pred, loc, todo, type, to, tmp_reg, tmp_fp_reg);
ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg);
break;
}
ir_bitset_incl(visited, to);
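
The cycle that ir_dessa_resolve_cycle() breaks is the usual parallel-copy cycle: every destination is also another copy's source, so no single move can go first without clobbering a live value. A scalar illustration of the same idea (an analogy only, not the IR code), with a temporary playing the role of tmp_reg / tmp_spill_slot:

/* Resolve the parallel copy (r1, r2, r3) := (r2, r3, r1) with one scratch
 * value; plain sequential moves would overwrite r1 before it is read. */
static void resolve_cycle3(int *r1, int *r2, int *r3)
{
	int tmp = *r1;   /* park the value the first move would clobber */
	*r1 = *r2;
	*r2 = *r3;
	*r3 = tmp;
}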

View file

@@ -3311,26 +3311,26 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
if (IR_IS_TYPE_INT(type)) {
tmp_reg.num = 0;
tmp_reg.type = type;
tmp_reg.start = IR_DEF_SUB_REF;
tmp_reg.start = IR_USE_SUB_REF;
tmp_reg.end = IR_SAVE_SUB_REF;
} else {
IR_ASSERT(IR_IS_TYPE_FP(type));
tmp_reg.num = 1;
tmp_reg.type = type;
tmp_reg.start = IR_DEF_SUB_REF;
tmp_reg.start = IR_USE_SUB_REF;
tmp_reg.end = IR_SAVE_SUB_REF;
}
} else if (from != 0) {
if (IR_IS_TYPE_INT(type)) {
tmp_reg.num = 0;
tmp_reg.type = type;
tmp_reg.start = IR_DEF_SUB_REF;
tmp_reg.start = IR_USE_SUB_REF;
tmp_reg.end = IR_SAVE_SUB_REF;
} else {
IR_ASSERT(IR_IS_TYPE_FP(type));
tmp_reg.num = 1;
tmp_reg.type = type;
tmp_reg.start = IR_DEF_SUB_REF;
tmp_reg.start = IR_USE_SUB_REF;
tmp_reg.end = IR_SAVE_SUB_REF;
}
} else {
@@ -3916,7 +3916,7 @@ static void assign_regs(ir_ctx *ctx)
} else {
reg |= IR_REG_SPILL_LOAD;
}
if (ctx->ir_base[ref].op != IR_SNAPSHOT) {
if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
uint32_t use_b = ctx->cfg_map[ref];
if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {

View file

@@ -528,25 +528,24 @@ restart:
static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
{
ir_ref j, n, *p, use, next;
ir_ref next;
ir_insn *insn, *next_insn;
ir_use_list *use_list = &ctx->use_lists[ref];
insn = &ctx->ir_base[ref];
n = use_list->count;
for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
use = *p;
if (use == dst) {
next = ctx->use_edges[ctx->use_lists[use].refs];
if (ctx->use_lists[dst].count == 1) {
next = ctx->use_edges[ctx->use_lists[dst].refs];
next_insn = &ctx->ir_base[next];
/* remove IF and IF_TRUE/FALSE from doubly linked control list */
next_insn->op1 = insn->op1;
ir_use_list_replace_one(ctx, insn->op1, ref, next);
/* remove IF and IF_TRUE/FALSE instructions */
ir_sccp_make_nop(ctx, ref);
ir_sccp_make_nop(ctx, use);
break;
}
ir_sccp_make_nop(ctx, dst);
} else {
insn->op2 = IR_UNUSED;
insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
next_insn = &ctx->ir_base[dst];
next_insn->op = IR_BEGIN;
}
}
@@ -1247,7 +1246,7 @@ int ir_sccp(ir_ctx *ctx)
/* remove unreachable instruction */
insn = &ctx->ir_base[i];
if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
if (insn->op != IR_PARAM && insn->op != IR_VAR) {
if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) {
ir_sccp_remove_insn(ctx, _values, i, &worklist2);
}
} else {
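
For orientation, the rewritten ir_sccp_remove_if() handles a branch whose condition SCCP has proven constant: when the surviving IF_TRUE/IF_FALSE has a single use, both it and the IF are unlinked from the control list and the user is attached directly to the IF's predecessor; otherwise the IF is downgraded to END and the surviving target to BEGIN. A source-level analogy of the net effect (illustration only, not IR code):

/* Once the condition is known constant, the branch and its dead arm
 * disappear and control flows straight through the surviving arm. */
static int before_fold(int x)
{
	if (1) {            /* condition proven constant by SCCP */
		return x + 1;   /* reachable arm (IF_TRUE)           */
	} else {
		return x - 1;   /* unreachable arm, removed          */
	}
}

static int after_fold(int x)
{
	return x + 1;       /* IF / IF_TRUE spliced out of the control list */
}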

View file

@@ -7733,6 +7733,34 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
| mov Ra(def_reg), Ra(IR_REG_RSP)
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, IR_ADDR, def, def_reg);
}
}
static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| mov Ra(IR_REG_RSP), Ra(op2_reg)
}
static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
{
ir_backend_data *data = ctx->data;
@@ -10391,6 +10419,12 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_AFREE:
ir_emit_afree(ctx, i, insn);
break;
case IR_BLOCK_BEGIN:
ir_emit_block_begin(ctx, i, insn);
break;
case IR_BLOCK_END:
ir_emit_block_end(ctx, i, insn);
break;
case IR_FRAME_ADDR:
ir_emit_frame_addr(ctx, i);
break;