Update IR

IR commit: 6e2aea0ebfef2c741ebec30c57aa492df0d4e319
Dmitry Stogov 2025-08-04 17:26:24 +03:00
parent b3f4863373
commit ac1cd9c26e
13 changed files with 685 additions and 127 deletions

View file

@ -172,7 +172,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
} else if (insn->val.c == '\0') {
fprintf(f, "'\\0'");
} else {
fprintf(f, "%u", insn->val.c);
fprintf(f, "%u", (unsigned char)insn->val.c);
}
break;
case IR_I8:
@ -247,6 +247,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_flag_S1X1 (ir_op_flag_S | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_S2 (ir_op_flag_S | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_S2X1 (ir_op_flag_S | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_S3 (ir_op_flag_S | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_SN (ir_op_flag_S | IR_OP_FLAG_VAR_INPUTS)
#define ir_op_flag_E (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END)
#define ir_op_flag_E1 (ir_op_flag_E | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
@ -803,7 +804,9 @@ ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_co
proto->flags = flags;
proto->ret_type = ret_type;
proto->params_count = params_count;
memcpy(proto->param_types, param_types, params_count);
if (params_count) {
memcpy(proto->param_types, param_types, params_count);
}
return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count);
}
@ -1080,7 +1083,7 @@ ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count)
ir_ref *p, ref = ctx->insns_count;
ir_insn *insn;
IR_ASSERT(count >= 0);
IR_ASSERT(count >= 0 && count < 65536);
while (UNEXPECTED(ref + count/4 >= ctx->insns_limit)) {
ir_grow_top(ctx);
}
@ -2973,6 +2976,12 @@ void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val)
ctx->control = ir_emit2(ctx, IR_CASE_VAL, switch_ref, val);
}
void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2)
{
IR_ASSERT(!ctx->control);
ctx->control = ir_emit3(ctx, IR_CASE_RANGE, switch_ref, v1, v2);
}
void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref)
{
IR_ASSERT(!ctx->control);

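The ir_proto() guard above is more than cosmetic: calling memcpy() with a null source pointer is undefined behavior in C even when the size is zero. A minimal sketch of a call that could hit the old UB; the zero flags value and NULL argument are illustrative:

/* A prototype with no parameters: param_types may legitimately be
 * NULL here, which made the unconditional memcpy() undefined
 * behavior before this change, despite params_count == 0. */
ir_ref proto = ir_proto(ctx, 0, IR_I32, 0, NULL);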
View file

@ -359,6 +359,7 @@ typedef enum _ir_type {
_(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
_(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
_(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \
_(CASE_RANGE, S3, src, def, def) /* switch proj. */ \
_(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. */ \
_(MERGE, SN, src, src, src) /* control merge */ \
_(LOOP_BEGIN, SN, src, src, src) /* loop start */ \
@ -854,6 +855,9 @@ void ir_gdb_unregister_all(void);
bool ir_gdb_present(void);
/* IR load API (implementation in ir_load.c) */
#define IR_RESOLVE_SYM_ADD_THUNK (1<<0)
#define IR_RESOLVE_SYM_SILENT (1<<1)
struct _ir_loader {
uint32_t default_func_flags;
bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
@ -870,7 +874,7 @@ struct _ir_loader {
bool (*sym_data_end) (ir_loader *loader, uint32_t flags);
bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk);
void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags);
bool (*has_sym) (ir_loader *loader, const char *name);
bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
};
@ -884,11 +888,12 @@ int ir_load_llvm_bitcode(ir_loader *loader, const char *filename);
int ir_load_llvm_asm(ir_loader *loader, const char *filename);
/* IR save API (implementation in ir_save.c) */
#define IR_SAVE_CFG (1<<0) /* add info about CFG */
#define IR_SAVE_CFG_MAP (1<<1) /* add info about CFG block assignment */
#define IR_SAVE_USE_LISTS (1<<2) /* add info about def->use lists */
#define IR_SAVE_RULES (1<<3) /* add info about selected code-generation rules */
#define IR_SAVE_REGS (1<<4) /* add info about selected registers */
#define IR_SAVE_CFG (1<<0) /* add info about CFG */
#define IR_SAVE_CFG_MAP (1<<1) /* add info about CFG block assignment */
#define IR_SAVE_USE_LISTS (1<<2) /* add info about def->use lists */
#define IR_SAVE_RULES (1<<3) /* add info about selected code-generation rules */
#define IR_SAVE_REGS (1<<4) /* add info about selected registers */
#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */
void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);
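The resolve_sym_name() change is source-breaking for embedders: the old boolean add_thunk argument becomes a flags word. A minimal sketch of an updated callback, assuming dlsym()-based lookup purely for illustration:

#define _GNU_SOURCE /* RTLD_DEFAULT on glibc */
#include <dlfcn.h>
#include <stdio.h>

static void *my_resolve_sym_name(ir_loader *loader, const char *name, uint32_t flags)
{
    void *addr = dlsym(RTLD_DEFAULT, name);

    /* IR_RESOLVE_SYM_SILENT marks a probe: failure is not an error. */
    if (!addr && !(flags & IR_RESOLVE_SYM_SILENT)) {
        fprintf(stderr, "cannot resolve symbol '%s'\n", name);
    }
    /* IR_RESOLVE_SYM_ADD_THUNK would additionally route the call
     * through a thunk; omitted in this sketch. */
    (void)loader;
    return addr;
}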

View file

@ -996,6 +996,7 @@ binop_fp:
case IR_IF_TRUE:
case IR_IF_FALSE:
case IR_CASE_VAL:
case IR_CASE_RANGE:
case IR_CASE_DEFAULT:
case IR_MERGE:
case IR_LOOP_BEGIN:
@ -4366,11 +4367,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
@ -4394,11 +4399,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
@ -4465,6 +4474,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
int count = 0;
ir_val min, max;
ir_reg op1_reg, op2_reg, tmp_reg;
bool has_case_range = 0;
type = ctx->ir_base[insn->op2].type;
if (IR_IS_TYPE_SIGNED(type)) {
@ -4493,6 +4503,22 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
}
count++;
} else if (use_insn->op == IR_CASE_RANGE) {
has_case_range = 1;
val = &ctx->ir_base[use_insn->op2];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
ir_insn *val2 = &ctx->ir_base[use_insn->op3];
IR_ASSERT(!IR_IS_SYM_CONST(val2->op));
if (IR_IS_TYPE_SIGNED(type)) {
IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
min.i64 = IR_MIN(min.i64, val->val.i64);
max.i64 = IR_MAX(max.i64, val2->val.i64);
} else {
IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
max.u64 = (int64_t)IR_MAX(max.u64, val2->val.u64);
}
count++;
} else {
IR_ASSERT(use_insn->op == IR_CASE_DEFAULT);
default_label = ir_skip_empty_target_blocks(ctx, use_block);
@ -4510,7 +4536,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
/* Generate a table jmp or a sequence of calls */
if (count > 2 && (max.i64-min.i64) < count * 8) {
if (!has_case_range && count > 2 && (max.i64-min.i64) < count * 8) {
int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));
for (i = 0; i <= (max.i64 - min.i64); i++) {
@ -4615,6 +4641,38 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
| beq =>label
} else if (use_insn->op == IR_CASE_RANGE) {
val = &ctx->ir_base[use_insn->op2];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
label = ir_skip_empty_target_blocks(ctx, use_block);
if (aarch64_may_encode_imm12(val->val.i64)) {
| ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64
} else {
ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64);
| ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
}
if (IR_IS_TYPE_SIGNED(type)) {
| blt >1
} else {
| blo >1
}
val = &ctx->ir_base[use_insn->op3];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
label = ir_skip_empty_target_blocks(ctx, use_block);
if (aarch64_may_encode_imm12(val->val.i64)) {
| ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64
} else {
ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64);
| ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
}
if (IR_IS_TYPE_SIGNED(type)) {
| ble =>label
} else {
| bls =>label
}
|1:
}
}
if (default_label) {
@ -4935,6 +4993,28 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return;
}
/* Move op2 to a tmp register before the epilogue if it's in
* used_preserved_regs, because it will be overwritten. */
ir_reg op2_reg = IR_REG_NONE;
if (!IR_IS_CONST_REF(insn->op2)) {
op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_INT_TMP;
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
} else if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, IR_REG_NUM(op2_reg))) {
ir_reg orig_op2_reg = op2_reg;
op2_reg = IR_REG_INT_TMP;
ir_type type = ctx->ir_base[insn->op2].type;
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
} else {
op2_reg = IR_REG_NUM(op2_reg);
}
}
ir_emit_epilogue(ctx);
if (IR_IS_CONST_REF(insn->op2)) {
@ -4947,13 +5027,8 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
| br Rx(IR_REG_INT_TMP)
}
} else {
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
| br Rx(op2_reg)
}
}
@ -5590,6 +5665,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
case IR_IF_TRUE:
case IR_IF_FALSE:
case IR_CASE_VAL:
case IR_CASE_RANGE:
case IR_CASE_DEFAULT:
case IR_MERGE:
case IR_LOOP_BEGIN:

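The CASE_RANGE arm added to ir_emit_switch() above lowers each range to two comparisons, one against the low bound and one against the high bound. The shape of the emitted test, expressed as C (a sketch of the signed case; unsigned ranges use blo/bls instead of blt/ble):

static int in_case_range(int64_t x, int64_t lo, int64_t hi)
{
    if (x < lo) {
        return 0;      /* blt >1: fall through to the next arm */
    }
    return x <= hi;    /* ble =>label: branch to the case body */
}

Note that a switch containing any CASE_RANGE also disables the jump-table path (the has_case_range check above), so every arm is tested sequentially.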
View file

@ -603,6 +603,7 @@ extern "C" {
#define ir_LOOP_END() _ir_LOOP_END(_ir_CTX)
#define ir_SWITCH(_val) _ir_SWITCH(_ir_CTX, (_val))
#define ir_CASE_VAL(_switch, _val) _ir_CASE_VAL(_ir_CTX, (_switch), (_val))
#define ir_CASE_RANGE(_switch, _v1, _v2) _ir_CASE_RANGE(_ir_CTX, (_switch), (_v1), (_v2))
#define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch))
#define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val))
#define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr))
@ -682,6 +683,7 @@ ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset);
void _ir_UNREACHABLE(ir_ctx *ctx);
ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val);
void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val);
void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2);
void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref);
void _ir_RETURN(ir_ctx *ctx, ir_ref val);
void _ir_IJMP(ir_ctx *ctx, ir_ref addr);
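For context, a minimal sketch of the new builder entry point in use, following the usual ir_builder.h conventions (_ir_CTX bound to a local ctx, one control block open at a time); the function is illustrative, not from the commit:

#include "ir.h"
#include "ir_builder.h"

/* Builds: switch (x) { case 1 ... 10: return 1; default: return 0; } */
void build_range_switch(ir_ctx *ctx)
{
    ir_ref x, sw;

    ir_START();
    x  = ir_PARAM(IR_I32, "x", 1);
    sw = ir_SWITCH(x);

    ir_CASE_RANGE(sw, ir_CONST_I32(1), ir_CONST_I32(10));
    ir_RETURN(ir_CONST_I32(1));

    ir_CASE_DEFAULT(sw);
    ir_RETURN(ir_CONST_I32(0));
}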

View file

@ -244,7 +244,6 @@ int ir_build_cfg(ir_ctx *ctx)
_blocks[start] = b;
_blocks[end] = b;
IR_ASSERT(IR_IS_BB_START(insn->op));
IR_ASSERT(end > start);
bb->start = start;
bb->end = end;
bb->successors = count;
@ -583,7 +582,6 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
return 1;
}
#if 0
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
{
uint32_t i, *p;
@ -607,34 +605,42 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy. */
int ir_build_dominators_tree(ir_ctx *ctx)
static int ir_build_dominators_tree_slow(ir_ctx *ctx)
{
uint32_t blocks_count, b, postnum;
ir_block *blocks, *bb;
uint32_t *edges;
bool changed;
blocks = ctx->cfg_blocks;
edges = ctx->cfg_edges;
blocks_count = ctx->cfg_blocks_count;
/* Clear the dominators tree */
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
bb->idom = 0;
bb->dom_depth = 0;
bb->dom_child = 0;
bb->dom_next_child = 0;
}
ctx->flags2 &= ~IR_NO_LOOPS;
postnum = 1;
compute_postnum(ctx, &postnum, 1);
/* Find immediate dominators */
blocks = ctx->cfg_blocks;
edges = ctx->cfg_edges;
blocks_count = ctx->cfg_blocks_count;
/* Find immediate dominators by iterative fixed-point algorithm */
blocks[1].idom = 1;
do {
changed = 0;
/* Iterating in Reverse Post Order */
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
IR_ASSERT(bb->predecessors_count > 0);
if (bb->predecessors_count == 1) {
uint32_t pred_b = edges[bb->predecessors];
if (blocks[pred_b].idom <= 0) {
//IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
} else if (bb->idom != pred_b) {
if (blocks[pred_b].idom > 0 && bb->idom != pred_b) {
bb->idom = pred_b;
changed = 1;
}
@ -680,39 +686,53 @@ int ir_build_dominators_tree(ir_ctx *ctx)
}
}
} while (changed);
/* Build dominators tree */
blocks[1].idom = 0;
blocks[1].dom_depth = 0;
/* Construct dominators tree */
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
if (bb->idom > 0) {
ir_block *idom_bb = &blocks[bb->idom];
uint32_t idom = bb->idom;
ir_block *idom_bb = &blocks[idom];
bb->dom_depth = idom_bb->dom_depth + 1;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
bb->dom_depth = 0;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
}
}
/* Recalculate dom_depth for all blocks */
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
uint32_t idom = bb->idom;
uint32_t dom_depth = 0;
while (idom) {
dom_depth++;
if (blocks[idom].dom_depth > 0) {
dom_depth += blocks[idom].dom_depth;
break;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
idom = blocks[idom].idom;
}
}
bb->dom_depth = dom_depth;
}
return 1;
}
#else
/* A single pass modification of "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy, that relies on IR block ordering.
* It may fall back to the general slow fixed-point algorithm. */
@ -747,7 +767,11 @@ int ir_build_dominators_tree(ir_ctx *ctx)
if (UNEXPECTED(idom >= b)) {
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
ctx->flags2 &= ~IR_NO_LOOPS;
IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
if (UNEXPECTED(k <= 1)) {
ir_list_free(&worklist);
return ir_build_dominators_tree_slow(ctx);
}
ir_list_push(&worklist, idom);
while (1) {
k--;
@ -942,7 +966,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
return 1;
}
#endif
static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
{
@ -958,7 +981,7 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
int ir_find_loops(ir_ctx *ctx)
{
uint32_t i, j, n, count;
uint32_t b, j, n, count;
uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
ir_block *blocks = ctx->cfg_blocks;
uint32_t *edges = ctx->cfg_edges;
@ -983,13 +1006,13 @@ int ir_find_loops(ir_ctx *ctx)
int child;
next:
i = ir_worklist_peek(&work);
if (!entry_times[i]) {
entry_times[i] = time++;
b = ir_worklist_peek(&work);
if (!entry_times[b]) {
entry_times[b] = time++;
}
/* Visit blocks immediately dominated by i. */
bb = &blocks[i];
/* Visit blocks immediately dominated by "b". */
bb = &blocks[b];
for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
if (ir_worklist_push(&work, child)) {
goto next;
@ -999,17 +1022,17 @@ next:
/* Visit join edges. */
if (bb->successors_count) {
uint32_t *p = edges + bb->successors;
for (j = 0; j < bb->successors_count; j++,p++) {
for (j = 0; j < bb->successors_count; j++, p++) {
uint32_t succ = *p;
if (blocks[succ].idom == i) {
if (blocks[succ].idom == b) {
continue;
} else if (ir_worklist_push(&work, succ)) {
goto next;
}
}
}
exit_times[i] = time++;
exit_times[b] = time++;
ir_worklist_pop(&work);
}
@ -1018,7 +1041,7 @@ next:
j = 1;
n = 2;
while (j != n) {
i = j;
uint32_t i = j;
j = n;
for (; i < j; i++) {
int child;
@ -1030,9 +1053,82 @@ next:
count = n;
/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth;
uint32_t prev_irreducible = 0;
while (n > 1) {
i = sorted_blocks[--n];
ir_block *bb = &blocks[i];
b = sorted_blocks[--n];
ir_block *bb = &blocks[b];
IR_ASSERT(bb->dom_depth <= prev_dom_depth);
if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) {
/* process delayed irreducible loops */
do {
b = sorted_blocks[prev_irreducible];
bb = &blocks[b];
if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) {
/* process irreducible loop */
uint32_t hdr = b;
bb->loop_depth = 1;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
/* find the closing edge(s) of the irreducible loop */
IR_ASSERT(bb->predecessors_count > 1);
uint32_t *p = &edges[bb->predecessors];
j = bb->predecessors_count;
do {
uint32_t pred = *p;
if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
if (!ir_worklist_len(&work)) {
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
}
blocks[pred].loop_header = 0; /* support for merged loops */
ir_worklist_push(&work, pred);
}
p++;
} while (--j);
if (ir_worklist_len(&work) == 0) continue;
/* collect members of the irreducible loop */
while (ir_worklist_len(&work)) {
b = ir_worklist_pop(&work);
if (b != hdr) {
ir_block *bb = &blocks[b];
bb->loop_header = hdr;
if (bb->predecessors_count) {
uint32_t *p = &edges[bb->predecessors];
uint32_t n = bb->predecessors_count;
do {
uint32_t pred = *p;
while (blocks[pred].loop_header > 0) {
pred = blocks[pred].loop_header;
}
if (pred != hdr) {
if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) {
/* "pred" is a descendant of "hdr" */
ir_worklist_push(&work, pred);
} else {
/* another entry to the irreducible loop */
bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
}
}
p++;
} while (--n);
}
}
}
}
} while (--prev_irreducible != n);
prev_irreducible = 0;
b = sorted_blocks[n];
bb = &blocks[b];
}
if (bb->predecessors_count > 1) {
bool irreducible = 0;
@ -1047,7 +1143,7 @@ next:
if (bb->idom != pred) {
/* In a loop back-edge (back-join edge), the successor dominates
the predecessor. */
if (ir_dominates(blocks, i, pred)) {
if (ir_dominates(blocks, b, pred)) {
if (!ir_worklist_len(&work)) {
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
}
@ -1056,8 +1152,9 @@ next:
} else {
/* Otherwise it's a cross-join edge. See if it's a branch
to an ancestor on the DJ spanning tree. */
if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) {
if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
irreducible = 1;
break;
}
}
}
@ -1065,46 +1162,56 @@ next:
} while (--j);
if (UNEXPECTED(irreducible)) {
// TODO: Support for irreducible loops ???
bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
ctx->flags2 |= IR_IRREDUCIBLE_CFG;
while (ir_worklist_len(&work)) {
ir_worklist_pop(&work);
bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP;
ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG;
/* Remember the position of the first irreducible loop to process all the irreducible loops
* after the reducible loops with the same dominator tree depth
*/
if (!prev_irreducible) {
prev_irreducible = n;
prev_dom_depth = bb->dom_depth;
}
ir_list_clear(&work.l);
} else if (ir_worklist_len(&work)) {
/* collect members of the reducible loop */
uint32_t hdr = b;
bb->flags |= IR_BB_LOOP_HEADER;
ctx->flags2 |= IR_CFG_HAS_LOOPS;
bb->loop_depth = 1;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
while (ir_worklist_len(&work)) {
j = ir_worklist_pop(&work);
while (blocks[j].loop_header > 0) {
j = blocks[j].loop_header;
}
if (j != i) {
ir_block *bb = &blocks[j];
if (bb->idom == 0 && j != 1) {
/* Ignore blocks that are unreachable or only abnormally reachable. */
continue;
}
bb->loop_header = i;
b = ir_worklist_pop(&work);
if (b != hdr) {
ir_block *bb = &blocks[b];
bb->loop_header = hdr;
if (bb->predecessors_count) {
uint32_t *p = &edges[bb->predecessors];
j = bb->predecessors_count;
uint32_t n = bb->predecessors_count;
do {
ir_worklist_push(&work, *p);
uint32_t pred = *p;
while (blocks[pred].loop_header > 0) {
pred = blocks[pred].loop_header;
}
if (pred != hdr) {
ir_worklist_push(&work, pred);
}
p++;
} while (--j);
} while (--n);
}
}
}
}
}
}
IR_ASSERT(!prev_irreducible);
if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
for (n = 1; n < count; n++) {
i = sorted_blocks[n];
ir_block *bb = &blocks[i];
b = sorted_blocks[n];
ir_block *bb = &blocks[b];
if (bb->loop_header > 0) {
ir_block *loop = &blocks[bb->loop_header];
uint32_t loop_depth = loop->loop_depth;
@ -1389,7 +1496,7 @@ restart:
goto restart;
}
} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
ir_dump_cfg(ctx, stderr);
/* not a loop back-edge */
IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
}
}

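Background for the slow-path fallback above: in Cooper, Harvey and Kennedy's formulation, each idom is computed by intersecting the dominator-tree paths of already-processed predecessors; the fixed-point loop in ir_build_dominators_tree_slow() reaches the same answer without relying on block order. The classic intersection step, sketched over postorder numbers (array names are illustrative, not IR's):

#include <stdint.h>

/* Walk both "fingers" up the dominator tree until they meet; the
 * node with the smaller postorder number is deeper and moves first. */
static uint32_t intersect(const uint32_t *idom, const uint32_t *postnum,
                          uint32_t b1, uint32_t b2)
{
    while (b1 != b2) {
        while (postnum[b1] < postnum[b2]) b1 = idom[b1];
        while (postnum[b2] < postnum[b1]) b2 = idom[b2];
    }
    return b1;
}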
View file

@ -213,13 +213,18 @@ bool ir_check(const ir_ctx *ctx)
ok = 0;
}
}
break;
case IR_OPND_CONTROL_DEP:
if ((ctx->flags2 & IR_LINEAR)
&& use >= i
&& !(insn->op == IR_LOOP_BEGIN)) {
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
ok = 0;
}
break;
case IR_OPND_CONTROL_DEP:
if ((ctx->flags2 & IR_LINEAR)
&& use >= i) {
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
ok = 0;
} else if (insn->op == IR_PHI) {
ir_insn *merge_insn = &ctx->ir_base[insn->op1];
if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) {

View file

@ -309,7 +309,7 @@ static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
const char *name = ir_get_str(ctx, addr_insn->val.name);
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
ctx->loader->resolve_sym_name(ctx->loader, name, 0) :
ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
ir_resolve_sym_name(name);
return addr;
@ -320,7 +320,7 @@ static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
const char *name = ir_get_str(ctx, addr_insn->val.name);
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) :
ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
ir_resolve_sym_name(name);
IR_ASSERT(addr);

View file

@ -1909,7 +1909,9 @@ IR_FOLD(SUB(_, SUB))
IR_FOLD(SUB(ADD, ADD))
{
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
if (op1_insn->op1 == op2_insn->op1) {
if (op1 == op2) {
IR_FOLD_CONST_U(0);
} else if (op1_insn->op1 == op2_insn->op1) {
/* (a + b) - (a + c) => b - c */
op1 = op1_insn->op2;
op2 = op2_insn->op2;

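The new early-out catches identical ADD operands before the shared-operand cases run. At the source level the fold set is now roughly (a sketch; the matcher works on IR nodes, so the two `(a + b)` expressions below are a single node thanks to value numbering):

int32_t f(int32_t a, int32_t b, int32_t c)
{
    int32_t x = (a + b) - (a + b);   /* new: op1 == op2, folds to 0 */
    int32_t y = (a + b) - (a + c);   /* unchanged: folds to b - c   */
    return x + y;
}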
View file

@ -785,6 +785,139 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
return 0;
}
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
{
ir_insn *insn = &ctx->ir_base[start];
uint32_t n = insn->inputs_count;
ir_ref *p = insn->ops + 1;
if (n == 1) {
return *p < start;
} else {
IR_ASSERT(n > 1);
for (; n > 0; p++, n--) {
ir_ref input = *p;
if (input < start) {
/* ordered */
} else if ((bb->flags & IR_BB_LOOP_HEADER)
&& (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) {
/* back-edge of reducible loop */
} else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
&& (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) {
/* closing edge of irreducible loop */
} else {
return 0;
}
}
return 1;
}
}
static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next)
{
uint32_t b, succ, count, *q, *xlat;
ir_block *bb;
ir_ref ref, n, prev;
ir_worklist worklist;
ir_block *new_blocks;
#if 0
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
goto fix;
}
}
return;
fix:
#endif
count = ctx->cfg_blocks_count + 1;
new_blocks = ir_mem_malloc(count * sizeof(ir_block));
xlat = ir_mem_malloc(count * sizeof(uint32_t));
ir_worklist_init(&worklist, count);
ir_worklist_push(&worklist, 1);
while (ir_worklist_len(&worklist) != 0) {
next:
b = ir_worklist_peek(&worklist);
bb = &ctx->cfg_blocks[b];
n = bb->successors_count;
if (n == 1) {
succ = ctx->cfg_edges[bb->successors];
if (ir_worklist_push(&worklist, succ)) {
goto next;
}
} else if (n > 1) {
uint32_t best = 0;
uint32_t best_loop_depth = 0;
q = ctx->cfg_edges + bb->successors + n;
do {
q--;
succ = *q;
if (ir_bitset_in(worklist.visited, succ)) {
/* already processed */
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
&& (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
/* back-edge of reducible loop */
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
&& (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
/* closing edge of irreducible loop */
} else if (!best) {
best = succ;
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
} else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) {
/* prefer deeper loop */
best = succ;
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
}
n--;
} while (n > 0);
if (best) {
ir_worklist_push(&worklist, best);
goto next;
}
}
ir_worklist_pop(&worklist);
count--;
new_blocks[count] = *bb;
xlat[b] = count;
}
IR_ASSERT(count == 1);
xlat[0] = 0;
ir_worklist_free(&worklist);
prev = 0;
for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
bb->idom = xlat[bb->idom];
bb->loop_header = xlat[bb->loop_header];
n = bb->successors_count;
if (n > 0) {
for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) {
*q = xlat[*q];
}
}
n = bb->predecessors_count;
if (n > 0) {
for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) {
*q = xlat[*q];
}
}
_next[prev] = bb->start;
_prev[bb->start] = prev;
prev = bb->end;
}
_next[0] = 0;
_next[prev] = 0;
for (ref = 2; ref < ctx->insns_count; ref++) {
ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]];
}
ir_mem_free(xlat);
ir_mem_free(ctx->cfg_blocks);
ctx->cfg_blocks = new_blocks;
}
int ir_schedule(ir_ctx *ctx)
{
ir_ctx new_ctx;
@ -800,6 +933,7 @@ int ir_schedule(ir_ctx *ctx)
ir_block *bb;
ir_insn *insn, *new_insn;
ir_use_list *lists, *use_list, *new_list;
bool bad_bb_order = 0;
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
IR_ASSERT(_blocks[1] == 1);
@ -818,27 +952,61 @@ int ir_schedule(ir_ctx *ctx)
} else if (b > prev_b) {
bb = &ctx->cfg_blocks[b];
if (i == bb->start) {
IR_ASSERT(bb->end > bb->start);
prev_b = b;
prev_b_end = bb->end;
_prev[bb->end] = 0;
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
IR_ASSERT(i != bb->end);
if (bb->end > bb->start) {
prev_b = b;
prev_b_end = bb->end;
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
prev_b = 0;
prev_b_end = 0;
k = bb->end;
while (_blocks[_prev[k]] == b) {
k = _prev[k];
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
if (!ir_is_good_bb_order(ctx, b, bb, i)) {
bad_bb_order = 1;
}
} else if (i != bb->end) {
/* move down late (see the following loop) */
_next[i] = _move_down;
_move_down = i;
} else {
prev_b = 0;
prev_b_end = 0;
if (bb->start > bb->end) {
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
k = bb->start;
while (_blocks[_next[k]] == b) {
k = _next[k];
}
/* insert after "k" */
_next[i] = _next[k];
_prev[i] = k;
_prev[_next[k]] = i;
_next[k] = i;
}
}
} else if (b) {
bb = &ctx->cfg_blocks[b];
IR_ASSERT(i != bb->start);
if (_prev[bb->end]) {
if (i > bb->end) {
/* move up, insert before the end of the already scheduled BB */
k = bb->end;
} else {
IR_ASSERT(i > bb->start);
/* move up, insert at the end of the block */
k = ctx->cfg_blocks[b + 1].start;
}
@ -883,6 +1051,10 @@ int ir_schedule(ir_ctx *ctx)
}
#endif
if (bad_bb_order) {
ir_fix_bb_order(ctx, _prev, _next);
}
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE;
@ -904,6 +1076,11 @@ int ir_schedule(ir_ctx *ctx)
if (insn->op == IR_CASE_VAL) {
IR_ASSERT(insn->op2 < IR_TRUE);
consts_count += ir_count_constant(_xlat, insn->op2);
} else if (insn->op == IR_CASE_RANGE) {
IR_ASSERT(insn->op2 < IR_TRUE);
consts_count += ir_count_constant(_xlat, insn->op2);
IR_ASSERT(insn->op3 < IR_TRUE);
consts_count += ir_count_constant(_xlat, insn->op3);
}
n = insn->inputs_count;
insns_count += ir_insn_inputs_to_len(n);

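ir_fix_bb_order() above is an iterative DFS emitting blocks in reverse post-order, with back-edges of reducible loops and closing edges of irreducible loops treated as already satisfied. A compact recursive equivalent of just the ordering core (recursion used for brevity only; the loop-depth successor preference and edge exemptions are omitted):

#include "ir.h"
#include "ir_private.h" /* ir_block layout */

static void rpo_visit(const ir_ctx *ctx, uint32_t b, bool *seen,
                      uint32_t *order, uint32_t *count)
{
    const ir_block *bb = &ctx->cfg_blocks[b];
    uint32_t n;

    seen[b] = 1;
    for (n = 0; n < bb->successors_count; n++) {
        uint32_t succ = ctx->cfg_edges[bb->successors + n];
        if (!seen[succ]) {
            rpo_visit(ctx, succ, seen, order, count);
        }
    }
    order[--*count] = b;   /* post-order, filled from the back => RPO */
}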
View file

@ -9,6 +9,7 @@
#define IR_PRIVATE_H
#include <string.h>
#include <stdlib.h>
#include <stddef.h>
#ifdef IR_DEBUG
# include <assert.h>
@ -62,7 +63,7 @@
#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1)))
#define IR_IS_POWER_OF_TWO(x) ((x) && (!((x) & ((x) - 1))))
#define IR_LOG2(x) ir_ntzl(x)
@ -257,7 +258,7 @@ IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size)
ir_arena *arena = *arena_ptr;
char *ptr = (char*)IR_ALIGNED_SIZE((uintptr_t)arena->ptr, 8);
if (EXPECTED(size <= (size_t)(arena->end - ptr))) {
if (EXPECTED((ptrdiff_t)size <= (ptrdiff_t)(arena->end - ptr))) {
arena->ptr = ptr + size;
} else {
size_t arena_size =

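Two small correctness fixes here: IR_IS_POWER_OF_TWO previously classified 0 as a power of two (!(0 & (0 - 1)) evaluates to 1), and the arena size test is now a signed comparison, so an aligned ptr that has already passed arena->end no longer wraps to a huge size_t and falsely succeeds. The macro fix in effect:

#include <assert.h>

static void check_power_of_two_macro(void)
{
    assert(!IR_IS_POWER_OF_TWO(0));   /* old macro wrongly yielded 1 */
    assert( IR_IS_POWER_OF_TWO(1));
    assert(!IR_IS_POWER_OF_TWO(6));
    assert( IR_IS_POWER_OF_TWO(8));
}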
View file

@ -97,10 +97,14 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i);
if (insn->op == IR_FUNC) {
fprintf(f, "func %s", ir_get_str(ctx, insn->val.name));
fprintf(f, "func %s%s",
(save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "",
ir_get_str(ctx, insn->val.name));
ir_print_proto(ctx, insn->proto, f);
} else if (insn->op == IR_SYM) {
fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.name));
fprintf(f, "sym(%s%s)",
(save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "",
ir_get_str(ctx, insn->val.name));
} else if (insn->op == IR_FUNC_ADDR) {
fprintf(f, "func *");
ir_print_const(ctx, insn, f, true);
@ -140,6 +144,9 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth);
}
}
if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
fprintf(f, ", IRREDUCIBLE");
}
if (bb->predecessors_count) {
uint32_t i;

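A usage note for the new flag; presumably the '@' prefix keeps symbol names that could collide with IR keywords unambiguous for the textual loader (the flag combination below is illustrative):

/* Dump the IR with CFG info and '@'-prefixed symbol names. */
ir_save(ctx, IR_SAVE_CFG | IR_SAVE_SAFE_NAMES, stderr);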
View file

@ -458,6 +458,22 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
return v1->val.u64 == v2->val.u64;
}
static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
{
ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];
IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
IR_ASSERT(!IR_IS_SYM_CONST(v3->op));
if (IR_IS_TYPE_SIGNED(v1->type)) {
return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64;
} else {
return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64;
}
}
#ifdef IR_SCCP_TRACE
static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
{
@ -676,6 +692,11 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
} else if (use_insn->op == IR_CASE_DEFAULT) {
use_case = use;
} else if (use_insn->op == IR_CASE_RANGE) {
if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) {
use_case = use;
break;
}
}
}
if (use_case) {
@ -1732,7 +1753,20 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
ir_ref *p, n, input;
if (IR_IS_CONST_REF(ref)) {
return ir_const(ctx, insn->val, type);
ir_val val;
switch (type) {
case IR_I8: val.i64 = insn->val.i8; break;
case IR_U8: val.u64 = insn->val.u8; break;
case IR_I16: val.i64 = insn->val.i16; break;
case IR_U16: val.u64 = insn->val.u16; break;
case IR_I32: val.i64 = insn->val.i32; break;
case IR_U32: val.u64 = insn->val.u32; break;
case IR_CHAR:val.i64 = insn->val.i8; break;
case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
default: IR_ASSERT(0); val.u64 = 0;
}
return ir_const(ctx, val, type);
} else {
ir_bitqueue_add(worklist, ref);
switch (insn->op) {
@ -2391,7 +2425,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
}
start_ref = end->op1;
start = &ctx->ir_base[start_ref];
if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) {
if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) {
return 0;
}
if (ctx->use_lists[start_ref].count != 1) {

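The ir_promote_i2i() change matters because a constant's 64-bit payload was previously reused as-is when only the type was swapped: retyping the I32 constant 0x1ffff as I16 kept the out-of-range payload. The new switch re-normalizes the value through the target type; a sketch of the effect for one case (helper name is hypothetical):

static ir_val normalize_i16(ir_val in)
{
    ir_val out;
    out.i64 = in.i16;   /* e.g. payload 0x1ffff becomes -1 as an I16 */
    return out;
}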
View file

@ -1569,6 +1569,20 @@ op2_const:
constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
break;
case IR_SSE_SQRT:
case IR_SSE_RINT:
case IR_SSE_FLOOR:
case IR_SSE_CEIL:
case IR_SSE_TRUNC:
case IR_SSE_NEARBYINT:
insn = &ctx->ir_base[ref];
flags = IR_USE_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
if (IR_IS_CONST_REF(insn->op3)) {
const ir_insn *val_insn = &ctx->ir_base[insn->op3];
constraints->tmp_regs[n] = IR_TMP_REG(3, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n = 1;
}
break;
}
constraints->tmps_count = n;
@ -2630,6 +2644,7 @@ store_int:
case IR_IF_TRUE:
case IR_IF_FALSE:
case IR_CASE_VAL:
case IR_CASE_RANGE:
case IR_CASE_DEFAULT:
case IR_MERGE:
case IR_LOOP_BEGIN:
@ -6868,7 +6883,24 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
if (IR_IS_CONST_REF(insn->op2)) {
ir_insn *value = &ctx->ir_base[insn->op2];
if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
| fldz
} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
| fld1
} else {
int label = ir_const_label(ctx, insn->op2);
if (type == IR_DOUBLE) {
| fld qword [=>label]
} else {
IR_ASSERT(type == IR_FLOAT);
| fld dword [=>label]
}
}
} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
ir_reg fp;
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
@ -8442,11 +8474,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
@ -8471,11 +8507,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0];
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
@ -8541,6 +8581,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
ir_val min, max;
ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3];
bool has_case_range = 0;
type = ctx->ir_base[insn->op2].type;
IR_ASSERT(tmp_reg != IR_REG_NONE);
@ -8570,6 +8611,21 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
}
count++;
} else if (use_insn->op == IR_CASE_RANGE) {
has_case_range = 1;
val = &ctx->ir_base[use_insn->op2];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
ir_insn *val2 = &ctx->ir_base[use_insn->op3];
IR_ASSERT(!IR_IS_SYM_CONST(val2->op));
if (IR_IS_TYPE_SIGNED(type)) {
IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
min.i64 = IR_MIN(min.i64, val->val.i64);
max.i64 = IR_MAX(max.i64, val2->val.i64);
} else {
IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
max.u64 = (int64_t)IR_MAX(max.u64, val2->val.u64);
}
} else {
IR_ASSERT(use_insn->op == IR_CASE_DEFAULT);
default_label = ir_skip_empty_target_blocks(ctx, use_block);
@ -8583,7 +8639,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
/* Generate a table jmp or a sequence of calls */
if (count > 2 && (max.i64-min.i64) < count * 8) {
if (!has_case_range && count > 2 && (max.i64-min.i64) < count * 8) {
int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1));
for (i = 0; i <= (max.i64 - min.i64); i++) {
@ -8747,6 +8803,42 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
|.endif
}
| je =>label
} else if (use_insn->op == IR_CASE_RANGE) {
val = &ctx->ir_base[use_insn->op2];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
label = ir_skip_empty_target_blocks(ctx, use_block);
if (IR_IS_32BIT(type, val->val)) {
| ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32
} else {
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| mov64 Ra(tmp_reg), val->val.i64
| ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
|.endif
}
if (IR_IS_TYPE_SIGNED(type)) {
| jl >1
} else {
| jb >1
}
val = &ctx->ir_base[use_insn->op3];
IR_ASSERT(!IR_IS_SYM_CONST(val->op));
label = ir_skip_empty_target_blocks(ctx, use_block);
if (IR_IS_32BIT(type, val->val)) {
| ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32
} else {
IR_ASSERT(sizeof(void*) == 8);
|.if X64
| mov64 Ra(tmp_reg), val->val.i64
| ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
|.endif
}
if (IR_IS_TYPE_SIGNED(type)) {
| jle =>label
} else {
| jbe =>label
}
|1:
}
}
if (default_label) {
@ -9221,6 +9313,58 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return;
}
/* Move op2 to a tmp register before the epilogue if it's in
* used_preserved_regs, because it will be overwritten. */
ir_reg op2_reg = IR_REG_NONE;
ir_mem mem = IR_MEM_B(IR_REG_NONE);
if (!IR_IS_CONST_REF(insn->op2)) {
op2_reg = ctx->regs[def][2];
ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER);
if (ctx->flags & IR_USE_FRAME_POINTER) {
preserved_regs |= IR_REGSET(IR_REG_FRAME_POINTER);
}
bool is_spill_slot = op2_reg != IR_REG_NONE
&& IR_REG_SPILLED(op2_reg)
&& ctx->vregs[insn->op2];
if (op2_reg != IR_REG_NONE && !is_spill_slot) {
if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) {
ir_ref orig_op2_reg = op2_reg;
op2_reg = IR_REG_RAX;
if (IR_REG_SPILLED(orig_op2_reg)) {
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
} else {
ir_type type = ctx->ir_base[insn->op2].type;
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
}
} else {
op2_reg = IR_REG_NUM(op2_reg);
}
} else {
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
IR_ASSERT(op2_reg == IR_REG_NONE);
mem = ir_fuse_load(ctx, def, insn->op2);
} else {
mem = ir_ref_spill_slot(ctx, insn->op2);
}
ir_reg base = IR_MEM_BASE(mem);
ir_reg index = IR_MEM_INDEX(mem);
if ((base != IR_REG_NONE && IR_REGSET_IN(preserved_regs, base)) ||
(index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) {
op2_reg = IR_REG_RAX;
ir_type type = ctx->ir_base[insn->op2].type;
ir_emit_load_mem_int(ctx, type, op2_reg, mem);
} else {
op2_reg = IR_REG_NONE;
}
}
}
ir_emit_epilogue(ctx);
if (IR_IS_CONST_REF(insn->op2)) {
@ -9246,22 +9390,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.endif
}
} else {
ir_reg op2_reg = ctx->regs[def][2];
if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
| jmp Ra(op2_reg)
} else {
ir_mem mem;
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
mem = ir_fuse_load(ctx, def, insn->op2);
} else {
mem = ir_ref_spill_slot(ctx, insn->op2);
}
| ASM_TMEM_OP jmp, aword, mem
}
}
@ -10314,6 +10446,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
case IR_IF_TRUE:
case IR_IF_FALSE:
case IR_CASE_VAL:
case IR_CASE_RANGE:
case IR_CASE_DEFAULT:
case IR_MERGE:
case IR_LOOP_BEGIN:
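Finally, the tailcall rework in both back ends addresses the same hazard: ir_emit_epilogue() restores callee-saved registers, so an indirect branch target living in one of them, or loaded through one, would be clobbered before the jump. In outline:

/* Before (broken when op2 sits in a callee-saved register):
 *     mov  rbx, <target>   ; op2 allocated to a preserved reg
 *     <epilogue>           ; restores rbx from the stack frame
 *     jmp  rbx             ; jumps through the caller's rbx
 *
 * After: op2 is copied to a scratch register (RAX on x86,
 * IR_REG_INT_TMP on AArch64) before the epilogue, and spill slots
 * addressed via preserved base/index registers are reloaded the
 * same way. */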