Merge branch 'PHP-8.4'

* PHP-8.4:
  Update IR
This commit is contained in:
Dmitry Stogov 2025-07-07 14:03:36 +03:00
commit dd69b65638
No known key found for this signature in database
12 changed files with 482 additions and 113 deletions

View file

@ -803,7 +803,9 @@ ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_co
proto->flags = flags; proto->flags = flags;
proto->ret_type = ret_type; proto->ret_type = ret_type;
proto->params_count = params_count; proto->params_count = params_count;
if (params_count) {
memcpy(proto->param_types, param_types, params_count); memcpy(proto->param_types, param_types, params_count);
}
return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count); return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count);
} }

View file

@ -854,6 +854,9 @@ void ir_gdb_unregister_all(void);
bool ir_gdb_present(void); bool ir_gdb_present(void);
/* IR load API (implementation in ir_load.c) */ /* IR load API (implementation in ir_load.c) */
#define IR_RESOLVE_SYM_ADD_THUNK (1<<0)
#define IR_RESOLVE_SYM_SILENT (1<<1)
struct _ir_loader { struct _ir_loader {
uint32_t default_func_flags; uint32_t default_func_flags;
bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target); bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
@ -870,7 +873,7 @@ struct _ir_loader {
bool (*sym_data_end) (ir_loader *loader, uint32_t flags); bool (*sym_data_end) (ir_loader *loader, uint32_t flags);
bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name); bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name); bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk); void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags);
bool (*has_sym) (ir_loader *loader, const char *name); bool (*has_sym) (ir_loader *loader, const char *name);
bool (*add_sym) (ir_loader *loader, const char *name, void *addr); bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
}; };

View file

@ -4366,11 +4366,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data; ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type; ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2]; ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3]; ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset; int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) { if (IR_REG_SPILLED(op2_reg)) {
@ -4394,11 +4398,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data; ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type; ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2]; ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3]; ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset; int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) { if (IR_REG_SPILLED(op2_reg)) {
@ -4935,6 +4943,28 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return; return;
} }
/* Move op2 to a tmp register before epilogue if it's in
* used_preserved_regs, because it will be overridden. */
ir_reg op2_reg = IR_REG_NONE;
if (!IR_IS_CONST_REF(insn->op2)) {
op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_INT_TMP;
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
} else if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, IR_REG_NUM(op2_reg))) {
ir_reg orig_op2_reg = op2_reg;
op2_reg = IR_REG_INT_TMP;
ir_type type = ctx->ir_base[insn->op2].type;
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
} else {
op2_reg = IR_REG_NUM(op2_reg);
}
}
ir_emit_epilogue(ctx); ir_emit_epilogue(ctx);
if (IR_IS_CONST_REF(insn->op2)) { if (IR_IS_CONST_REF(insn->op2)) {
@ -4947,13 +4977,8 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
| br Rx(IR_REG_INT_TMP) | br Rx(IR_REG_INT_TMP)
} }
} else { } else {
ir_reg op2_reg = ctx->regs[def][2];
IR_ASSERT(op2_reg != IR_REG_NONE); IR_ASSERT(op2_reg != IR_REG_NONE);
if (IR_REG_SPILLED(op2_reg)) { IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| br Rx(op2_reg) | br Rx(op2_reg)
} }
} }

View file

@ -244,7 +244,6 @@ int ir_build_cfg(ir_ctx *ctx)
_blocks[start] = b; _blocks[start] = b;
_blocks[end] = b; _blocks[end] = b;
IR_ASSERT(IR_IS_BB_START(insn->op)); IR_ASSERT(IR_IS_BB_START(insn->op));
IR_ASSERT(end > start);
bb->start = start; bb->start = start;
bb->end = end; bb->end = end;
bb->successors = count; bb->successors = count;
@ -583,7 +582,6 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
return 1; return 1;
} }
#if 0
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
{ {
uint32_t i, *p; uint32_t i, *p;
@ -607,34 +605,42 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy. */ * Cooper, Harvey and Kennedy. */
int ir_build_dominators_tree(ir_ctx *ctx) static int ir_build_dominators_tree_slow(ir_ctx *ctx)
{ {
uint32_t blocks_count, b, postnum; uint32_t blocks_count, b, postnum;
ir_block *blocks, *bb; ir_block *blocks, *bb;
uint32_t *edges; uint32_t *edges;
bool changed; bool changed;
blocks = ctx->cfg_blocks;
edges = ctx->cfg_edges;
blocks_count = ctx->cfg_blocks_count;
/* Clear the dominators tree */
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
bb->idom = 0;
bb->dom_depth = 0;
bb->dom_child = 0;
bb->dom_next_child = 0;
}
ctx->flags2 &= ~IR_NO_LOOPS; ctx->flags2 &= ~IR_NO_LOOPS;
postnum = 1; postnum = 1;
compute_postnum(ctx, &postnum, 1); compute_postnum(ctx, &postnum, 1);
/* Find immediate dominators */ /* Find immediate dominators by iterative fixed-point algorithm */
blocks = ctx->cfg_blocks;
edges = ctx->cfg_edges;
blocks_count = ctx->cfg_blocks_count;
blocks[1].idom = 1; blocks[1].idom = 1;
do { do {
changed = 0; changed = 0;
/* Iterating in Reverse Post Order */ /* Iterating in Reverse Post Order */
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
IR_ASSERT(bb->predecessors_count > 0);
if (bb->predecessors_count == 1) { if (bb->predecessors_count == 1) {
uint32_t pred_b = edges[bb->predecessors]; uint32_t pred_b = edges[bb->predecessors];
if (blocks[pred_b].idom <= 0) { if (blocks[pred_b].idom > 0 && bb->idom != pred_b) {
//IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
} else if (bb->idom != pred_b) {
bb->idom = pred_b; bb->idom = pred_b;
changed = 1; changed = 1;
} }
@ -680,14 +686,13 @@ int ir_build_dominators_tree(ir_ctx *ctx)
} }
} }
} while (changed); } while (changed);
/* Build dominators tree */
blocks[1].idom = 0; blocks[1].idom = 0;
blocks[1].dom_depth = 0; blocks[1].dom_depth = 0;
/* Construct dominators tree */
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); uint32_t idom = bb->idom;
if (bb->idom > 0) { ir_block *idom_bb = &blocks[idom];
ir_block *idom_bb = &blocks[bb->idom];
bb->dom_depth = idom_bb->dom_depth + 1; bb->dom_depth = idom_bb->dom_depth + 1;
/* Sort by block number to traverse children in pre-order */ /* Sort by block number to traverse children in pre-order */
@ -708,11 +713,10 @@ int ir_build_dominators_tree(ir_ctx *ctx)
child_bb->dom_next_child = b; child_bb->dom_next_child = b;
} }
} }
}
return 1; return 1;
} }
#else
/* A single pass modification of "A Simple, Fast Dominance Algorithm" by /* A single pass modification of "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy, that relies on IR block ordering. * Cooper, Harvey and Kennedy, that relies on IR block ordering.
* It may fall back to the general slow fixed-point algorithm. */ * It may fall back to the general slow fixed-point algorithm. */
@ -747,7 +751,11 @@ int ir_build_dominators_tree(ir_ctx *ctx)
if (UNEXPECTED(idom >= b)) { if (UNEXPECTED(idom >= b)) {
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */ /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
ctx->flags2 &= ~IR_NO_LOOPS; ctx->flags2 &= ~IR_NO_LOOPS;
IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); // IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
if (UNEXPECTED(k <= 1)) {
ir_list_free(&worklist);
return ir_build_dominators_tree_slow(ctx);
}
ir_list_push(&worklist, idom); ir_list_push(&worklist, idom);
while (1) { while (1) {
k--; k--;
@ -942,7 +950,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
return 1; return 1;
} }
#endif
static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2) static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
{ {
@ -958,7 +965,7 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
int ir_find_loops(ir_ctx *ctx) int ir_find_loops(ir_ctx *ctx)
{ {
uint32_t i, j, n, count; uint32_t b, j, n, count;
uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1; uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
ir_block *blocks = ctx->cfg_blocks; ir_block *blocks = ctx->cfg_blocks;
uint32_t *edges = ctx->cfg_edges; uint32_t *edges = ctx->cfg_edges;
@ -983,13 +990,13 @@ int ir_find_loops(ir_ctx *ctx)
int child; int child;
next: next:
i = ir_worklist_peek(&work); b = ir_worklist_peek(&work);
if (!entry_times[i]) { if (!entry_times[b]) {
entry_times[i] = time++; entry_times[b] = time++;
} }
/* Visit blocks immediately dominated by i. */ /* Visit blocks immediately dominated by "b". */
bb = &blocks[i]; bb = &blocks[b];
for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) { for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
if (ir_worklist_push(&work, child)) { if (ir_worklist_push(&work, child)) {
goto next; goto next;
@ -1002,14 +1009,14 @@ next:
for (j = 0; j < bb->successors_count; j++, p++) { for (j = 0; j < bb->successors_count; j++, p++) {
uint32_t succ = *p; uint32_t succ = *p;
if (blocks[succ].idom == i) { if (blocks[succ].idom == b) {
continue; continue;
} else if (ir_worklist_push(&work, succ)) { } else if (ir_worklist_push(&work, succ)) {
goto next; goto next;
} }
} }
} }
exit_times[i] = time++; exit_times[b] = time++;
ir_worklist_pop(&work); ir_worklist_pop(&work);
} }
@ -1018,7 +1025,7 @@ next:
j = 1; j = 1;
n = 2; n = 2;
while (j != n) { while (j != n) {
i = j; uint32_t i = j;
j = n; j = n;
for (; i < j; i++) { for (; i < j; i++) {
int child; int child;
@ -1030,9 +1037,82 @@ next:
count = n; count = n;
/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */ /* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth;
uint32_t prev_irreducible = 0;
while (n > 1) { while (n > 1) {
i = sorted_blocks[--n]; b = sorted_blocks[--n];
ir_block *bb = &blocks[i]; ir_block *bb = &blocks[b];
IR_ASSERT(bb->dom_depth <= prev_dom_depth);
if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) {
/* process delayed irreducible loops */
do {
b = sorted_blocks[prev_irreducible];
bb = &blocks[b];
if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) {
/* process irreducible loop */
uint32_t hdr = b;
bb->loop_depth = 1;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
/* find the closing edge(s) of the irreducible loop */
IR_ASSERT(bb->predecessors_count > 1);
uint32_t *p = &edges[bb->predecessors];
j = bb->predecessors_count;
do {
uint32_t pred = *p;
if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
if (!ir_worklist_len(&work)) {
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
}
blocks[pred].loop_header = 0; /* support for merged loops */
ir_worklist_push(&work, pred);
}
p++;
} while (--j);
IR_ASSERT(ir_worklist_len(&work) != 0);
/* collect members of the irreducible loop */
while (ir_worklist_len(&work)) {
b = ir_worklist_pop(&work);
if (b != hdr) {
ir_block *bb = &blocks[b];
bb->loop_header = hdr;
if (bb->predecessors_count) {
uint32_t *p = &edges[bb->predecessors];
uint32_t n = bb->predecessors_count;
do {
uint32_t pred = *p;
while (blocks[pred].loop_header > 0) {
pred = blocks[pred].loop_header;
}
if (pred != hdr) {
if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) {
/* "pred" is a descendant of "hdr" */
ir_worklist_push(&work, pred);
} else {
/* another entry to the irreducible loop */
bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
}
}
p++;
} while (--n);
}
}
}
}
} while (--prev_irreducible != n);
prev_irreducible = 0;
b = sorted_blocks[n];
bb = &blocks[b];
}
if (bb->predecessors_count > 1) { if (bb->predecessors_count > 1) {
bool irreducible = 0; bool irreducible = 0;
@ -1047,7 +1127,7 @@ next:
if (bb->idom != pred) { if (bb->idom != pred) {
/* In a loop back-edge (back-join edge), the successor dominates /* In a loop back-edge (back-join edge), the successor dominates
the predecessor. */ the predecessor. */
if (ir_dominates(blocks, i, pred)) { if (ir_dominates(blocks, b, pred)) {
if (!ir_worklist_len(&work)) { if (!ir_worklist_len(&work)) {
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work))); ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
} }
@ -1056,8 +1136,9 @@ next:
} else { } else {
/* Otherwise it's a cross-join edge. See if it's a branch /* Otherwise it's a cross-join edge. See if it's a branch
to an ancestor on the DJ spanning tree. */ to an ancestor on the DJ spanning tree. */
if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) { if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
irreducible = 1; irreducible = 1;
break;
} }
} }
} }
@ -1065,46 +1146,55 @@ next:
} while (--j); } while (--j);
if (UNEXPECTED(irreducible)) { if (UNEXPECTED(irreducible)) {
// TODO: Support for irreducible loops ??? bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP;
bb->flags |= IR_BB_IRREDUCIBLE_LOOP; ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG;
ctx->flags2 |= IR_IRREDUCIBLE_CFG; /* Remember the position of the first irreducible loop to process all the irreducible loops
while (ir_worklist_len(&work)) { * after the reducible loops with the same dominator tree depth
ir_worklist_pop(&work); */
if (!prev_irreducible) {
prev_irreducible = n;
} }
ir_list_clear(&work.l);
} else if (ir_worklist_len(&work)) { } else if (ir_worklist_len(&work)) {
/* collect members of the reducible loop */
uint32_t hdr = b;
bb->flags |= IR_BB_LOOP_HEADER; bb->flags |= IR_BB_LOOP_HEADER;
ctx->flags2 |= IR_CFG_HAS_LOOPS; ctx->flags2 |= IR_CFG_HAS_LOOPS;
bb->loop_depth = 1; bb->loop_depth = 1;
if (ctx->ir_base[bb->start].op == IR_MERGE) {
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
}
while (ir_worklist_len(&work)) { while (ir_worklist_len(&work)) {
j = ir_worklist_pop(&work); b = ir_worklist_pop(&work);
while (blocks[j].loop_header > 0) { if (b != hdr) {
j = blocks[j].loop_header; ir_block *bb = &blocks[b];
} bb->loop_header = hdr;
if (j != i) {
ir_block *bb = &blocks[j];
if (bb->idom == 0 && j != 1) {
/* Ignore blocks that are unreachable or only abnormally reachable. */
continue;
}
bb->loop_header = i;
if (bb->predecessors_count) { if (bb->predecessors_count) {
uint32_t *p = &edges[bb->predecessors]; uint32_t *p = &edges[bb->predecessors];
j = bb->predecessors_count; uint32_t n = bb->predecessors_count;
do { do {
ir_worklist_push(&work, *p); uint32_t pred = *p;
while (blocks[pred].loop_header > 0) {
pred = blocks[pred].loop_header;
}
if (pred != hdr) {
ir_worklist_push(&work, pred);
}
p++; p++;
} while (--j); } while (--n);
} }
} }
} }
} }
} }
} }
IR_ASSERT(!prev_irreducible);
if (ctx->flags2 & IR_CFG_HAS_LOOPS) { if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
for (n = 1; n < count; n++) { for (n = 1; n < count; n++) {
i = sorted_blocks[n]; b = sorted_blocks[n];
ir_block *bb = &blocks[i]; ir_block *bb = &blocks[b];
if (bb->loop_header > 0) { if (bb->loop_header > 0) {
ir_block *loop = &blocks[bb->loop_header]; ir_block *loop = &blocks[bb->loop_header];
uint32_t loop_depth = loop->loop_depth; uint32_t loop_depth = loop->loop_depth;
@ -1389,7 +1479,7 @@ restart:
goto restart; goto restart;
} }
} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) { } else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
ir_dump_cfg(ctx, stderr); /* not a loop back-edge */
IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b); IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
} }
} }

View file

@ -213,13 +213,18 @@ bool ir_check(const ir_ctx *ctx)
ok = 0; ok = 0;
} }
} }
break;
case IR_OPND_CONTROL_DEP:
if ((ctx->flags2 & IR_LINEAR) if ((ctx->flags2 & IR_LINEAR)
&& use >= i && use >= i
&& !(insn->op == IR_LOOP_BEGIN)) { && !(insn->op == IR_LOOP_BEGIN)) {
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
ok = 0; ok = 0;
}
break;
case IR_OPND_CONTROL_DEP:
if ((ctx->flags2 & IR_LINEAR)
&& use >= i) {
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
ok = 0;
} else if (insn->op == IR_PHI) { } else if (insn->op == IR_PHI) {
ir_insn *merge_insn = &ctx->ir_base[insn->op1]; ir_insn *merge_insn = &ctx->ir_base[insn->op1];
if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) { if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) {

View file

@ -309,7 +309,7 @@ static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{ {
const char *name = ir_get_str(ctx, addr_insn->val.name); const char *name = ir_get_str(ctx, addr_insn->val.name);
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
ctx->loader->resolve_sym_name(ctx->loader, name, 0) : ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
ir_resolve_sym_name(name); ir_resolve_sym_name(name);
return addr; return addr;
@ -320,7 +320,7 @@ static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{ {
const char *name = ir_get_str(ctx, addr_insn->val.name); const char *name = ir_get_str(ctx, addr_insn->val.name);
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) : ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
ir_resolve_sym_name(name); ir_resolve_sym_name(name);
IR_ASSERT(addr); IR_ASSERT(addr);

View file

@ -1909,7 +1909,9 @@ IR_FOLD(SUB(_, SUB))
IR_FOLD(SUB(ADD, ADD)) IR_FOLD(SUB(ADD, ADD))
{ {
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
if (op1_insn->op1 == op2_insn->op1) { if (op1 == op2) {
IR_FOLD_CONST_U(0);
} else if (op1_insn->op1 == op2_insn->op1) {
/* (a + b) - (a + c) => b - c */ /* (a + b) - (a + c) => b - c */
op1 = op1_insn->op2; op1 = op1_insn->op2;
op2 = op2_insn->op2; op2 = op2_insn->op2;

View file

@ -785,6 +785,139 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
return 0; return 0;
} }
/* Check whether block "b" (described by "bb", beginning at instruction "start")
 * is placed acceptably with respect to the inputs of its start instruction.
 *
 * An input is acceptable when it precedes "start", or when it is either
 * a back-edge of a reducible loop headed by "b" or a closing edge of
 * an irreducible loop. Returns true when every input is acceptable.
 */
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
{
	ir_insn *start_insn = &ctx->ir_base[start];
	uint32_t inputs = start_insn->inputs_count;
	ir_ref *ops = start_insn->ops + 1;
	uint32_t k;

	/* A single input only has to come before the block start. */
	if (inputs == 1) {
		return ops[0] < start;
	}

	IR_ASSERT(inputs > 1);
	for (k = 0; k < inputs; k++) {
		ir_ref input = ops[k];

		if (input < start) {
			/* ordered */
			continue;
		}
		if ((bb->flags & IR_BB_LOOP_HEADER)
		 && (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) {
			/* back-edge of reducible loop */
			continue;
		}
		if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
		 && (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) {
			/* closing edge of irreducible loop */
			continue;
		}
		/* forward reference that no loop edge explains */
		return 0;
	}
	return 1;
}
/* Renumber the CFG blocks and re-chain the instruction list to match.
 *
 * A depth-first walk starting from block 1 hands out new block numbers in
 * decreasing order as blocks are popped from the worklist, and records the
 * old->new mapping in "xlat". Each block is copied into a freshly allocated
 * array at its new position. Afterwards idom, loop_header, the CFG edges and
 * ctx->cfg_map are translated through "xlat", and _prev/_next are re-linked
 * so that the start of every block follows the end of the previous one.
 * Finally the old ctx->cfg_blocks array is freed and replaced by the new one.
 *
 * ctx    - IR context whose cfg_blocks, cfg_edges and cfg_map are rewritten.
 * _prev, _next - link arrays indexed by instruction ref; only the links at
 *          block boundaries (block start / block end) are rewritten here.
 */
static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next)
{
uint32_t b, succ, count, *q, *xlat;
ir_block *bb;
ir_ref ref, n, prev;
ir_worklist worklist;
ir_block *new_blocks;
/* Disabled pre-check: it would return early when every block already
 * passes ir_is_good_bb_order(). */
#if 0
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
goto fix;
}
}
return;
fix:
#endif
/* "count" starts one past the last block number and is decremented on every
 * pop below, so new numbers are assigned from cfg_blocks_count downwards. */
count = ctx->cfg_blocks_count + 1;
new_blocks = ir_mem_malloc(count * sizeof(ir_block));
xlat = ir_mem_malloc(count * sizeof(uint32_t));
ir_worklist_init(&worklist, count);
ir_worklist_push(&worklist, 1);
while (ir_worklist_len(&worklist) != 0) {
next:
/* (Re-)examine the block currently on top of the worklist. */
b = ir_worklist_peek(&worklist);
bb = &ctx->cfg_blocks[b];
n = bb->successors_count;
if (n == 1) {
/* Single successor: descend into it when the push succeeds
 * (presumably: when it was not visited before - confirm against
 * ir_worklist_push()). */
succ = ctx->cfg_edges[bb->successors];
if (ir_worklist_push(&worklist, succ)) {
goto next;
}
} else if (n > 1) {
/* Several successors: scan them from the last to the first and pick
 * one candidate to descend into. */
uint32_t best = 0;
uint32_t best_loop_depth = 0;
q = ctx->cfg_edges + bb->successors + n;
do {
q--;
succ = *q;
if (ir_bitset_in(worklist.visited, succ)) {
/* already processed */
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
&& (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
/* back-edge of reducible loop */
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
&& (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
/* closing edge of irreducible loop */
} else if (!best) {
/* first eligible successor found in this scan */
best = succ;
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
} else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) {
/* prefer deeper loop */
/* The successor with the smaller loop_depth is visited first; it is
 * therefore popped earlier and receives a higher block number, which
 * leaves the lower numbers to the deeper successors. On equal depth
 * the earlier pick is kept (strict "<"). */
best = succ;
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
}
n--;
} while (n > 0);
if (best) {
ir_worklist_push(&worklist, best);
goto next;
}
}
/* No successor left to descend into: the block is finished. Pop it and
 * give it the next (decreasing) block number. */
ir_worklist_pop(&worklist);
count--;
new_blocks[count] = *bb;
xlat[b] = count;
}
/* cfg_blocks_count blocks must have been popped. */
IR_ASSERT(count == 1);
/* Map 0 to 0, so idom/loop_header values of 0 stay 0 when translated below. */
xlat[0] = 0;
ir_worklist_free(&worklist);
/* NOTE(review): new_blocks[0] is never written in this function - confirm
 * that entry 0 of cfg_blocks is not read afterwards. */
prev = 0;
for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
/* Translate the block references stored in the block itself. */
bb->idom = xlat[bb->idom];
bb->loop_header = xlat[bb->loop_header];
/* Translate this block's successor edges in place. */
n = bb->successors_count;
if (n > 0) {
for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) {
*q = xlat[*q];
}
}
/* Translate this block's predecessor edges in place. */
n = bb->predecessors_count;
if (n > 0) {
for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) {
*q = xlat[*q];
}
}
/* Chain the start of this block after the end of the previous block
 * (after 0 for the first block). */
_next[prev] = bb->start;
_prev[bb->start] = prev;
prev = bb->end;
}
/* Terminate the list: both entry 0 and the end of the last block get
 * a "next" link of 0. */
_next[0] = 0;
_next[prev] = 0;
/* Re-map the instruction -> block table to the new block numbers
 * (the loop starts at ref 2). */
for (ref = 2; ref < ctx->insns_count; ref++) {
ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]];
}
ir_mem_free(xlat);
/* Replace the old block array with the reordered copy. */
ir_mem_free(ctx->cfg_blocks);
ctx->cfg_blocks = new_blocks;
}
int ir_schedule(ir_ctx *ctx) int ir_schedule(ir_ctx *ctx)
{ {
ir_ctx new_ctx; ir_ctx new_ctx;
@ -800,6 +933,7 @@ int ir_schedule(ir_ctx *ctx)
ir_block *bb; ir_block *bb;
ir_insn *insn, *new_insn; ir_insn *insn, *new_insn;
ir_use_list *lists, *use_list, *new_list; ir_use_list *lists, *use_list, *new_list;
bool bad_bb_order = 0;
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */ /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
IR_ASSERT(_blocks[1] == 1); IR_ASSERT(_blocks[1] == 1);
@ -818,27 +952,50 @@ int ir_schedule(ir_ctx *ctx)
} else if (b > prev_b) { } else if (b > prev_b) {
bb = &ctx->cfg_blocks[b]; bb = &ctx->cfg_blocks[b];
if (i == bb->start) { if (i == bb->start) {
IR_ASSERT(bb->end > bb->start); if (bb->end > bb->start) {
prev_b = b; prev_b = b;
prev_b_end = bb->end; prev_b_end = bb->end;
_prev[bb->end] = 0;
/* add to the end of the list */ /* add to the end of the list */
_next[j] = i; _next[j] = i;
_prev[i] = j; _prev[i] = j;
j = i; j = i;
} else { } else {
IR_ASSERT(i != bb->end); prev_b = 0;
prev_b_end = 0;
k = bb->end;
while (_blocks[_prev[k]] == b) {
k = _prev[k];
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
if (!ir_is_good_bb_order(ctx, b, bb, i)) {
bad_bb_order = 1;
}
} else if (i != bb->end) {
/* move down late (see the following loop) */ /* move down late (see the following loop) */
_next[i] = _move_down; _next[i] = _move_down;
_move_down = i; _move_down = i;
} else {
IR_ASSERT(bb->start > bb->end);
prev_b = 0;
prev_b_end = 0;
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} }
} else if (b) { } else if (b) {
bb = &ctx->cfg_blocks[b]; bb = &ctx->cfg_blocks[b];
IR_ASSERT(i != bb->start); IR_ASSERT(i != bb->start);
if (_prev[bb->end]) { if (i > bb->end) {
/* move up, insert before the end of the already scheduled BB */ /* move up, insert before the end of the already scheduled BB */
k = bb->end; k = bb->end;
} else { } else {
IR_ASSERT(i > bb->start);
/* move up, insert at the end of the block */ /* move up, insert at the end of the block */
k = ctx->cfg_blocks[b + 1].start; k = ctx->cfg_blocks[b + 1].start;
} }
@ -883,6 +1040,10 @@ int ir_schedule(ir_ctx *ctx)
} }
#endif #endif
if (bad_bb_order) {
ir_fix_bb_order(ctx, _prev, _next);
}
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count; _xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE; _xlat[IR_TRUE] = IR_TRUE;

View file

@ -62,7 +62,7 @@
#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define IR_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1))) #define IR_IS_POWER_OF_TWO(x) ((x) && (!((x) & ((x) - 1))))
#define IR_LOG2(x) ir_ntzl(x) #define IR_LOG2(x) ir_ntzl(x)

View file

@ -140,6 +140,9 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth); fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth);
} }
} }
if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
fprintf(f, ", IRREDUCIBLE");
}
if (bb->predecessors_count) { if (bb->predecessors_count) {
uint32_t i; uint32_t i;

View file

@ -1732,7 +1732,20 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
ir_ref *p, n, input; ir_ref *p, n, input;
if (IR_IS_CONST_REF(ref)) { if (IR_IS_CONST_REF(ref)) {
return ir_const(ctx, insn->val, type); ir_val val;
switch (type) {
case IR_I8: val.i64 = insn->val.i8; break;
case IR_U8: val.u64 = insn->val.u8; break;
case IR_I16: val.i64 = insn->val.i16; break;
case IR_U16: val.u64 = insn->val.u16; break;
case IR_I32: val.i64 = insn->val.i32; break;
case IR_U32: val.u64 = insn->val.u32; break;
case IR_CHAR:val.i64 = insn->val.i8; break;
case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
default: IR_ASSERT(0); val.u64 = 0;
}
return ir_const(ctx, val, type);
} else { } else {
ir_bitqueue_add(worklist, ref); ir_bitqueue_add(worklist, ref);
switch (insn->op) { switch (insn->op) {

View file

@ -6868,7 +6868,24 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
ir_backend_data *data = ctx->data; ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { if (IR_IS_CONST_REF(insn->op2)) {
ir_insn *value = &ctx->ir_base[insn->op2];
if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
| fldz
} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
| fld1
} else {
int label = ir_const_label(ctx, insn->op2);
if (type == IR_DOUBLE) {
| fld qword [=>label]
} else {
IR_ASSERT(type == IR_FLOAT);
| fld dword [=>label]
}
}
} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
ir_reg fp; ir_reg fp;
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
@ -8442,11 +8459,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data; ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type; ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2]; ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3]; ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset; int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) { if (IR_REG_SPILLED(op2_reg)) {
@ -8471,11 +8492,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_backend_data *data = ctx->data; ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state; dasm_State **Dst = &data->dasm_state;
ir_type type = insn->type; ir_type type = insn->type;
ir_reg def_reg = ctx->regs[def][0]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
ir_reg op2_reg = ctx->regs[def][2]; ir_reg op2_reg = ctx->regs[def][2];
ir_reg tmp_reg = ctx->regs[def][3]; ir_reg tmp_reg = ctx->regs[def][3];
int32_t offset; int32_t offset;
if (ctx->use_lists[def].count == 1) {
/* dead load */
return;
}
IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE); IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE);
if (op2_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) { if (IR_REG_SPILLED(op2_reg)) {
@ -9221,6 +9246,58 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
return; return;
} }
/* Move op2 to a tmp register before epilogue if it's in
* used_preserved_regs, because it will be overridden. */
ir_reg op2_reg = IR_REG_NONE;
ir_mem mem = IR_MEM_B(IR_REG_NONE);
if (!IR_IS_CONST_REF(insn->op2)) {
op2_reg = ctx->regs[def][2];
ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER);
if (ctx->flags & IR_USE_FRAME_POINTER) {
preserved_regs |= IR_REGSET(IR_REG_FRAME_POINTER);
}
bool is_spill_slot = op2_reg != IR_REG_NONE
&& IR_REG_SPILLED(op2_reg)
&& ctx->vregs[insn->op2];
if (op2_reg != IR_REG_NONE && !is_spill_slot) {
if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) {
ir_ref orig_op2_reg = op2_reg;
op2_reg = IR_REG_RAX;
if (IR_REG_SPILLED(orig_op2_reg)) {
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
} else {
ir_type type = ctx->ir_base[insn->op2].type;
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
}
} else {
op2_reg = IR_REG_NUM(op2_reg);
}
} else {
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
IR_ASSERT(op2_reg == IR_REG_NONE);
mem = ir_fuse_load(ctx, def, insn->op2);
} else {
mem = ir_ref_spill_slot(ctx, insn->op2);
}
ir_reg base = IR_MEM_BASE(mem);
ir_reg index = IR_MEM_INDEX(mem);
if ((base != IR_REG_NONE && IR_REGSET_IN(preserved_regs, base)) ||
(index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) {
op2_reg = IR_REG_RAX;
ir_type type = ctx->ir_base[insn->op2].type;
ir_emit_load_mem_int(ctx, type, op2_reg, mem);
} else {
op2_reg = IR_REG_NONE;
}
}
}
ir_emit_epilogue(ctx); ir_emit_epilogue(ctx);
if (IR_IS_CONST_REF(insn->op2)) { if (IR_IS_CONST_REF(insn->op2)) {
@ -9246,22 +9323,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|.endif |.endif
} }
} else { } else {
ir_reg op2_reg = ctx->regs[def][2];
if (op2_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op2_reg)) { IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
}
| jmp Ra(op2_reg) | jmp Ra(op2_reg)
} else { } else {
ir_mem mem;
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
mem = ir_fuse_load(ctx, def, insn->op2);
} else {
mem = ir_ref_spill_slot(ctx, insn->op2);
}
| ASM_TMEM_OP jmp, aword, mem | ASM_TMEM_OP jmp, aword, mem
} }
} }