mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Merge branch 'PHP-8.4'
* PHP-8.4: Update IR
This commit is contained in:
commit
dd69b65638
12 changed files with 482 additions and 113 deletions
|
@ -803,7 +803,9 @@ ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_co
|
||||||
proto->flags = flags;
|
proto->flags = flags;
|
||||||
proto->ret_type = ret_type;
|
proto->ret_type = ret_type;
|
||||||
proto->params_count = params_count;
|
proto->params_count = params_count;
|
||||||
|
if (params_count) {
|
||||||
memcpy(proto->param_types, param_types, params_count);
|
memcpy(proto->param_types, param_types, params_count);
|
||||||
|
}
|
||||||
return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count);
|
return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -854,6 +854,9 @@ void ir_gdb_unregister_all(void);
|
||||||
bool ir_gdb_present(void);
|
bool ir_gdb_present(void);
|
||||||
|
|
||||||
/* IR load API (implementation in ir_load.c) */
|
/* IR load API (implementation in ir_load.c) */
|
||||||
|
#define IR_RESOLVE_SYM_ADD_THUNK (1<<0)
|
||||||
|
#define IR_RESOLVE_SYM_SILENT (1<<1)
|
||||||
|
|
||||||
struct _ir_loader {
|
struct _ir_loader {
|
||||||
uint32_t default_func_flags;
|
uint32_t default_func_flags;
|
||||||
bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
|
bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
|
||||||
|
@ -870,7 +873,7 @@ struct _ir_loader {
|
||||||
bool (*sym_data_end) (ir_loader *loader, uint32_t flags);
|
bool (*sym_data_end) (ir_loader *loader, uint32_t flags);
|
||||||
bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
|
bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
|
||||||
bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
|
bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
|
||||||
void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk);
|
void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags);
|
||||||
bool (*has_sym) (ir_loader *loader, const char *name);
|
bool (*has_sym) (ir_loader *loader, const char *name);
|
||||||
bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
|
bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
|
||||||
};
|
};
|
||||||
|
|
|
@ -4366,11 +4366,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
ir_backend_data *data = ctx->data;
|
ir_backend_data *data = ctx->data;
|
||||||
dasm_State **Dst = &data->dasm_state;
|
dasm_State **Dst = &data->dasm_state;
|
||||||
ir_type type = insn->type;
|
ir_type type = insn->type;
|
||||||
ir_reg def_reg = ctx->regs[def][0];
|
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
ir_reg op2_reg = ctx->regs[def][2];
|
||||||
ir_reg tmp_reg = ctx->regs[def][3];
|
ir_reg tmp_reg = ctx->regs[def][3];
|
||||||
int32_t offset;
|
int32_t offset;
|
||||||
|
|
||||||
|
if (ctx->use_lists[def].count == 1) {
|
||||||
|
/* dead load */
|
||||||
|
return;
|
||||||
|
}
|
||||||
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
||||||
if (op2_reg != IR_REG_NONE) {
|
if (op2_reg != IR_REG_NONE) {
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
if (IR_REG_SPILLED(op2_reg)) {
|
||||||
|
@ -4394,11 +4398,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
ir_backend_data *data = ctx->data;
|
ir_backend_data *data = ctx->data;
|
||||||
dasm_State **Dst = &data->dasm_state;
|
dasm_State **Dst = &data->dasm_state;
|
||||||
ir_type type = insn->type;
|
ir_type type = insn->type;
|
||||||
ir_reg def_reg = ctx->regs[def][0];
|
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
ir_reg op2_reg = ctx->regs[def][2];
|
||||||
ir_reg tmp_reg = ctx->regs[def][3];
|
ir_reg tmp_reg = ctx->regs[def][3];
|
||||||
int32_t offset;
|
int32_t offset;
|
||||||
|
|
||||||
|
if (ctx->use_lists[def].count == 1) {
|
||||||
|
/* dead load */
|
||||||
|
return;
|
||||||
|
}
|
||||||
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
||||||
if (op2_reg != IR_REG_NONE) {
|
if (op2_reg != IR_REG_NONE) {
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
if (IR_REG_SPILLED(op2_reg)) {
|
||||||
|
@ -4935,6 +4943,28 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Move op2 to a tmp register before epilogue if it's in
|
||||||
|
* used_preserved_regs, because it will be overridden. */
|
||||||
|
|
||||||
|
ir_reg op2_reg = IR_REG_NONE;
|
||||||
|
if (!IR_IS_CONST_REF(insn->op2)) {
|
||||||
|
op2_reg = ctx->regs[def][2];
|
||||||
|
IR_ASSERT(op2_reg != IR_REG_NONE);
|
||||||
|
|
||||||
|
if (IR_REG_SPILLED(op2_reg)) {
|
||||||
|
op2_reg = IR_REG_INT_TMP;
|
||||||
|
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
|
||||||
|
} else if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, IR_REG_NUM(op2_reg))) {
|
||||||
|
ir_reg orig_op2_reg = op2_reg;
|
||||||
|
op2_reg = IR_REG_INT_TMP;
|
||||||
|
|
||||||
|
ir_type type = ctx->ir_base[insn->op2].type;
|
||||||
|
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
|
||||||
|
} else {
|
||||||
|
op2_reg = IR_REG_NUM(op2_reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ir_emit_epilogue(ctx);
|
ir_emit_epilogue(ctx);
|
||||||
|
|
||||||
if (IR_IS_CONST_REF(insn->op2)) {
|
if (IR_IS_CONST_REF(insn->op2)) {
|
||||||
|
@ -4947,13 +4977,8 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
| br Rx(IR_REG_INT_TMP)
|
| br Rx(IR_REG_INT_TMP)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
|
||||||
|
|
||||||
IR_ASSERT(op2_reg != IR_REG_NONE);
|
IR_ASSERT(op2_reg != IR_REG_NONE);
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
|
||||||
op2_reg = IR_REG_NUM(op2_reg);
|
|
||||||
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
|
|
||||||
}
|
|
||||||
| br Rx(op2_reg)
|
| br Rx(op2_reg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -244,7 +244,6 @@ int ir_build_cfg(ir_ctx *ctx)
|
||||||
_blocks[start] = b;
|
_blocks[start] = b;
|
||||||
_blocks[end] = b;
|
_blocks[end] = b;
|
||||||
IR_ASSERT(IR_IS_BB_START(insn->op));
|
IR_ASSERT(IR_IS_BB_START(insn->op));
|
||||||
IR_ASSERT(end > start);
|
|
||||||
bb->start = start;
|
bb->start = start;
|
||||||
bb->end = end;
|
bb->end = end;
|
||||||
bb->successors = count;
|
bb->successors = count;
|
||||||
|
@ -583,7 +582,6 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
|
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
|
||||||
{
|
{
|
||||||
uint32_t i, *p;
|
uint32_t i, *p;
|
||||||
|
@ -607,34 +605,42 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
|
||||||
|
|
||||||
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
|
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
|
||||||
* Cooper, Harvey and Kennedy. */
|
* Cooper, Harvey and Kennedy. */
|
||||||
int ir_build_dominators_tree(ir_ctx *ctx)
|
static int ir_build_dominators_tree_slow(ir_ctx *ctx)
|
||||||
{
|
{
|
||||||
uint32_t blocks_count, b, postnum;
|
uint32_t blocks_count, b, postnum;
|
||||||
ir_block *blocks, *bb;
|
ir_block *blocks, *bb;
|
||||||
uint32_t *edges;
|
uint32_t *edges;
|
||||||
bool changed;
|
bool changed;
|
||||||
|
|
||||||
|
blocks = ctx->cfg_blocks;
|
||||||
|
edges = ctx->cfg_edges;
|
||||||
|
blocks_count = ctx->cfg_blocks_count;
|
||||||
|
|
||||||
|
/* Clear the dominators tree */
|
||||||
|
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
|
||||||
|
bb->idom = 0;
|
||||||
|
bb->dom_depth = 0;
|
||||||
|
bb->dom_child = 0;
|
||||||
|
bb->dom_next_child = 0;
|
||||||
|
}
|
||||||
|
|
||||||
ctx->flags2 &= ~IR_NO_LOOPS;
|
ctx->flags2 &= ~IR_NO_LOOPS;
|
||||||
|
|
||||||
postnum = 1;
|
postnum = 1;
|
||||||
compute_postnum(ctx, &postnum, 1);
|
compute_postnum(ctx, &postnum, 1);
|
||||||
|
|
||||||
/* Find immediate dominators */
|
/* Find immediate dominators by iterative fixed-point algorithm */
|
||||||
blocks = ctx->cfg_blocks;
|
|
||||||
edges = ctx->cfg_edges;
|
|
||||||
blocks_count = ctx->cfg_blocks_count;
|
|
||||||
blocks[1].idom = 1;
|
blocks[1].idom = 1;
|
||||||
do {
|
do {
|
||||||
changed = 0;
|
changed = 0;
|
||||||
/* Iterating in Reverse Post Order */
|
/* Iterating in Reverse Post Order */
|
||||||
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
|
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
|
||||||
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
|
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
|
||||||
|
IR_ASSERT(bb->predecessors_count > 0);
|
||||||
if (bb->predecessors_count == 1) {
|
if (bb->predecessors_count == 1) {
|
||||||
uint32_t pred_b = edges[bb->predecessors];
|
uint32_t pred_b = edges[bb->predecessors];
|
||||||
|
|
||||||
if (blocks[pred_b].idom <= 0) {
|
if (blocks[pred_b].idom > 0 && bb->idom != pred_b) {
|
||||||
//IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
|
|
||||||
} else if (bb->idom != pred_b) {
|
|
||||||
bb->idom = pred_b;
|
bb->idom = pred_b;
|
||||||
changed = 1;
|
changed = 1;
|
||||||
}
|
}
|
||||||
|
@ -680,14 +686,13 @@ int ir_build_dominators_tree(ir_ctx *ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (changed);
|
} while (changed);
|
||||||
|
|
||||||
|
/* Build dominators tree */
|
||||||
blocks[1].idom = 0;
|
blocks[1].idom = 0;
|
||||||
blocks[1].dom_depth = 0;
|
blocks[1].dom_depth = 0;
|
||||||
|
|
||||||
/* Construct dominators tree */
|
|
||||||
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
|
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
|
||||||
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
|
uint32_t idom = bb->idom;
|
||||||
if (bb->idom > 0) {
|
ir_block *idom_bb = &blocks[idom];
|
||||||
ir_block *idom_bb = &blocks[bb->idom];
|
|
||||||
|
|
||||||
bb->dom_depth = idom_bb->dom_depth + 1;
|
bb->dom_depth = idom_bb->dom_depth + 1;
|
||||||
/* Sort by block number to traverse children in pre-order */
|
/* Sort by block number to traverse children in pre-order */
|
||||||
|
@ -708,11 +713,10 @@ int ir_build_dominators_tree(ir_ctx *ctx)
|
||||||
child_bb->dom_next_child = b;
|
child_bb->dom_next_child = b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
/* A single pass modification of "A Simple, Fast Dominance Algorithm" by
|
/* A single pass modification of "A Simple, Fast Dominance Algorithm" by
|
||||||
* Cooper, Harvey and Kennedy, that relays on IR block ordering.
|
* Cooper, Harvey and Kennedy, that relays on IR block ordering.
|
||||||
* It may fallback to the general slow fixed-point algorithm. */
|
* It may fallback to the general slow fixed-point algorithm. */
|
||||||
|
@ -747,7 +751,11 @@ int ir_build_dominators_tree(ir_ctx *ctx)
|
||||||
if (UNEXPECTED(idom >= b)) {
|
if (UNEXPECTED(idom >= b)) {
|
||||||
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
|
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
|
||||||
ctx->flags2 &= ~IR_NO_LOOPS;
|
ctx->flags2 &= ~IR_NO_LOOPS;
|
||||||
IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
|
// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
|
||||||
|
if (UNEXPECTED(k <= 1)) {
|
||||||
|
ir_list_free(&worklist);
|
||||||
|
return ir_build_dominators_tree_slow(ctx);
|
||||||
|
}
|
||||||
ir_list_push(&worklist, idom);
|
ir_list_push(&worklist, idom);
|
||||||
while (1) {
|
while (1) {
|
||||||
k--;
|
k--;
|
||||||
|
@ -942,7 +950,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
|
static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
|
||||||
{
|
{
|
||||||
|
@ -958,7 +965,7 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
|
||||||
|
|
||||||
int ir_find_loops(ir_ctx *ctx)
|
int ir_find_loops(ir_ctx *ctx)
|
||||||
{
|
{
|
||||||
uint32_t i, j, n, count;
|
uint32_t b, j, n, count;
|
||||||
uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
|
uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
|
||||||
ir_block *blocks = ctx->cfg_blocks;
|
ir_block *blocks = ctx->cfg_blocks;
|
||||||
uint32_t *edges = ctx->cfg_edges;
|
uint32_t *edges = ctx->cfg_edges;
|
||||||
|
@ -983,13 +990,13 @@ int ir_find_loops(ir_ctx *ctx)
|
||||||
int child;
|
int child;
|
||||||
|
|
||||||
next:
|
next:
|
||||||
i = ir_worklist_peek(&work);
|
b = ir_worklist_peek(&work);
|
||||||
if (!entry_times[i]) {
|
if (!entry_times[b]) {
|
||||||
entry_times[i] = time++;
|
entry_times[b] = time++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Visit blocks immediately dominated by i. */
|
/* Visit blocks immediately dominated by "b". */
|
||||||
bb = &blocks[i];
|
bb = &blocks[b];
|
||||||
for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
|
for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
|
||||||
if (ir_worklist_push(&work, child)) {
|
if (ir_worklist_push(&work, child)) {
|
||||||
goto next;
|
goto next;
|
||||||
|
@ -999,17 +1006,17 @@ next:
|
||||||
/* Visit join edges. */
|
/* Visit join edges. */
|
||||||
if (bb->successors_count) {
|
if (bb->successors_count) {
|
||||||
uint32_t *p = edges + bb->successors;
|
uint32_t *p = edges + bb->successors;
|
||||||
for (j = 0; j < bb->successors_count; j++,p++) {
|
for (j = 0; j < bb->successors_count; j++, p++) {
|
||||||
uint32_t succ = *p;
|
uint32_t succ = *p;
|
||||||
|
|
||||||
if (blocks[succ].idom == i) {
|
if (blocks[succ].idom == b) {
|
||||||
continue;
|
continue;
|
||||||
} else if (ir_worklist_push(&work, succ)) {
|
} else if (ir_worklist_push(&work, succ)) {
|
||||||
goto next;
|
goto next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
exit_times[i] = time++;
|
exit_times[b] = time++;
|
||||||
ir_worklist_pop(&work);
|
ir_worklist_pop(&work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1018,7 +1025,7 @@ next:
|
||||||
j = 1;
|
j = 1;
|
||||||
n = 2;
|
n = 2;
|
||||||
while (j != n) {
|
while (j != n) {
|
||||||
i = j;
|
uint32_t i = j;
|
||||||
j = n;
|
j = n;
|
||||||
for (; i < j; i++) {
|
for (; i < j; i++) {
|
||||||
int child;
|
int child;
|
||||||
|
@ -1030,9 +1037,82 @@ next:
|
||||||
count = n;
|
count = n;
|
||||||
|
|
||||||
/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
|
/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
|
||||||
|
uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth;
|
||||||
|
uint32_t prev_irreducible = 0;
|
||||||
while (n > 1) {
|
while (n > 1) {
|
||||||
i = sorted_blocks[--n];
|
b = sorted_blocks[--n];
|
||||||
ir_block *bb = &blocks[i];
|
ir_block *bb = &blocks[b];
|
||||||
|
|
||||||
|
IR_ASSERT(bb->dom_depth <= prev_dom_depth);
|
||||||
|
if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) {
|
||||||
|
/* process delyed irreducible loops */
|
||||||
|
do {
|
||||||
|
b = sorted_blocks[prev_irreducible];
|
||||||
|
bb = &blocks[b];
|
||||||
|
if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) {
|
||||||
|
/* process irreducible loop */
|
||||||
|
uint32_t hdr = b;
|
||||||
|
|
||||||
|
bb->loop_depth = 1;
|
||||||
|
if (ctx->ir_base[bb->start].op == IR_MERGE) {
|
||||||
|
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find the closing edge(s) of the irreucible loop */
|
||||||
|
IR_ASSERT(bb->predecessors_count > 1);
|
||||||
|
uint32_t *p = &edges[bb->predecessors];
|
||||||
|
j = bb->predecessors_count;
|
||||||
|
do {
|
||||||
|
uint32_t pred = *p;
|
||||||
|
|
||||||
|
if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
|
||||||
|
if (!ir_worklist_len(&work)) {
|
||||||
|
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
|
||||||
|
}
|
||||||
|
blocks[pred].loop_header = 0; /* support for merged loops */
|
||||||
|
ir_worklist_push(&work, pred);
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
} while (--j);
|
||||||
|
IR_ASSERT(ir_worklist_len(&work) != 0);
|
||||||
|
|
||||||
|
/* collect members of the irreducible loop */
|
||||||
|
while (ir_worklist_len(&work)) {
|
||||||
|
b = ir_worklist_pop(&work);
|
||||||
|
if (b != hdr) {
|
||||||
|
ir_block *bb = &blocks[b];
|
||||||
|
bb->loop_header = hdr;
|
||||||
|
if (bb->predecessors_count) {
|
||||||
|
uint32_t *p = &edges[bb->predecessors];
|
||||||
|
uint32_t n = bb->predecessors_count;
|
||||||
|
do {
|
||||||
|
uint32_t pred = *p;
|
||||||
|
while (blocks[pred].loop_header > 0) {
|
||||||
|
pred = blocks[pred].loop_header;
|
||||||
|
}
|
||||||
|
if (pred != hdr) {
|
||||||
|
if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) {
|
||||||
|
/* "pred" is a descendant of "hdr" */
|
||||||
|
ir_worklist_push(&work, pred);
|
||||||
|
} else {
|
||||||
|
/* another entry to the irreducible loop */
|
||||||
|
bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
|
||||||
|
if (ctx->ir_base[bb->start].op == IR_MERGE) {
|
||||||
|
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
} while (--n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (--prev_irreducible != n);
|
||||||
|
prev_irreducible = 0;
|
||||||
|
b = sorted_blocks[n];
|
||||||
|
bb = &blocks[b];
|
||||||
|
}
|
||||||
|
|
||||||
if (bb->predecessors_count > 1) {
|
if (bb->predecessors_count > 1) {
|
||||||
bool irreducible = 0;
|
bool irreducible = 0;
|
||||||
|
@ -1047,7 +1127,7 @@ next:
|
||||||
if (bb->idom != pred) {
|
if (bb->idom != pred) {
|
||||||
/* In a loop back-edge (back-join edge), the successor dominates
|
/* In a loop back-edge (back-join edge), the successor dominates
|
||||||
the predecessor. */
|
the predecessor. */
|
||||||
if (ir_dominates(blocks, i, pred)) {
|
if (ir_dominates(blocks, b, pred)) {
|
||||||
if (!ir_worklist_len(&work)) {
|
if (!ir_worklist_len(&work)) {
|
||||||
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
|
ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
|
||||||
}
|
}
|
||||||
|
@ -1056,8 +1136,9 @@ next:
|
||||||
} else {
|
} else {
|
||||||
/* Otherwise it's a cross-join edge. See if it's a branch
|
/* Otherwise it's a cross-join edge. See if it's a branch
|
||||||
to an ancestor on the DJ spanning tree. */
|
to an ancestor on the DJ spanning tree. */
|
||||||
if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) {
|
if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
|
||||||
irreducible = 1;
|
irreducible = 1;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1065,46 +1146,55 @@ next:
|
||||||
} while (--j);
|
} while (--j);
|
||||||
|
|
||||||
if (UNEXPECTED(irreducible)) {
|
if (UNEXPECTED(irreducible)) {
|
||||||
// TODO: Support for irreducible loops ???
|
bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP;
|
||||||
bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
|
ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG;
|
||||||
ctx->flags2 |= IR_IRREDUCIBLE_CFG;
|
/* Remember the position of the first irreducible loop to process all the irreducible loops
|
||||||
while (ir_worklist_len(&work)) {
|
* after the reducible loops with the same dominator tree depth
|
||||||
ir_worklist_pop(&work);
|
*/
|
||||||
|
if (!prev_irreducible) {
|
||||||
|
prev_irreducible = n;
|
||||||
}
|
}
|
||||||
|
ir_list_clear(&work.l);
|
||||||
} else if (ir_worklist_len(&work)) {
|
} else if (ir_worklist_len(&work)) {
|
||||||
|
/* collect members of the reducible loop */
|
||||||
|
uint32_t hdr = b;
|
||||||
|
|
||||||
bb->flags |= IR_BB_LOOP_HEADER;
|
bb->flags |= IR_BB_LOOP_HEADER;
|
||||||
ctx->flags2 |= IR_CFG_HAS_LOOPS;
|
ctx->flags2 |= IR_CFG_HAS_LOOPS;
|
||||||
bb->loop_depth = 1;
|
bb->loop_depth = 1;
|
||||||
|
if (ctx->ir_base[bb->start].op == IR_MERGE) {
|
||||||
|
ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
|
||||||
|
}
|
||||||
while (ir_worklist_len(&work)) {
|
while (ir_worklist_len(&work)) {
|
||||||
j = ir_worklist_pop(&work);
|
b = ir_worklist_pop(&work);
|
||||||
while (blocks[j].loop_header > 0) {
|
if (b != hdr) {
|
||||||
j = blocks[j].loop_header;
|
ir_block *bb = &blocks[b];
|
||||||
}
|
bb->loop_header = hdr;
|
||||||
if (j != i) {
|
|
||||||
ir_block *bb = &blocks[j];
|
|
||||||
if (bb->idom == 0 && j != 1) {
|
|
||||||
/* Ignore blocks that are unreachable or only abnormally reachable. */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bb->loop_header = i;
|
|
||||||
if (bb->predecessors_count) {
|
if (bb->predecessors_count) {
|
||||||
uint32_t *p = &edges[bb->predecessors];
|
uint32_t *p = &edges[bb->predecessors];
|
||||||
j = bb->predecessors_count;
|
uint32_t n = bb->predecessors_count;
|
||||||
do {
|
do {
|
||||||
ir_worklist_push(&work, *p);
|
uint32_t pred = *p;
|
||||||
|
while (blocks[pred].loop_header > 0) {
|
||||||
|
pred = blocks[pred].loop_header;
|
||||||
|
}
|
||||||
|
if (pred != hdr) {
|
||||||
|
ir_worklist_push(&work, pred);
|
||||||
|
}
|
||||||
p++;
|
p++;
|
||||||
} while (--j);
|
} while (--n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
IR_ASSERT(!prev_irreducible);
|
||||||
|
|
||||||
if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
|
if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
|
||||||
for (n = 1; n < count; n++) {
|
for (n = 1; n < count; n++) {
|
||||||
i = sorted_blocks[n];
|
b = sorted_blocks[n];
|
||||||
ir_block *bb = &blocks[i];
|
ir_block *bb = &blocks[b];
|
||||||
if (bb->loop_header > 0) {
|
if (bb->loop_header > 0) {
|
||||||
ir_block *loop = &blocks[bb->loop_header];
|
ir_block *loop = &blocks[bb->loop_header];
|
||||||
uint32_t loop_depth = loop->loop_depth;
|
uint32_t loop_depth = loop->loop_depth;
|
||||||
|
@ -1389,7 +1479,7 @@ restart:
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
|
} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
|
||||||
ir_dump_cfg(ctx, stderr);
|
/* not a loop back-edge */
|
||||||
IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
|
IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -213,13 +213,18 @@ bool ir_check(const ir_ctx *ctx)
|
||||||
ok = 0;
|
ok = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
case IR_OPND_CONTROL_DEP:
|
|
||||||
if ((ctx->flags2 & IR_LINEAR)
|
if ((ctx->flags2 & IR_LINEAR)
|
||||||
&& use >= i
|
&& use >= i
|
||||||
&& !(insn->op == IR_LOOP_BEGIN)) {
|
&& !(insn->op == IR_LOOP_BEGIN)) {
|
||||||
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
|
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
|
||||||
ok = 0;
|
ok = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case IR_OPND_CONTROL_DEP:
|
||||||
|
if ((ctx->flags2 & IR_LINEAR)
|
||||||
|
&& use >= i) {
|
||||||
|
fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
|
||||||
|
ok = 0;
|
||||||
} else if (insn->op == IR_PHI) {
|
} else if (insn->op == IR_PHI) {
|
||||||
ir_insn *merge_insn = &ctx->ir_base[insn->op1];
|
ir_insn *merge_insn = &ctx->ir_base[insn->op1];
|
||||||
if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) {
|
if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) {
|
||||||
|
|
|
@ -309,7 +309,7 @@ static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
|
||||||
{
|
{
|
||||||
const char *name = ir_get_str(ctx, addr_insn->val.name);
|
const char *name = ir_get_str(ctx, addr_insn->val.name);
|
||||||
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
|
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
|
||||||
ctx->loader->resolve_sym_name(ctx->loader, name, 0) :
|
ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
|
||||||
ir_resolve_sym_name(name);
|
ir_resolve_sym_name(name);
|
||||||
|
|
||||||
return addr;
|
return addr;
|
||||||
|
@ -320,7 +320,7 @@ static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
|
||||||
{
|
{
|
||||||
const char *name = ir_get_str(ctx, addr_insn->val.name);
|
const char *name = ir_get_str(ctx, addr_insn->val.name);
|
||||||
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
|
void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
|
||||||
ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) :
|
ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
|
||||||
ir_resolve_sym_name(name);
|
ir_resolve_sym_name(name);
|
||||||
|
|
||||||
IR_ASSERT(addr);
|
IR_ASSERT(addr);
|
||||||
|
|
|
@ -1909,7 +1909,9 @@ IR_FOLD(SUB(_, SUB))
|
||||||
IR_FOLD(SUB(ADD, ADD))
|
IR_FOLD(SUB(ADD, ADD))
|
||||||
{
|
{
|
||||||
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
|
if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
|
||||||
if (op1_insn->op1 == op2_insn->op1) {
|
if (op1 == op2) {
|
||||||
|
IR_FOLD_CONST_U(0);
|
||||||
|
} else if (op1_insn->op1 == op2_insn->op1) {
|
||||||
/* (a + b) - (a + c) => b - c */
|
/* (a + b) - (a + c) => b - c */
|
||||||
op1 = op1_insn->op2;
|
op1 = op1_insn->op2;
|
||||||
op2 = op2_insn->op2;
|
op2 = op2_insn->op2;
|
||||||
|
|
|
@ -785,6 +785,139 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
|
||||||
|
{
|
||||||
|
ir_insn *insn = &ctx->ir_base[start];
|
||||||
|
uint32_t n = insn->inputs_count;
|
||||||
|
ir_ref *p = insn->ops + 1;
|
||||||
|
|
||||||
|
if (n == 1) {
|
||||||
|
return *p < start;
|
||||||
|
} else {
|
||||||
|
IR_ASSERT(n > 1);
|
||||||
|
for (; n > 0; p++, n--) {
|
||||||
|
ir_ref input = *p;
|
||||||
|
if (input < start) {
|
||||||
|
/* ordered */
|
||||||
|
} else if ((bb->flags & IR_BB_LOOP_HEADER)
|
||||||
|
&& (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) {
|
||||||
|
/* back-edge of reducible loop */
|
||||||
|
} else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
|
||||||
|
&& (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) {
|
||||||
|
/* closing edge of irreducible loop */
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next)
|
||||||
|
{
|
||||||
|
uint32_t b, succ, count, *q, *xlat;
|
||||||
|
ir_block *bb;
|
||||||
|
ir_ref ref, n, prev;
|
||||||
|
ir_worklist worklist;
|
||||||
|
ir_block *new_blocks;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
|
||||||
|
if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
|
||||||
|
goto fix;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
|
||||||
|
fix:
|
||||||
|
#endif
|
||||||
|
count = ctx->cfg_blocks_count + 1;
|
||||||
|
new_blocks = ir_mem_malloc(count * sizeof(ir_block));
|
||||||
|
xlat = ir_mem_malloc(count * sizeof(uint32_t));
|
||||||
|
ir_worklist_init(&worklist, count);
|
||||||
|
ir_worklist_push(&worklist, 1);
|
||||||
|
while (ir_worklist_len(&worklist) != 0) {
|
||||||
|
next:
|
||||||
|
b = ir_worklist_peek(&worklist);
|
||||||
|
bb = &ctx->cfg_blocks[b];
|
||||||
|
n = bb->successors_count;
|
||||||
|
if (n == 1) {
|
||||||
|
succ = ctx->cfg_edges[bb->successors];
|
||||||
|
if (ir_worklist_push(&worklist, succ)) {
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
} else if (n > 1) {
|
||||||
|
uint32_t best = 0;
|
||||||
|
uint32_t best_loop_depth = 0;
|
||||||
|
|
||||||
|
q = ctx->cfg_edges + bb->successors + n;
|
||||||
|
do {
|
||||||
|
q--;
|
||||||
|
succ = *q;
|
||||||
|
if (ir_bitset_in(worklist.visited, succ)) {
|
||||||
|
/* already processed */
|
||||||
|
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
|
||||||
|
&& (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
|
||||||
|
/* back-edge of reducible loop */
|
||||||
|
} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
|
||||||
|
&& (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
|
||||||
|
/* closing edge of irreducible loop */
|
||||||
|
} else if (!best) {
|
||||||
|
best = succ;
|
||||||
|
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
|
||||||
|
} else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) {
|
||||||
|
/* prefer deeper loop */
|
||||||
|
best = succ;
|
||||||
|
best_loop_depth = ctx->cfg_blocks[best].loop_depth;
|
||||||
|
}
|
||||||
|
n--;
|
||||||
|
} while (n > 0);
|
||||||
|
if (best) {
|
||||||
|
ir_worklist_push(&worklist, best);
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ir_worklist_pop(&worklist);
|
||||||
|
count--;
|
||||||
|
new_blocks[count] = *bb;
|
||||||
|
xlat[b] = count;
|
||||||
|
}
|
||||||
|
IR_ASSERT(count == 1);
|
||||||
|
xlat[0] = 0;
|
||||||
|
ir_worklist_free(&worklist);
|
||||||
|
|
||||||
|
prev = 0;
|
||||||
|
for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
|
||||||
|
bb->idom = xlat[bb->idom];
|
||||||
|
bb->loop_header = xlat[bb->loop_header];
|
||||||
|
n = bb->successors_count;
|
||||||
|
if (n > 0) {
|
||||||
|
for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) {
|
||||||
|
*q = xlat[*q];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
n = bb->predecessors_count;
|
||||||
|
if (n > 0) {
|
||||||
|
for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) {
|
||||||
|
*q = xlat[*q];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_next[prev] = bb->start;
|
||||||
|
_prev[bb->start] = prev;
|
||||||
|
prev = bb->end;
|
||||||
|
}
|
||||||
|
_next[0] = 0;
|
||||||
|
_next[prev] = 0;
|
||||||
|
|
||||||
|
for (ref = 2; ref < ctx->insns_count; ref++) {
|
||||||
|
ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]];
|
||||||
|
}
|
||||||
|
ir_mem_free(xlat);
|
||||||
|
|
||||||
|
ir_mem_free(ctx->cfg_blocks);
|
||||||
|
ctx->cfg_blocks = new_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
int ir_schedule(ir_ctx *ctx)
|
int ir_schedule(ir_ctx *ctx)
|
||||||
{
|
{
|
||||||
ir_ctx new_ctx;
|
ir_ctx new_ctx;
|
||||||
|
@ -800,6 +933,7 @@ int ir_schedule(ir_ctx *ctx)
|
||||||
ir_block *bb;
|
ir_block *bb;
|
||||||
ir_insn *insn, *new_insn;
|
ir_insn *insn, *new_insn;
|
||||||
ir_use_list *lists, *use_list, *new_list;
|
ir_use_list *lists, *use_list, *new_list;
|
||||||
|
bool bad_bb_order = 0;
|
||||||
|
|
||||||
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
|
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
|
||||||
IR_ASSERT(_blocks[1] == 1);
|
IR_ASSERT(_blocks[1] == 1);
|
||||||
|
@ -818,27 +952,50 @@ int ir_schedule(ir_ctx *ctx)
|
||||||
} else if (b > prev_b) {
|
} else if (b > prev_b) {
|
||||||
bb = &ctx->cfg_blocks[b];
|
bb = &ctx->cfg_blocks[b];
|
||||||
if (i == bb->start) {
|
if (i == bb->start) {
|
||||||
IR_ASSERT(bb->end > bb->start);
|
if (bb->end > bb->start) {
|
||||||
prev_b = b;
|
prev_b = b;
|
||||||
prev_b_end = bb->end;
|
prev_b_end = bb->end;
|
||||||
_prev[bb->end] = 0;
|
|
||||||
/* add to the end of the list */
|
/* add to the end of the list */
|
||||||
_next[j] = i;
|
_next[j] = i;
|
||||||
_prev[i] = j;
|
_prev[i] = j;
|
||||||
j = i;
|
j = i;
|
||||||
} else {
|
} else {
|
||||||
IR_ASSERT(i != bb->end);
|
prev_b = 0;
|
||||||
|
prev_b_end = 0;
|
||||||
|
k = bb->end;
|
||||||
|
while (_blocks[_prev[k]] == b) {
|
||||||
|
k = _prev[k];
|
||||||
|
}
|
||||||
|
/* insert before "k" */
|
||||||
|
_prev[i] = _prev[k];
|
||||||
|
_next[i] = k;
|
||||||
|
_next[_prev[k]] = i;
|
||||||
|
_prev[k] = i;
|
||||||
|
}
|
||||||
|
if (!ir_is_good_bb_order(ctx, b, bb, i)) {
|
||||||
|
bad_bb_order = 1;
|
||||||
|
}
|
||||||
|
} else if (i != bb->end) {
|
||||||
/* move down late (see the following loop) */
|
/* move down late (see the following loop) */
|
||||||
_next[i] = _move_down;
|
_next[i] = _move_down;
|
||||||
_move_down = i;
|
_move_down = i;
|
||||||
|
} else {
|
||||||
|
IR_ASSERT(bb->start > bb->end);
|
||||||
|
prev_b = 0;
|
||||||
|
prev_b_end = 0;
|
||||||
|
/* add to the end of the list */
|
||||||
|
_next[j] = i;
|
||||||
|
_prev[i] = j;
|
||||||
|
j = i;
|
||||||
}
|
}
|
||||||
} else if (b) {
|
} else if (b) {
|
||||||
bb = &ctx->cfg_blocks[b];
|
bb = &ctx->cfg_blocks[b];
|
||||||
IR_ASSERT(i != bb->start);
|
IR_ASSERT(i != bb->start);
|
||||||
if (_prev[bb->end]) {
|
if (i > bb->end) {
|
||||||
/* move up, insert before the end of the already scheduled BB */
|
/* move up, insert before the end of the already scheduled BB */
|
||||||
k = bb->end;
|
k = bb->end;
|
||||||
} else {
|
} else {
|
||||||
|
IR_ASSERT(i > bb->start);
|
||||||
/* move up, insert at the end of the block */
|
/* move up, insert at the end of the block */
|
||||||
k = ctx->cfg_blocks[b + 1].start;
|
k = ctx->cfg_blocks[b + 1].start;
|
||||||
}
|
}
|
||||||
|
@ -883,6 +1040,10 @@ int ir_schedule(ir_ctx *ctx)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (bad_bb_order) {
|
||||||
|
ir_fix_bb_order(ctx, _prev, _next);
|
||||||
|
}
|
||||||
|
|
||||||
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
|
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
|
||||||
_xlat += ctx->consts_count;
|
_xlat += ctx->consts_count;
|
||||||
_xlat[IR_TRUE] = IR_TRUE;
|
_xlat[IR_TRUE] = IR_TRUE;
|
||||||
|
|
|
@ -62,7 +62,7 @@
|
||||||
#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
|
#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||||
#define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))
|
#define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||||
|
|
||||||
#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1)))
|
#define IR_IS_POWER_OF_TWO(x) ((x) && (!((x) & ((x) - 1))))
|
||||||
|
|
||||||
#define IR_LOG2(x) ir_ntzl(x)
|
#define IR_LOG2(x) ir_ntzl(x)
|
||||||
|
|
||||||
|
|
|
@ -140,6 +140,9 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
|
||||||
fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth);
|
fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
|
||||||
|
fprintf(f, ", IRREDUCIBLE");
|
||||||
|
}
|
||||||
if (bb->predecessors_count) {
|
if (bb->predecessors_count) {
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
|
||||||
|
|
|
@ -1732,7 +1732,20 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
|
||||||
ir_ref *p, n, input;
|
ir_ref *p, n, input;
|
||||||
|
|
||||||
if (IR_IS_CONST_REF(ref)) {
|
if (IR_IS_CONST_REF(ref)) {
|
||||||
return ir_const(ctx, insn->val, type);
|
ir_val val;
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case IR_I8: val.i64 = insn->val.i8; break;
|
||||||
|
case IR_U8: val.u64 = insn->val.u8; break;
|
||||||
|
case IR_I16: val.i64 = insn->val.i16; break;
|
||||||
|
case IR_U16: val.u64 = insn->val.u16; break;
|
||||||
|
case IR_I32: val.i64 = insn->val.i32; break;
|
||||||
|
case IR_U32: val.u64 = insn->val.u32; break;
|
||||||
|
case IR_CHAR:val.i64 = insn->val.i8; break;
|
||||||
|
case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
|
||||||
|
default: IR_ASSERT(0); val.u64 = 0;
|
||||||
|
}
|
||||||
|
return ir_const(ctx, val, type);
|
||||||
} else {
|
} else {
|
||||||
ir_bitqueue_add(worklist, ref);
|
ir_bitqueue_add(worklist, ref);
|
||||||
switch (insn->op) {
|
switch (insn->op) {
|
||||||
|
|
|
@ -6868,7 +6868,24 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
|
||||||
ir_backend_data *data = ctx->data;
|
ir_backend_data *data = ctx->data;
|
||||||
dasm_State **Dst = &data->dasm_state;
|
dasm_State **Dst = &data->dasm_state;
|
||||||
|
|
||||||
if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
|
if (IR_IS_CONST_REF(insn->op2)) {
|
||||||
|
ir_insn *value = &ctx->ir_base[insn->op2];
|
||||||
|
|
||||||
|
if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
|
||||||
|
| fldz
|
||||||
|
} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
|
||||||
|
| fld1
|
||||||
|
} else {
|
||||||
|
int label = ir_const_label(ctx, insn->op2);
|
||||||
|
|
||||||
|
if (type == IR_DOUBLE) {
|
||||||
|
| fld qword [=>label]
|
||||||
|
} else {
|
||||||
|
IR_ASSERT(type == IR_FLOAT);
|
||||||
|
| fld dword [=>label]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
|
||||||
ir_reg fp;
|
ir_reg fp;
|
||||||
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
|
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
|
||||||
|
|
||||||
|
@ -8442,11 +8459,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
ir_backend_data *data = ctx->data;
|
ir_backend_data *data = ctx->data;
|
||||||
dasm_State **Dst = &data->dasm_state;
|
dasm_State **Dst = &data->dasm_state;
|
||||||
ir_type type = insn->type;
|
ir_type type = insn->type;
|
||||||
ir_reg def_reg = ctx->regs[def][0];
|
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
ir_reg op2_reg = ctx->regs[def][2];
|
||||||
ir_reg tmp_reg = ctx->regs[def][3];
|
ir_reg tmp_reg = ctx->regs[def][3];
|
||||||
int32_t offset;
|
int32_t offset;
|
||||||
|
|
||||||
|
if (ctx->use_lists[def].count == 1) {
|
||||||
|
/* dead load */
|
||||||
|
return;
|
||||||
|
}
|
||||||
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
|
||||||
if (op2_reg != IR_REG_NONE) {
|
if (op2_reg != IR_REG_NONE) {
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
if (IR_REG_SPILLED(op2_reg)) {
|
||||||
|
@ -8471,11 +8492,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
ir_backend_data *data = ctx->data;
|
ir_backend_data *data = ctx->data;
|
||||||
dasm_State **Dst = &data->dasm_state;
|
dasm_State **Dst = &data->dasm_state;
|
||||||
ir_type type = insn->type;
|
ir_type type = insn->type;
|
||||||
ir_reg def_reg = ctx->regs[def][0];
|
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
ir_reg op2_reg = ctx->regs[def][2];
|
||||||
ir_reg tmp_reg = ctx->regs[def][3];
|
ir_reg tmp_reg = ctx->regs[def][3];
|
||||||
int32_t offset;
|
int32_t offset;
|
||||||
|
|
||||||
|
if (ctx->use_lists[def].count == 1) {
|
||||||
|
/* dead load */
|
||||||
|
return;
|
||||||
|
}
|
||||||
IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE);
|
IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE);
|
||||||
if (op2_reg != IR_REG_NONE) {
|
if (op2_reg != IR_REG_NONE) {
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
if (IR_REG_SPILLED(op2_reg)) {
|
||||||
|
@ -9221,6 +9246,58 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Move op2 to a tmp register before epilogue if it's in
|
||||||
|
* used_preserved_regs, because it will be overridden. */
|
||||||
|
|
||||||
|
ir_reg op2_reg = IR_REG_NONE;
|
||||||
|
ir_mem mem = IR_MEM_B(IR_REG_NONE);
|
||||||
|
if (!IR_IS_CONST_REF(insn->op2)) {
|
||||||
|
op2_reg = ctx->regs[def][2];
|
||||||
|
|
||||||
|
ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER);
|
||||||
|
if (ctx->flags & IR_USE_FRAME_POINTER) {
|
||||||
|
preserved_regs |= IR_REGSET(IR_REG_FRAME_POINTER);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_spill_slot = op2_reg != IR_REG_NONE
|
||||||
|
&& IR_REG_SPILLED(op2_reg)
|
||||||
|
&& ctx->vregs[insn->op2];
|
||||||
|
|
||||||
|
if (op2_reg != IR_REG_NONE && !is_spill_slot) {
|
||||||
|
if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) {
|
||||||
|
ir_ref orig_op2_reg = op2_reg;
|
||||||
|
op2_reg = IR_REG_RAX;
|
||||||
|
|
||||||
|
if (IR_REG_SPILLED(orig_op2_reg)) {
|
||||||
|
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
|
||||||
|
} else {
|
||||||
|
ir_type type = ctx->ir_base[insn->op2].type;
|
||||||
|
| ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
op2_reg = IR_REG_NUM(op2_reg);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
|
||||||
|
IR_ASSERT(op2_reg == IR_REG_NONE);
|
||||||
|
mem = ir_fuse_load(ctx, def, insn->op2);
|
||||||
|
} else {
|
||||||
|
mem = ir_ref_spill_slot(ctx, insn->op2);
|
||||||
|
}
|
||||||
|
ir_reg base = IR_MEM_BASE(mem);
|
||||||
|
ir_reg index = IR_MEM_INDEX(mem);
|
||||||
|
if ((base != IR_REG_NONE && IR_REGSET_IN(preserved_regs, base)) ||
|
||||||
|
(index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) {
|
||||||
|
op2_reg = IR_REG_RAX;
|
||||||
|
|
||||||
|
ir_type type = ctx->ir_base[insn->op2].type;
|
||||||
|
ir_emit_load_mem_int(ctx, type, op2_reg, mem);
|
||||||
|
} else {
|
||||||
|
op2_reg = IR_REG_NONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ir_emit_epilogue(ctx);
|
ir_emit_epilogue(ctx);
|
||||||
|
|
||||||
if (IR_IS_CONST_REF(insn->op2)) {
|
if (IR_IS_CONST_REF(insn->op2)) {
|
||||||
|
@ -9246,22 +9323,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
|
||||||
|.endif
|
|.endif
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ir_reg op2_reg = ctx->regs[def][2];
|
|
||||||
|
|
||||||
if (op2_reg != IR_REG_NONE) {
|
if (op2_reg != IR_REG_NONE) {
|
||||||
if (IR_REG_SPILLED(op2_reg)) {
|
IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
|
||||||
op2_reg = IR_REG_NUM(op2_reg);
|
|
||||||
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
|
|
||||||
}
|
|
||||||
| jmp Ra(op2_reg)
|
| jmp Ra(op2_reg)
|
||||||
} else {
|
} else {
|
||||||
ir_mem mem;
|
|
||||||
|
|
||||||
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
|
|
||||||
mem = ir_fuse_load(ctx, def, insn->op2);
|
|
||||||
} else {
|
|
||||||
mem = ir_ref_spill_slot(ctx, insn->op2);
|
|
||||||
}
|
|
||||||
| ASM_TMEM_OP jmp, aword, mem
|
| ASM_TMEM_OP jmp, aword, mem
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue