From d2b54dc53e55e5175763c9d70d8ae2682427c007 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 4 Apr 2024 01:11:28 +0300 Subject: [PATCH] Update IR IR commit: eee484977acfe97b81b338a16390e218a852ec1c --- ext/opcache/jit/ir/ir.c | 7 ++ ext/opcache/jit/ir/ir.h | 2 + ext/opcache/jit/ir/ir_aarch64.dasc | 34 +++++++ ext/opcache/jit/ir/ir_builder.h | 4 + ext/opcache/jit/ir/ir_cfg.c | 141 ++++++++++++++++++++++++++++- ext/opcache/jit/ir/ir_emit.c | 14 ++- ext/opcache/jit/ir/ir_ra.c | 10 +- ext/opcache/jit/ir/ir_sccp.c | 33 ++++--- ext/opcache/jit/ir/ir_x86.dasc | 34 +++++++ 9 files changed, 252 insertions(+), 27 deletions(-) diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 454e0d44319..2c2f7c3d04d 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -267,6 +267,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x3 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_a1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_a2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_kind____ IR_OPND_UNUSED @@ -2771,3 +2772,9 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list) IR_ASSERT(ctx->control); return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list); } + +ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit1(ctx, IR_OPT(IR_BLOCK_BEGIN, IR_ADDR), ctx->control); +} diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 3fbf7fcb7f8..728714d5452 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -316,6 +316,8 @@ typedef enum _ir_type { /* memory reference and load/store ops */ \ _(ALLOCA, a2, src, def, ___) /* alloca(def) */ \ _(AFREE, a2, src, def, ___) /* revert alloca(def) */ \ + _(BLOCK_BEGIN, a1, src, ___, ___) /* stacksave */ \ + _(BLOCK_END, a2, src, def, ___) /* stackrestore */ \ _(VADDR, d1, var, ___, ___) /* load address of local var */ \ _(VLOAD, l2, src, var, ___) /* load value of local var */ \ _(VSTORE, s3, src, var, def) /* store value to local var */ \ diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 78d193d4d70..663482022f7 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -4063,6 +4063,34 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } +static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + | mov Rx(def_reg), sp + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, def_reg); + } +} + +static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + | mov sp, Rx(op2_reg) +} + static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) { ir_backend_data *data = ctx->data; @@ -5965,6 +5993,12 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_AFREE: ir_emit_afree(ctx, i, insn); break; + case IR_BLOCK_BEGIN: + ir_emit_block_begin(ctx, i, insn); + break; + case IR_BLOCK_END: + ir_emit_block_end(ctx, i, insn); + break; case IR_FRAME_ADDR: ir_emit_frame_addr(ctx, i); break; diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index 96bfda6dc76..097c80e048a 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -577,6 +577,9 @@ extern "C" { #define ir_FRAME_ADDR() ir_fold0(_ir_CTX, IR_OPT(IR_FRAME_ADDR, IR_ADDR)) +#define ir_BLOCK_BEGIN() _ir_BLOCK_BEGIN(_ir_CTX) +#define ir_BLOCK_END(_val) do {_ir_CTX->control = ir_emit2(_ir_CTX, IR_BLOCK_END, _ir_CTX->control, (_val));} while (0) + #define ir_VA_START(_list) _ir_VA_START(_ir_CTX, _list) #define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list) #define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src) @@ -680,6 +683,7 @@ void _ir_RETURN(ir_ctx *ctx, ir_ref val); void _ir_IJMP(ir_ctx *ctx, ir_ref addr); void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr); void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr); +ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx); ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n); void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val); ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func); diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index fa502484d3a..693434921fa 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -1511,12 +1511,17 @@ int ir_build_dominators_tree(ir_ctx *ctx) } #else /* A single pass modification of "A Simple, Fast Dominance Algorithm" by - * Cooper, Harvey and Kennedy, that relays on IR block ordering */ + * Cooper, Harvey and Kennedy, that relays on IR block ordering. + * It may fallback to the general slow fixed-point algorithm. */ +static int ir_build_dominators_tree_iterative(ir_ctx *ctx); int ir_build_dominators_tree(ir_ctx *ctx) { uint32_t blocks_count, b; ir_block *blocks, *bb; uint32_t *edges; + ir_list worklist; + + ir_list_init(&worklist, ctx->cfg_blocks_count / 2); ctx->flags2 |= IR_NO_LOOPS; @@ -1539,6 +1544,9 @@ int ir_build_dominators_tree(ir_ctx *ctx) if (UNEXPECTED(idom > b)) { /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */ ctx->flags2 &= ~IR_NO_LOOPS; + IR_ASSERT(k > 1); + IR_ASSERT(blocks[idom].successors_count == 1); + ir_list_push(&worklist, idom); while (1) { k--; p++; @@ -1547,9 +1555,12 @@ int ir_build_dominators_tree(ir_ctx *ctx) break; } IR_ASSERT(k > 0); + IR_ASSERT(blocks[idom].successors_count == 1); + ir_list_push(&worklist, idom); } } IR_ASSERT(blocks[idom].idom > 0); + IR_ASSERT(k != 0); while (--k > 0) { uint32_t pred_b = *(++p); @@ -1566,6 +1577,9 @@ int ir_build_dominators_tree(ir_ctx *ctx) } } else { ctx->flags2 &= ~IR_NO_LOOPS; + IR_ASSERT(bb->predecessors_count > 1); + IR_ASSERT(blocks[pred_b].successors_count == 1); + ir_list_push(&worklist, pred_b); } } bb->idom = idom; @@ -1593,6 +1607,131 @@ int ir_build_dominators_tree(ir_ctx *ctx) blocks[1].idom = 0; + if (ir_list_len(&worklist) != 0) { + uint32_t dom_depth; + uint32_t succ_b; + bool complete = 1; + + /* Check if all the back-edges lead to the loop headers */ + do { + b = ir_list_pop(&worklist); + bb = &blocks[b]; + IR_ASSERT(bb->successors_count == 1); + succ_b = ctx->cfg_edges[bb->successors]; + dom_depth = blocks[succ_b].dom_depth;; + while (bb->dom_depth > dom_depth) { + b = bb->dom_parent; + bb = &blocks[b]; + } + if (UNEXPECTED(b != succ_b)) { + complete = 0; + break; + } + } while (ir_list_len(&worklist) != 0); + + if (UNEXPECTED(!complete)) { + ir_list_free(&worklist); + return ir_build_dominators_tree_iterative(ctx); + } + } + + ir_list_free(&worklist); + + return 1; +} + +static int ir_build_dominators_tree_iterative(ir_ctx *ctx) +{ + bool changed; + uint32_t blocks_count, b; + ir_block *blocks, *bb; + uint32_t *edges; + + /* Find immediate dominators */ + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + + /* Clear the dominators tree, but keep already found dominators */ + for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) { + bb->dom_depth = 0; + bb->dom_child = 0; + bb->dom_next_child = 0; + } + + /* Find immediate dominators by iterative fixed-point algorithm */ + blocks[1].idom = 1; + do { + changed = 0; + + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + IR_ASSERT(bb->predecessors_count > 0); + uint32_t k = bb->predecessors_count; + uint32_t *p = edges + bb->predecessors; + uint32_t idom = *p; + + if (blocks[idom].idom == 0) { + while (1) { + k--; + p++; + idom = *p; + if (blocks[idom].idom > 0) { + break; + } + IR_ASSERT(k > 0); + } + } + IR_ASSERT(k != 0); + while (--k > 0) { + uint32_t pred_b = *(++p); + + if (blocks[pred_b].idom > 0) { + IR_ASSERT(blocks[pred_b].idom > 0); + while (idom != pred_b) { + while (pred_b > idom) { + pred_b = blocks[pred_b].idom; + } + while (idom > pred_b) { + idom = blocks[idom].idom; + } + } + } + } + if (bb->idom != idom) { + bb->idom = idom; + changed = 1; + } + } + } while (changed); + + /* Build dominators tree */ + blocks[1].idom = 0; + blocks[1].dom_depth = 0; + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + uint32_t idom = bb->idom; + ir_block *idom_bb = &blocks[idom]; + + bb->dom_depth = idom_bb->dom_depth + 1; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + return 1; } #endif diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c index 0b5f1539b78..a3630da62a4 100644 --- a/ext/opcache/jit/ir/ir_emit.c +++ b/ext/opcache/jit/ir/ir_emit.c @@ -662,14 +662,16 @@ static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from } } -IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, ir_bitset todo, ir_type type, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg) +IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg) { ir_ref from; ir_mem tmp_spill_slot; + ir_type type; IR_MEM_VAL(tmp_spill_slot) = 0; IR_ASSERT(!IR_IS_CONST_REF(to)); from = pred[to]; + type = types[from]; IR_ASSERT(!IR_IS_CONST_REF(from)); IR_ASSERT(from != to); IR_ASSERT(loc[from] == from); @@ -721,6 +723,7 @@ IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t from = pred[to]; r = loc[from]; + type = types[to]; if (from == r && ir_bitset_in(todo, from)) { /* Memory to memory move inside an isolated or "blocked" cycle requres an additional temporary register */ @@ -743,6 +746,8 @@ IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t break; } } + + type = types[to]; if (IR_MEM_VAL(tmp_spill_slot)) { ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot); } @@ -830,11 +835,12 @@ static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, to = pred[to]; while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) { to = pred[to]; - if (!IR_IS_CONST_REF(to) && ir_bitset_in(visited, to)) { + if (IR_IS_CONST_REF(to)) { + break; + } else if (ir_bitset_in(visited, to)) { /* We found a cycle. Resolve it. */ ir_bitset_incl(visited, to); - type = types[to]; - ir_dessa_resolve_cycle(ctx, pred, loc, todo, type, to, tmp_reg, tmp_fp_reg); + ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg); break; } ir_bitset_incl(visited, to); diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index db2972a05ea..f265567d84b 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -3311,26 +3311,26 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) if (IR_IS_TYPE_INT(type)) { tmp_reg.num = 0; tmp_reg.type = type; - tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.start = IR_USE_SUB_REF; tmp_reg.end = IR_SAVE_SUB_REF; } else { IR_ASSERT(IR_IS_TYPE_FP(type)); tmp_reg.num = 1; tmp_reg.type = type; - tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.start = IR_USE_SUB_REF; tmp_reg.end = IR_SAVE_SUB_REF; } } else if (from != 0) { if (IR_IS_TYPE_INT(type)) { tmp_reg.num = 0; tmp_reg.type = type; - tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.start = IR_USE_SUB_REF; tmp_reg.end = IR_SAVE_SUB_REF; } else { IR_ASSERT(IR_IS_TYPE_FP(type)); tmp_reg.num = 1; tmp_reg.type = type; - tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.start = IR_USE_SUB_REF; tmp_reg.end = IR_SAVE_SUB_REF; } } else { @@ -3916,7 +3916,7 @@ static void assign_regs(ir_ctx *ctx) } else { reg |= IR_REG_SPILL_LOAD; } - if (ctx->ir_base[ref].op != IR_SNAPSHOT) { + if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) { uint32_t use_b = ctx->cfg_map[ref]; if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) { diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 204f3741e37..fd7a8f55b3f 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -528,25 +528,24 @@ restart: static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) { - ir_ref j, n, *p, use, next; + ir_ref next; ir_insn *insn, *next_insn; - ir_use_list *use_list = &ctx->use_lists[ref]; insn = &ctx->ir_base[ref]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - if (use == dst) { - next = ctx->use_edges[ctx->use_lists[use].refs]; - next_insn = &ctx->ir_base[next]; - /* remove IF and IF_TRUE/FALSE from double linked control list */ - next_insn->op1 = insn->op1; - ir_use_list_replace_one(ctx, insn->op1, ref, next); - /* remove IF and IF_TRUE/FALSE instructions */ - ir_sccp_make_nop(ctx, ref); - ir_sccp_make_nop(ctx, use); - break; - } + if (ctx->use_lists[dst].count == 1) { + next = ctx->use_edges[ctx->use_lists[dst].refs]; + next_insn = &ctx->ir_base[next]; + /* remove IF and IF_TRUE/FALSE from double linked control list */ + next_insn->op1 = insn->op1; + ir_use_list_replace_one(ctx, insn->op1, ref, next); + /* remove IF and IF_TRUE/FALSE instructions */ + ir_sccp_make_nop(ctx, ref); + ir_sccp_make_nop(ctx, dst); + } else { + insn->op2 = IR_UNUSED; + insn->optx = IR_OPTX(IR_END, IR_VOID, 1); + next_insn = &ctx->ir_base[dst]; + next_insn->op = IR_BEGIN; } } @@ -1247,7 +1246,7 @@ int ir_sccp(ir_ctx *ctx) /* remove unreachable instruction */ insn = &ctx->ir_base[i]; if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { - if (insn->op != IR_PARAM && insn->op != IR_VAR) { + if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { ir_sccp_remove_insn(ctx, _values, i, &worklist2); } } else { diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 39fcb62895d..89bab163007 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -7733,6 +7733,34 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } +static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + | mov Ra(def_reg), Ra(IR_REG_RSP) + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, def_reg); + } +} + +static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + | mov Ra(IR_REG_RSP), Ra(op2_reg) +} + static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) { ir_backend_data *data = ctx->data; @@ -10391,6 +10419,12 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_AFREE: ir_emit_afree(ctx, i, insn); break; + case IR_BLOCK_BEGIN: + ir_emit_block_begin(ctx, i, insn); + break; + case IR_BLOCK_END: + ir_emit_block_end(ctx, i, insn); + break; case IR_FRAME_ADDR: ir_emit_frame_addr(ctx, i); break;