diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index c45b1efc21e..17c39545916 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -330,6 +330,7 @@ static ir_ref ir_next_const(ir_ctx *ctx)
 
 static void ir_grow_top(ir_ctx *ctx)
 {
+	ir_ref old_insns_limit = ctx->insns_limit;
 	ir_insn *buf = ctx->ir_base - ctx->consts_limit;
 
 	if (ctx->insns_limit < 1024 * 4) {
@@ -341,6 +342,12 @@ static void ir_grow_top(ir_ctx *ctx)
 	}
 	buf = ir_mem_realloc(buf, (ctx->consts_limit + ctx->insns_limit) * sizeof(ir_insn));
 	ctx->ir_base = buf + ctx->consts_limit;
+
+	if (ctx->use_lists) {
+		ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_limit * sizeof(ir_use_list));
+		memset(ctx->use_lists + old_insns_limit, 0,
+			(ctx->insns_limit - old_insns_limit) * sizeof(ir_use_list));
+	}
 }
 
 static ir_ref ir_next_insn(ir_ctx *ctx)
@@ -1152,7 +1159,7 @@ void ir_build_def_use_lists(ir_ctx *ctx)
 	ir_ref n, i, j, *p, def;
 	ir_insn *insn;
 	uint32_t edges_count;
-	ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list));
+	ir_use_list *lists = ir_mem_calloc(ctx->insns_limit, sizeof(ir_use_list));
 	ir_ref *edges;
 	ir_use_list *use_list;
 
@@ -1207,7 +1214,7 @@ void ir_build_def_use_lists(ir_ctx *ctx)
 	ir_ref n, i, j, *p, def;
 	ir_insn *insn;
 	size_t linked_lists_size, linked_lists_top = 0, edges_count = 0;
-	ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list));
+	ir_use_list *lists = ir_mem_calloc(ctx->insns_limit, sizeof(ir_use_list));
 	ir_ref *edges;
 	ir_use_list *use_list;
 	ir_ref *linked_lists;
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index c4f0926e085..433c59472f5 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -528,11 +528,12 @@ void ir_strtab_free(ir_strtab *strtab);
 #define IR_OPT_INLINE          (1<<16)
 #define IR_OPT_FOLDING         (1<<17)
 #define IR_OPT_CFG             (1<<18) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
-#define IR_OPT_CODEGEN         (1<<19)
-#define IR_GEN_NATIVE          (1<<20)
-#define IR_GEN_CODE            (1<<21) /* C or LLVM */
+#define IR_OPT_MEM2SSA         (1<<19)
+#define IR_OPT_CODEGEN         (1<<20)
+#define IR_GEN_NATIVE          (1<<21)
+#define IR_GEN_CODE            (1<<22) /* C or LLVM */
 
-#define IR_GEN_CACHE_DEMOTE    (1<<22) /* Demote the generated code from closest CPU caches */
+#define IR_GEN_CACHE_DEMOTE    (1<<23) /* Demote the generated code from closest CPU caches */
 
 /* debug related */
 #ifdef IR_DEBUG
@@ -751,13 +752,15 @@ ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref);
 /* Def -> Use lists */
 void ir_build_def_use_lists(ir_ctx *ctx);
 
+/* SSA Construction */
+int ir_mem2ssa(ir_ctx *ctx);
+
 /* CFG - Control Flow Graph (implementation in ir_cfg.c) */
 int ir_build_cfg(ir_ctx *ctx);
-int ir_remove_unreachable_blocks(ir_ctx *ctx);
 int ir_build_dominators_tree(ir_ctx *ctx);
 int ir_find_loops(ir_ctx *ctx);
 int ir_schedule_blocks(ir_ctx *ctx);
-void ir_build_prev_refs(ir_ctx *ctx);
+void ir_reset_cfg(ir_ctx *ctx);
 
 /* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
 int ir_sccp(ir_ctx *ctx);
@@ -929,7 +932,7 @@ IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
 		}
 
 		return ir_emit_code(ctx, size);
-	} else if (opt_level == 1 || opt_level == 2) {
+	} else if (opt_level > 0) {
 		if (!(ctx->flags & IR_OPT_FOLDING)) {
 			// IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2");
 			return NULL;
@@ -938,14 +941,29 @@ IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
 
 		ir_build_def_use_lists(ctx);
 
-		if (opt_level == 2
-		 && !ir_sccp(ctx)) {
-			return NULL;
+		if (ctx->flags & IR_OPT_MEM2SSA) {
+			if (!ir_build_cfg(ctx)
+			 || !ir_build_dominators_tree(ctx)
+			 || !ir_mem2ssa(ctx)) {
+				return NULL;
+			}
 		}
 
-		if (!ir_build_cfg(ctx)
-		 || !ir_build_dominators_tree(ctx)
-		 || !ir_find_loops(ctx)
+		if (opt_level > 1) {
+			ir_reset_cfg(ctx);
+			if (!ir_sccp(ctx)) {
+				return NULL;
+			}
+		}
+
+		if (!ctx->cfg_blocks) {
+			if (!ir_build_cfg(ctx)
+			 || !ir_build_dominators_tree(ctx)) {
+				return NULL;
+			}
+		}
+
+		if (!ir_find_loops(ctx)
 		 || !ir_gcm(ctx)
 		 || !ir_schedule(ctx)
 		 || !ir_match(ctx)
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 27595ad3124..6b397f27b31 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -1095,6 +1095,8 @@ binop_fp:
 				}
 			}
 			return IR_SKIPPED | IR_NOP;
+		case IR_NOP:
+			return IR_SKIPPED | IR_NOP;
 		default:
 			break;
 	}
@@ -5603,7 +5605,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 			} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
 				if (insn->op == IR_VLOAD
 				 && ctx->live_intervals[ctx->vregs[i]]
-				 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
+				 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1
+				 && ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) {
 					/* pass */
 				} else if (insn->op != IR_PARAM) {
 					reg = ir_get_free_reg(insn->type, available);
@@ -5704,7 +5707,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 				if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
 					IR_REGSET_EXCL(available, reg);
 					ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
-				} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
+				} else if (IR_IS_FOLDABLE_OP(insn->op) && j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
 					ctx->regs[i][j] = ctx->regs[i][1];
 				} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
 					reg = ir_get_free_reg(ctx->ir_base[input].type, available);
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 0a36d5d9880..7a71208d782 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -8,6 +8,8 @@
 #include "ir.h"
 #include "ir_private.h"
 
+static int ir_remove_unreachable_blocks(ir_ctx *ctx);
+
 IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist)
 {
 	ir_use_list *use_list = &ctx->use_lists[ref];
@@ -57,6 +59,24 @@ IR_ALWAYS_INLINE void _ir_add_predecessors(const ir_insn *insn, ir_worklist *wor
 	}
 }
 
+void ir_reset_cfg(ir_ctx *ctx)
+{
+	ctx->cfg_blocks_count = 0;
+	ctx->cfg_edges_count = 0;
+	if (ctx->cfg_blocks) {
+		ir_mem_free(ctx->cfg_blocks);
+		ctx->cfg_blocks = NULL;
+		if (ctx->cfg_edges) {
+			ir_mem_free(ctx->cfg_edges);
+			ctx->cfg_edges = NULL;
+		}
+		if (ctx->cfg_map) {
+			ir_mem_free(ctx->cfg_map);
+			ctx->cfg_map = NULL;
+		}
+	}
+}
+
 int ir_build_cfg(ir_ctx *ctx)
 {
 	ir_ref n, *p, ref, start, end;
@@ -330,11 +350,15 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 			}
 		}
 	i--;
+	for (j = i + 1; j <= n; j++) {
+		ir_insn_set_op(insn, j, IR_UNUSED);
+	}
 	if (i == 1) {
 		insn->op = IR_BEGIN;
 		insn->inputs_count = 1;
 		use_list = &ctx->use_lists[merge];
 		if (use_list->count > 1) {
+			n++;
 			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
 				use = *p;
 				use_insn = &ctx->ir_base[use];
@@ -347,12 +371,14 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 					if (ir_bitset_in(life_inputs, j - 1)) {
 						use_insn->op1 = ir_insn_op(use_insn, j);
 					} else if (input > 0) {
-						ir_use_list_remove_all(ctx, input, use);
+						ir_use_list_remove_one(ctx, input, use);
 					}
 				}
 				use_insn->op = IR_COPY;
-				use_insn->op2 = IR_UNUSED;
-				use_insn->op3 = IR_UNUSED;
+				use_insn->inputs_count = 1;
+				for (j = 2; j <= n; j++) {
+					ir_insn_set_op(use_insn, j, IR_UNUSED);
+				}
 				ir_use_list_remove_all(ctx, merge, use);
 			}
 		}
@@ -360,9 +386,9 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 	} else {
 		insn->inputs_count = i;
 
-		n++;
 		use_list = &ctx->use_lists[merge];
 		if (use_list->count > 1) {
+			n++;
 			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
 				use = *p;
 				use_insn = &ctx->ir_base[use];
@@ -378,9 +404,13 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 						}
 						i++;
 					} else if (input > 0) {
-						ir_use_list_remove_all(ctx, input, use);
+						ir_use_list_remove_one(ctx, input, use);
 					}
 				}
+				use_insn->inputs_count = i - 1;
+				for (j = i; j <= n; j++) {
+					ir_insn_set_op(use_insn, j, IR_UNUSED);
+				}
 			}
 		}
 	}
@@ -390,7 +420,7 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
 }
 
 /* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */
-int ir_remove_unreachable_blocks(ir_ctx *ctx)
+static int ir_remove_unreachable_blocks(ir_ctx *ctx)
 {
 	uint32_t b, *p, i;
 	uint32_t unreachable_count = 0;
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index f41957a2647..656f8dbe7c1 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -362,6 +362,10 @@ bool ir_check(const ir_ctx *ctx)
							break;
						}
					}
+					if (count == 0 && (insn->op == IR_END || insn->op == IR_LOOP_END)) {
+						/* Dead block */
+						break;
+					}
					fprintf(stderr, "ir_base[%d].op (%s) must have 1 successor (%d)\n",
						i, ir_op_name[insn->op], count);
					ok = 0;
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index ed1cd7e39be..12103a174d0 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -356,9 +356,11 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
 			}
 		} else {
 			j = i = ctx->cfg_map[use];
-			IR_ASSERT(i > 0);
-			while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
-				j = ctx->cfg_blocks[j].idom;
+			if (i) {
+				IR_ASSERT(i > 0);
+				while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
+					j = ctx->cfg_blocks[j].idom;
+				}
 			}
 			clone = ir_hashtab_find(&hash, j);
 			if (clone == IR_INVALID_VAL) {
@@ -941,8 +943,9 @@ int ir_schedule(ir_ctx *ctx)
 
 			for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
 				ir_ref use = *p;
-				if (!_xlat[use]) {
-					ir_insn *use_insn = &ctx->ir_base[use];
+				ir_insn *use_insn = &ctx->ir_base[use];
+				if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
+					IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
 					if (use_insn->op == IR_PARAM
 					 || use_insn->op == IR_VAR
 					 || use_insn->op == IR_PI
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index fe4a7942686..f88ba754969 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -1120,6 +1120,7 @@ struct _ir_block {
 	uint32_t loop_depth;
 };
 
+void ir_build_prev_refs(ir_ctx *ctx);
 uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b);
 uint32_t ir_next_block(const ir_ctx *ctx, uint32_t b);
 void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block);
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 284e1480d38..bc90dc5966a 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1456,11 +1456,29 @@ op2_const:
 		case IR_SEXT:
 		case IR_ZEXT:
 		case IR_TRUNC:
-		case IR_BITCAST:
 		case IR_PROTO:
 		case IR_FP2FP:
 			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
 			break;
+		case IR_BITCAST:
+			insn = &ctx->ir_base[ref];
+			if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
+				flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
+			} else {
+				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
+			}
+			break;
+		case IR_FP2INT:
+			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
+			break;
+		case IR_INT2FP:
+			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
+			insn = &ctx->ir_base[ref];
+			if (IR_IS_CONST_REF(insn->op1)) {
+				constraints->tmp_regs[0] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+				n = 1;
+			}
+			break;
 		case IR_ABS_INT:
 			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
 			break;
@@ -2878,6 +2896,8 @@ store_int:
 				}
 			}
 			return IR_SKIPPED | IR_NOP;
+		case IR_NOP:
+			return IR_SKIPPED | IR_NOP;
 		default:
 			break;
 	}
@@ -6202,20 +6222,33 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 
 	IR_ASSERT(def_reg != IR_REG_NONE);
 
-	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+	if (op2 != op3) {
+		if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			ir_emit_load(ctx, type, op2_reg, op2);
+			if (op1 == op2) {
+				op1_reg = op2_reg;
+			}
+		}
+		if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
+			op3_reg = IR_REG_NUM(op3_reg);
+			ir_emit_load(ctx, type, op3_reg, op3);
+			if (op1 == op2) {
+				op1_reg = op3_reg;
+			}
+		}
+	} else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
 		op2_reg = IR_REG_NUM(op2_reg);
 		ir_emit_load(ctx, type, op2_reg, op2);
+		op3_reg = op2_reg;
 		if (op1 == op2) {
 			op1_reg = op2_reg;
 		}
-		if (op3 == op2) {
-			op3_reg = op2_reg;
-		}
-	}
-	if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) {
+	} else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
 		op3_reg = IR_REG_NUM(op3_reg);
 		ir_emit_load(ctx, type, op3_reg, op3);
-		if (op1 == op2) {
+		op2_reg = op3_reg;
+		if (op1 == op3) {
 			op1_reg = op3_reg;
 		}
 	}
@@ -6710,7 +6743,19 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			|.endif
 		}
 	} else if (IR_IS_CONST_REF(insn->op1)) {
-		IR_ASSERT(0);
+		int64_t val;
+
+		if (ir_type_size[src_type] == 1) {
+			val = ctx->ir_base[insn->op1].val.i8;
+		} else if (ir_type_size[src_type] == 2) {
+			val = ctx->ir_base[insn->op1].val.i16;
+		} else if (ir_type_size[src_type] == 4) {
+			val = ctx->ir_base[insn->op1].val.i32;
+		} else {
+			IR_ASSERT(ir_type_size[src_type] == 8);
+			val = ctx->ir_base[insn->op1].val.i64;
+		}
+		ir_emit_mov_imm_int(ctx, dst_type, def_reg, val);
 	} else {
 		ir_mem mem;
 
@@ -6809,7 +6854,19 @@ static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			|.endif
 		}
 	} else if (IR_IS_CONST_REF(insn->op1)) {
-		IR_ASSERT(0);
+		uint64_t val;
+
+		if (ir_type_size[src_type] == 1) {
+			val = ctx->ir_base[insn->op1].val.u8;
+		} else if (ir_type_size[src_type] == 2) {
+			val = ctx->ir_base[insn->op1].val.u16;
+		} else if (ir_type_size[src_type] == 4) {
+			val = ctx->ir_base[insn->op1].val.u32;
+		} else {
+			IR_ASSERT(ir_type_size[src_type] == 8);
+			val = ctx->ir_base[insn->op1].val.u64;
+		}
+		ir_emit_mov_imm_int(ctx, dst_type, def_reg, val);
 	} else {
 		ir_mem mem;
 
@@ -7117,6 +7174,8 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			}
 			|.endif
 		}
+	} else if (IR_IS_CONST_REF(insn->op1)) {
+		IR_ASSERT(0);
 	} else {
 		ir_mem mem;
 		bool src64 = ir_type_size[src_type] == 8;
@@ -9559,7 +9618,7 @@ static void ir_emit_sse_sqrt(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 
 	if (IR_REG_SPILLED(op3_reg)) {
 		op3_reg = IR_REG_NUM(op3_reg);
-		ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+		ir_emit_load(ctx, insn->type, op3_reg, insn->op3);
 	}
 
 	| ASM_FP_REG_REG_OP sqrts, insn->type, def_reg, op3_reg
@@ -9581,7 +9640,7 @@ static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_
 
 	if (IR_REG_SPILLED(op3_reg)) {
 		op3_reg = IR_REG_NUM(op3_reg);
-		ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
+		ir_emit_load(ctx, insn->type, op3_reg, insn->op3);
 	}
 
 	if (ctx->mflags & IR_X86_AVX) {
@@ -10019,7 +10078,8 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 			} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
 				if (insn->op == IR_VLOAD
 				 && ctx->live_intervals[ctx->vregs[i]]
-				 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
+				 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1
+				 && ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) {
 					/* pass */
 				} else if (insn->op != IR_PARAM) {
 					reg = ir_get_free_reg(insn->type, available);
@@ -10120,7 +10180,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 				if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
 					IR_REGSET_EXCL(available, reg);
 					ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
-				} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
+				} else if (IR_IS_FOLDABLE_OP(insn->op) && j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
 					ctx->regs[i][j] = ctx->regs[i][1];
 				} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
 					reg = ir_get_free_reg(ctx->ir_base[input].type, available);
@@ -11018,7 +11078,8 @@ void ir_fix_thunk(void *thunk_entry, void *addr)
 		addr_ptr = (void**)(code + 6 + *offset_ptr);
 		*addr_ptr = addr;
 	} else {
-		int32_t *addr_ptr;
+		typedef IR_SET_ALIGNED(1, int32_t unaligned_int32_t);
+		unaligned_int32_t *addr_ptr;
 
 		code[0] = 0xe9;
 		addr_ptr = (int32_t*)(code + 1);