From ac1cd9c26ef90fd2512587c185db2964bc060f77 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Mon, 4 Aug 2025 17:26:24 +0300 Subject: [PATCH] Update IR IR commit: 6e2aea0ebfef2c741ebec30c57aa492df0d4e319 --- ext/opcache/jit/ir/ir.c | 15 +- ext/opcache/jit/ir/ir.h | 17 +- ext/opcache/jit/ir/ir_aarch64.dasc | 94 +++++++++-- ext/opcache/jit/ir/ir_builder.h | 2 + ext/opcache/jit/ir/ir_cfg.c | 247 +++++++++++++++++++++-------- ext/opcache/jit/ir/ir_check.c | 9 +- ext/opcache/jit/ir/ir_emit.c | 4 +- ext/opcache/jit/ir/ir_fold.h | 4 +- ext/opcache/jit/ir/ir_gcm.c | 199 +++++++++++++++++++++-- ext/opcache/jit/ir/ir_private.h | 5 +- ext/opcache/jit/ir/ir_save.c | 11 +- ext/opcache/jit/ir/ir_sccp.c | 38 ++++- ext/opcache/jit/ir/ir_x86.dasc | 167 +++++++++++++++++-- 13 files changed, 685 insertions(+), 127 deletions(-) diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index a9f55cc0e46..97d32680e4f 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -172,7 +172,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted } else if (insn->val.c == '\0') { fprintf(f, "'\\0'"); } else { - fprintf(f, "%u", insn->val.c); + fprintf(f, "%u", (unsigned char)insn->val.c); } break; case IR_I8: @@ -247,6 +247,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_flag_S1X1 (ir_op_flag_S | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_S2 (ir_op_flag_S | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_S2X1 (ir_op_flag_S | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S3 (ir_op_flag_S | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_SN (ir_op_flag_S | IR_OP_FLAG_VAR_INPUTS) #define ir_op_flag_E (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END) #define ir_op_flag_E1 (ir_op_flag_E | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) @@ -803,7 +804,9 @@ ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_co proto->flags = flags; proto->ret_type = ret_type; proto->params_count = params_count; - memcpy(proto->param_types, param_types, params_count); + if (params_count) { + memcpy(proto->param_types, param_types, params_count); + } return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count); } @@ -1080,7 +1083,7 @@ ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count) ir_ref *p, ref = ctx->insns_count; ir_insn *insn; - IR_ASSERT(count >= 0); + IR_ASSERT(count >= 0 && count < 65536); while (UNEXPECTED(ref + count/4 >= ctx->insns_limit)) { ir_grow_top(ctx); } @@ -2973,6 +2976,12 @@ void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val) ctx->control = ir_emit2(ctx, IR_CASE_VAL, switch_ref, val); } +void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2) +{ + IR_ASSERT(!ctx->control); + ctx->control = ir_emit3(ctx, IR_CASE_RANGE, switch_ref, v1, v2); +} + void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref) { IR_ASSERT(!ctx->control); diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index ec5e57129c9..9575348ff54 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -359,6 +359,7 @@ typedef enum _ir_type { _(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \ _(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \ _(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \ + _(CASE_RANGE, S3, src, def, def) /* switch proj. */ \ _(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. 
*/ \ _(MERGE, SN, src, src, src) /* control merge */ \ _(LOOP_BEGIN, SN, src, src, src) /* loop start */ \ @@ -854,6 +855,9 @@ void ir_gdb_unregister_all(void); bool ir_gdb_present(void); /* IR load API (implementation in ir_load.c) */ +#define IR_RESOLVE_SYM_ADD_THUNK (1<<0) +#define IR_RESOLVE_SYM_SILENT (1<<1) + struct _ir_loader { uint32_t default_func_flags; bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target); @@ -870,7 +874,7 @@ struct _ir_loader { bool (*sym_data_end) (ir_loader *loader, uint32_t flags); bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name); bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name); - void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk); + void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags); bool (*has_sym) (ir_loader *loader, const char *name); bool (*add_sym) (ir_loader *loader, const char *name, void *addr); }; @@ -884,11 +888,12 @@ int ir_load_llvm_bitcode(ir_loader *loader, const char *filename); int ir_load_llvm_asm(ir_loader *loader, const char *filename); /* IR save API (implementation in ir_save.c) */ -#define IR_SAVE_CFG (1<<0) /* add info about CFG */ -#define IR_SAVE_CFG_MAP (1<<1) /* add info about CFG block assignment */ -#define IR_SAVE_USE_LISTS (1<<2) /* add info about def->use lists */ -#define IR_SAVE_RULES (1<<3) /* add info about selected code-generation rules */ -#define IR_SAVE_REGS (1<<4) /* add info about selected registers */ +#define IR_SAVE_CFG (1<<0) /* add info about CFG */ +#define IR_SAVE_CFG_MAP (1<<1) /* add info about CFG block assignment */ +#define IR_SAVE_USE_LISTS (1<<2) /* add info about def->use lists */ +#define IR_SAVE_RULES (1<<3) /* add info about selected code-generation rules */ +#define IR_SAVE_REGS (1<<4) /* add info about selected registers */ +#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */ void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f); void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f); diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 772eea7a5d7..476529555e9 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -996,6 +996,7 @@ binop_fp: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: + case IR_CASE_RANGE: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: @@ -4366,11 +4367,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { @@ -4394,11 +4399,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if 
(IR_REG_SPILLED(op2_reg)) { @@ -4465,6 +4474,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) int count = 0; ir_val min, max; ir_reg op1_reg, op2_reg, tmp_reg; + bool has_case_range = 0; type = ctx->ir_base[insn->op2].type; if (IR_IS_TYPE_SIGNED(type)) { @@ -4493,6 +4503,22 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); } count++; + } else if (use_insn->op == IR_CASE_RANGE) { + has_case_range = 1; + val = &ctx->ir_base[use_insn->op2]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + ir_insn *val2 = &ctx->ir_base[use_insn->op3]; + IR_ASSERT(!IR_IS_SYM_CONST(val2->op)); + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val2->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val2->val.u64); + } + count++; } else { IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); default_label = ir_skip_empty_target_blocks(ctx, use_block); @@ -4510,7 +4536,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } /* Generate a table jmp or a sequence of calls */ - if (count > 2 && (max.i64-min.i64) < count * 8) { + if (!has_case_range && count > 2 && (max.i64-min.i64) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1)); for (i = 0; i <= (max.i64 - min.i64); i++) { @@ -4615,6 +4641,38 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } | beq =>label + } else if (use_insn->op == IR_CASE_RANGE) { + val = &ctx->ir_base[use_insn->op2]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + label = ir_skip_empty_target_blocks(ctx, use_block); + if (aarch64_may_encode_imm12(val->val.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + + } + if (IR_IS_TYPE_SIGNED(type)) { + | blt >1 + } else { + | blo >1 + } + val = &ctx->ir_base[use_insn->op3]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + label = ir_skip_empty_target_blocks(ctx, use_block); + if (aarch64_may_encode_imm12(val->val.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + + } + if (IR_IS_TYPE_SIGNED(type)) { + | ble =>label + } else { + | bls =>label + } + |1: } } if (default_label) { @@ -4935,6 +4993,28 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) return; } + /* Move op2 to a tmp register before epilogue if it's in + * used_preserved_regs, because it will be overridden. 
*/ + + ir_reg op2_reg = IR_REG_NONE; + if (!IR_IS_CONST_REF(insn->op2)) { + op2_reg = ctx->regs[def][2]; + IR_ASSERT(op2_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_INT_TMP; + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } else if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, IR_REG_NUM(op2_reg))) { + ir_reg orig_op2_reg = op2_reg; + op2_reg = IR_REG_INT_TMP; + + ir_type type = ctx->ir_base[insn->op2].type; + | ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg) + } else { + op2_reg = IR_REG_NUM(op2_reg); + } + } + ir_emit_epilogue(ctx); if (IR_IS_CONST_REF(insn->op2)) { @@ -4947,13 +5027,8 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) | br Rx(IR_REG_INT_TMP) } } else { - ir_reg op2_reg = ctx->regs[def][2]; - IR_ASSERT(op2_reg != IR_REG_NONE); - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } + IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg)); | br Rx(op2_reg) } } @@ -5590,6 +5665,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: + case IR_CASE_RANGE: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index 4e4ea53683a..ba1924f8d65 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -603,6 +603,7 @@ extern "C" { #define ir_LOOP_END() _ir_LOOP_END(_ir_CTX) #define ir_SWITCH(_val) _ir_SWITCH(_ir_CTX, (_val)) #define ir_CASE_VAL(_switch, _val) _ir_CASE_VAL(_ir_CTX, (_switch), (_val)) +#define ir_CASE_RANGE(_switch, _v1, _v2) _ir_CASE_RANGE(_ir_CTX, (_switch), (_v1), (_v2)) #define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch)) #define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val)) #define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr)) @@ -682,6 +683,7 @@ ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset); void _ir_UNREACHABLE(ir_ctx *ctx); ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val); void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val); +void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2); void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref); void _ir_RETURN(ir_ctx *ctx, ir_ref val); void _ir_IJMP(ir_ctx *ctx, ir_ref addr); diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 01532c8ea3e..13d66a71302 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -244,7 +244,6 @@ int ir_build_cfg(ir_ctx *ctx) _blocks[start] = b; _blocks[end] = b; IR_ASSERT(IR_IS_BB_START(insn->op)); - IR_ASSERT(end > start); bb->start = start; bb->end = end; bb->successors = count; @@ -583,7 +582,6 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx) return 1; } -#if 0 static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) { uint32_t i, *p; @@ -607,34 +605,42 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by * Cooper, Harvey and Kennedy. 
*/ -int ir_build_dominators_tree(ir_ctx *ctx) +static int ir_build_dominators_tree_slow(ir_ctx *ctx) { uint32_t blocks_count, b, postnum; ir_block *blocks, *bb; uint32_t *edges; bool changed; + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + + /* Clear the dominators tree */ + for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) { + bb->idom = 0; + bb->dom_depth = 0; + bb->dom_child = 0; + bb->dom_next_child = 0; + } + ctx->flags2 &= ~IR_NO_LOOPS; postnum = 1; compute_postnum(ctx, &postnum, 1); - /* Find immediate dominators */ - blocks = ctx->cfg_blocks; - edges = ctx->cfg_edges; - blocks_count = ctx->cfg_blocks_count; + /* Find immediate dominators by iterative fixed-point algorithm */ blocks[1].idom = 1; do { changed = 0; /* Iterating in Reverse Post Order */ for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + IR_ASSERT(bb->predecessors_count > 0); if (bb->predecessors_count == 1) { uint32_t pred_b = edges[bb->predecessors]; - if (blocks[pred_b].idom <= 0) { - //IR_ASSERT("Wrong blocks order: BB is before its single predecessor"); - } else if (bb->idom != pred_b) { + if (blocks[pred_b].idom > 0 && bb->idom != pred_b) { bb->idom = pred_b; changed = 1; } @@ -680,39 +686,53 @@ int ir_build_dominators_tree(ir_ctx *ctx) } } } while (changed); + + /* Build dominators tree */ blocks[1].idom = 0; blocks[1].dom_depth = 0; - - /* Construct dominators tree */ for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { - IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); - if (bb->idom > 0) { - ir_block *idom_bb = &blocks[bb->idom]; + uint32_t idom = bb->idom; + ir_block *idom_bb = &blocks[idom]; - bb->dom_depth = idom_bb->dom_depth + 1; - /* Sort by block number to traverse children in pre-order */ - if (idom_bb->dom_child == 0) { - idom_bb->dom_child = b; - } else if (b < idom_bb->dom_child) { - bb->dom_next_child = idom_bb->dom_child; - idom_bb->dom_child = b; + bb->dom_depth = 0; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + + /* Recalculate dom_depth for all blocks */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + uint32_t idom = bb->idom; + uint32_t dom_depth = 0; + while (idom) { + dom_depth++; + if (blocks[idom].dom_depth > 0) { + dom_depth += blocks[idom].dom_depth; + break; } else { - int child = idom_bb->dom_child; - ir_block *child_bb = &blocks[child]; - - while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { - child = child_bb->dom_next_child; - child_bb = &blocks[child]; - } - bb->dom_next_child = child_bb->dom_next_child; - child_bb->dom_next_child = b; + idom = blocks[idom].idom; } } + bb->dom_depth = dom_depth; } return 1; } -#else + /* A single pass modification of "A Simple, Fast Dominance Algorithm" by * Cooper, Harvey and Kennedy, that relies on IR block ordering. * It may fall back to the general slow fixed-point algorithm. 
*/ @@ -747,7 +767,11 @@ int ir_build_dominators_tree(ir_ctx *ctx) if (UNEXPECTED(idom >= b)) { /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */ ctx->flags2 &= ~IR_NO_LOOPS; - IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); +// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); + if (UNEXPECTED(k <= 1)) { + ir_list_free(&worklist); + return ir_build_dominators_tree_slow(ctx); + } ir_list_push(&worklist, idom); while (1) { k--; @@ -942,7 +966,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx) return 1; } -#endif static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2) { @@ -958,7 +981,7 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2) int ir_find_loops(ir_ctx *ctx) { - uint32_t i, j, n, count; + uint32_t b, j, n, count; uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1; ir_block *blocks = ctx->cfg_blocks; uint32_t *edges = ctx->cfg_edges; @@ -983,13 +1006,13 @@ int ir_find_loops(ir_ctx *ctx) int child; next: - i = ir_worklist_peek(&work); - if (!entry_times[i]) { - entry_times[i] = time++; + b = ir_worklist_peek(&work); + if (!entry_times[b]) { + entry_times[b] = time++; } - /* Visit blocks immediately dominated by i. */ - bb = &blocks[i]; + /* Visit blocks immediately dominated by "b". */ + bb = &blocks[b]; for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) { if (ir_worklist_push(&work, child)) { goto next; @@ -999,17 +1022,17 @@ next: /* Visit join edges. */ if (bb->successors_count) { uint32_t *p = edges + bb->successors; - for (j = 0; j < bb->successors_count; j++,p++) { + for (j = 0; j < bb->successors_count; j++, p++) { uint32_t succ = *p; - if (blocks[succ].idom == i) { + if (blocks[succ].idom == b) { continue; } else if (ir_worklist_push(&work, succ)) { goto next; } } } - exit_times[i] = time++; + exit_times[b] = time++; ir_worklist_pop(&work); } @@ -1018,7 +1041,7 @@ next: j = 1; n = 2; while (j != n) { - i = j; + uint32_t i = j; j = n; for (; i < j; i++) { int child; @@ -1030,9 +1053,82 @@ next: count = n; /* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". 
*/ + uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth; + uint32_t prev_irreducible = 0; while (n > 1) { - i = sorted_blocks[--n]; - ir_block *bb = &blocks[i]; + b = sorted_blocks[--n]; + ir_block *bb = &blocks[b]; + + IR_ASSERT(bb->dom_depth <= prev_dom_depth); + if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) { + /* process delayed irreducible loops */ + do { + b = sorted_blocks[prev_irreducible]; + bb = &blocks[b]; + if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) { + /* process irreducible loop */ + uint32_t hdr = b; + + bb->loop_depth = 1; + if (ctx->ir_base[bb->start].op == IR_MERGE) { + ctx->ir_base[bb->start].op = IR_LOOP_BEGIN; + } + + /* find the closing edge(s) of the irreducible loop */ + IR_ASSERT(bb->predecessors_count > 1); + uint32_t *p = &edges[bb->predecessors]; + j = bb->predecessors_count; + do { + uint32_t pred = *p; + + if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) { + if (!ir_worklist_len(&work)) { + ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work))); + } + blocks[pred].loop_header = 0; /* support for merged loops */ + ir_worklist_push(&work, pred); + } + p++; + } while (--j); + if (ir_worklist_len(&work) == 0) continue; + + /* collect members of the irreducible loop */ + while (ir_worklist_len(&work)) { + b = ir_worklist_pop(&work); + if (b != hdr) { + ir_block *bb = &blocks[b]; + bb->loop_header = hdr; + if (bb->predecessors_count) { + uint32_t *p = &edges[bb->predecessors]; + uint32_t n = bb->predecessors_count; + do { + uint32_t pred = *p; + while (blocks[pred].loop_header > 0) { + pred = blocks[pred].loop_header; + } + if (pred != hdr) { + if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) { + /* "pred" is a descendant of "hdr" */ + ir_worklist_push(&work, pred); + } else { + /* another entry to the irreducible loop */ + bb->flags |= IR_BB_IRREDUCIBLE_LOOP; + if (ctx->ir_base[bb->start].op == IR_MERGE) { + ctx->ir_base[bb->start].op = IR_LOOP_BEGIN; + } + } + } + p++; + } while (--n); + } + } + } + } + } while (--prev_irreducible != n); + prev_irreducible = 0; + b = sorted_blocks[n]; + bb = &blocks[b]; + } if (bb->predecessors_count > 1) { bool irreducible = 0; @@ -1047,7 +1143,7 @@ next: if (bb->idom != pred) { /* In a loop back-edge (back-join edge), the successor dominates the predecessor. */ - if (ir_dominates(blocks, i, pred)) { + if (ir_dominates(blocks, b, pred)) { if (!ir_worklist_len(&work)) { ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work))); } @@ -1056,8 +1152,9 @@ next: } else { /* Otherwise it's a cross-join edge. See if it's a branch to an ancestor on the DJ spanning tree. */ - if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) { + if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) { irreducible = 1; + break; } } } @@ -1065,46 +1162,56 @@ next: } while (--j); if (UNEXPECTED(irreducible)) { - // TODO: Support for irreducible loops ??? 
- bb->flags |= IR_BB_IRREDUCIBLE_LOOP; - ctx->flags2 |= IR_IRREDUCIBLE_CFG; - while (ir_worklist_len(&work)) { - ir_worklist_pop(&work); + bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP; + ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG; + /* Remember the position of the first irreducible loop to process all the irreducible loops + * after the reducible loops with the same dominator tree depth + */ + if (!prev_irreducible) { + prev_irreducible = n; + prev_dom_depth = bb->dom_depth; } + ir_list_clear(&work.l); } else if (ir_worklist_len(&work)) { + /* collect members of the reducible loop */ + uint32_t hdr = b; + bb->flags |= IR_BB_LOOP_HEADER; ctx->flags2 |= IR_CFG_HAS_LOOPS; bb->loop_depth = 1; + if (ctx->ir_base[bb->start].op == IR_MERGE) { + ctx->ir_base[bb->start].op = IR_LOOP_BEGIN; + } while (ir_worklist_len(&work)) { - j = ir_worklist_pop(&work); - while (blocks[j].loop_header > 0) { - j = blocks[j].loop_header; - } - if (j != i) { - ir_block *bb = &blocks[j]; - if (bb->idom == 0 && j != 1) { - /* Ignore blocks that are unreachable or only abnormally reachable. */ - continue; - } - bb->loop_header = i; + b = ir_worklist_pop(&work); + if (b != hdr) { + ir_block *bb = &blocks[b]; + bb->loop_header = hdr; if (bb->predecessors_count) { uint32_t *p = &edges[bb->predecessors]; - j = bb->predecessors_count; + uint32_t n = bb->predecessors_count; do { - ir_worklist_push(&work, *p); + uint32_t pred = *p; + while (blocks[pred].loop_header > 0) { + pred = blocks[pred].loop_header; + } + if (pred != hdr) { + ir_worklist_push(&work, pred); + } p++; - } while (--j); + } while (--n); } } } } } } + IR_ASSERT(!prev_irreducible); if (ctx->flags2 & IR_CFG_HAS_LOOPS) { for (n = 1; n < count; n++) { - i = sorted_blocks[n]; - ir_block *bb = &blocks[i]; + b = sorted_blocks[n]; + ir_block *bb = &blocks[b]; if (bb->loop_header > 0) { ir_block *loop = &blocks[bb->loop_header]; uint32_t loop_depth = loop->loop_depth; @@ -1389,7 +1496,7 @@ restart: goto restart; } } else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) { - ir_dump_cfg(ctx, stderr); + /* not a loop back-edge */ IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b); } } diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c index f12b4776fa1..a791baef5db 100644 --- a/ext/opcache/jit/ir/ir_check.c +++ b/ext/opcache/jit/ir/ir_check.c @@ -213,13 +213,18 @@ bool ir_check(const ir_ctx *ctx) ok = 0; } } - break; - case IR_OPND_CONTROL_DEP: if ((ctx->flags2 & IR_LINEAR) && use >= i && !(insn->op == IR_LOOP_BEGIN)) { fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); ok = 0; + } + break; + case IR_OPND_CONTROL_DEP: + if ((ctx->flags2 & IR_LINEAR) + && use >= i) { + fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); + ok = 0; } else if (insn->op == IR_PHI) { ir_insn *merge_insn = &ctx->ir_base[insn->op1]; if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) { diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c index c82655daf48..fab9f56228d 100644 --- a/ext/opcache/jit/ir/ir_emit.c +++ b/ext/opcache/jit/ir/ir_emit.c @@ -309,7 +309,7 @@ static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn) { const char *name = ir_get_str(ctx, addr_insn->val.name); void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
- ctx->loader->resolve_sym_name(ctx->loader, name, 0) : + ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) : ir_resolve_sym_name(name); return addr; @@ -320,7 +320,7 @@ static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn) { const char *name = ir_get_str(ctx, addr_insn->val.name); void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? - ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) : + ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) : ir_resolve_sym_name(name); IR_ASSERT(addr); diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 88539e52ab0..90112214d0c 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -1909,7 +1909,9 @@ IR_FOLD(SUB(_, SUB)) IR_FOLD(SUB(ADD, ADD)) { if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { - if (op1_insn->op1 == op2_insn->op1) { + if (op1 == op2) { + IR_FOLD_CONST_U(0); + } else if (op1_insn->op1 == op2_insn->op1) { /* (a + b) - (a + c) => b - c */ op1 = op1_insn->op2; op2 = op2_insn->op2; diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 8bd6be5d10a..4d518d20079 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -785,6 +785,139 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref) return 0; } +IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start) +{ + ir_insn *insn = &ctx->ir_base[start]; + uint32_t n = insn->inputs_count; + ir_ref *p = insn->ops + 1; + + if (n == 1) { + return *p < start; + } else { + IR_ASSERT(n > 1); + for (; n > 0; p++, n--) { + ir_ref input = *p; + if (input < start) { + /* ordered */ + } else if ((bb->flags & IR_BB_LOOP_HEADER) + && (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) { + /* back-edge of reducible loop */ + } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) + && (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) { + /* closing edge of irreducible loop */ + } else { + return 0; + } + } + return 1; + } +} + +static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next) +{ + uint32_t b, succ, count, *q, *xlat; + ir_block *bb; + ir_ref ref, n, prev; + ir_worklist worklist; + ir_block *new_blocks; + +#if 0 + for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { + if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) { + goto fix; + } + } + return; + +fix: +#endif + count = ctx->cfg_blocks_count + 1; + new_blocks = ir_mem_malloc(count * sizeof(ir_block)); + xlat = ir_mem_malloc(count * sizeof(uint32_t)); + ir_worklist_init(&worklist, count); + ir_worklist_push(&worklist, 1); + while (ir_worklist_len(&worklist) != 0) { +next: + b = ir_worklist_peek(&worklist); + bb = &ctx->cfg_blocks[b]; + n = bb->successors_count; + if (n == 1) { + succ = ctx->cfg_edges[bb->successors]; + if (ir_worklist_push(&worklist, succ)) { + goto next; + } + } else if (n > 1) { + uint32_t best = 0; + uint32_t best_loop_depth = 0; + + q = ctx->cfg_edges + bb->successors + n; + do { + q--; + succ = *q; + if (ir_bitset_in(worklist.visited, succ)) { + /* already processed */ + } else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER) + && (succ == b || ctx->cfg_blocks[b].loop_header == succ)) { + /* back-edge of reducible loop */ + } else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP) + && (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) 
{ + /* closing edge of irreducible loop */ + } else if (!best) { + best = succ; + best_loop_depth = ctx->cfg_blocks[best].loop_depth; + } else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) { + /* prefer deeper loop */ + best = succ; + best_loop_depth = ctx->cfg_blocks[best].loop_depth; + } + n--; + } while (n > 0); + if (best) { + ir_worklist_push(&worklist, best); + goto next; + } + } + ir_worklist_pop(&worklist); + count--; + new_blocks[count] = *bb; + xlat[b] = count; + } + IR_ASSERT(count == 1); + xlat[0] = 0; + ir_worklist_free(&worklist); + + prev = 0; + for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { + bb->idom = xlat[bb->idom]; + bb->loop_header = xlat[bb->loop_header]; + n = bb->successors_count; + if (n > 0) { + for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) { + *q = xlat[*q]; + } + } + n = bb->predecessors_count; + if (n > 0) { + for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) { + *q = xlat[*q]; + } + } + _next[prev] = bb->start; + _prev[bb->start] = prev; + prev = bb->end; + } + _next[0] = 0; + _next[prev] = 0; + + for (ref = 2; ref < ctx->insns_count; ref++) { + ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]]; + } + ir_mem_free(xlat); + + ir_mem_free(ctx->cfg_blocks); + ctx->cfg_blocks = new_blocks; +} + int ir_schedule(ir_ctx *ctx) { ir_ctx new_ctx; @@ -800,6 +933,7 @@ int ir_schedule(ir_ctx *ctx) ir_block *bb; ir_insn *insn, *new_insn; ir_use_list *lists, *use_list, *new_list; + bool bad_bb_order = 0; /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */ IR_ASSERT(_blocks[1] == 1); @@ -818,27 +952,61 @@ int ir_schedule(ir_ctx *ctx) } else if (b > prev_b) { bb = &ctx->cfg_blocks[b]; if (i == bb->start) { - IR_ASSERT(bb->end > bb->start); - prev_b = b; - prev_b_end = bb->end; - _prev[bb->end] = 0; - /* add to the end of the list */ - _next[j] = i; - _prev[i] = j; - j = i; - } else { - IR_ASSERT(i != bb->end); + if (bb->end > bb->start) { + prev_b = b; + prev_b_end = bb->end; + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else { + prev_b = 0; + prev_b_end = 0; + k = bb->end; + while (_blocks[_prev[k]] == b) { + k = _prev[k]; + } + /* insert before "k" */ + _prev[i] = _prev[k]; + _next[i] = k; + _next[_prev[k]] = i; + _prev[k] = i; + } + if (!ir_is_good_bb_order(ctx, b, bb, i)) { + bad_bb_order = 1; + } + } else if (i != bb->end) { /* move down late (see the following loop) */ _next[i] = _move_down; _move_down = i; + } else { + prev_b = 0; + prev_b_end = 0; + if (bb->start > bb->end) { + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else { + k = bb->start; + while (_blocks[_next[k]] == b) { + k = _next[k]; + } + /* insert after "k" */ + _next[i] = _next[k]; + _prev[i] = k; + _prev[_next[k]] = i; + _next[k] = i; + } } } else if (b) { bb = &ctx->cfg_blocks[b]; IR_ASSERT(i != bb->start); - if (_prev[bb->end]) { + if (i > bb->end) { /* move up, insert before the end of the already scheduled BB */ k = bb->end; } else { + IR_ASSERT(i > bb->start); /* move up, insert at the end of the block */ k = ctx->cfg_blocks[b + 1].start; } @@ -883,6 +1051,10 @@ int ir_schedule(ir_ctx *ctx) } #endif + if (bad_bb_order) { + ir_fix_bb_order(ctx, _prev, _next); + } + _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); _xlat += ctx->consts_count; _xlat[IR_TRUE] = IR_TRUE; @@ -904,6 +1076,11 @@ int ir_schedule(ir_ctx *ctx) if (insn->op == IR_CASE_VAL) { IR_ASSERT(insn->op2 < IR_TRUE); consts_count += 
ir_count_constant(_xlat, insn->op2); + } else if (insn->op == IR_CASE_RANGE) { + IR_ASSERT(insn->op2 < IR_TRUE); + consts_count += ir_count_constant(_xlat, insn->op2); + IR_ASSERT(insn->op3 < IR_TRUE); + consts_count += ir_count_constant(_xlat, insn->op3); } n = insn->inputs_count; insns_count += ir_insn_inputs_to_len(n); diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 69a0101d24e..369b4c34e37 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -9,6 +9,7 @@ #define IR_PRIVATE_H #include <string.h> #include <stdlib.h> +#include <stddef.h> #ifdef IR_DEBUG # include <assert.h> @@ -62,7 +63,7 @@ #define IR_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define IR_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1))) +#define IR_IS_POWER_OF_TWO(x) ((x) && (!((x) & ((x) - 1)))) #define IR_LOG2(x) ir_ntzl(x) @@ -257,7 +258,7 @@ IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size) ir_arena *arena = *arena_ptr; char *ptr = (char*)IR_ALIGNED_SIZE((uintptr_t)arena->ptr, 8); - if (EXPECTED(size <= (size_t)(arena->end - ptr))) { + if (EXPECTED((ptrdiff_t)size <= (ptrdiff_t)(arena->end - ptr))) { arena->ptr = ptr + size; } else { size_t arena_size = diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index b12cc267af6..ea787f162ec 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -97,10 +97,14 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i); if (insn->op == IR_FUNC) { - fprintf(f, "func %s", ir_get_str(ctx, insn->val.name)); + fprintf(f, "func %s%s", + (save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "", + ir_get_str(ctx, insn->val.name)); ir_print_proto(ctx, insn->proto, f); } else if (insn->op == IR_SYM) { - fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.name)); + fprintf(f, "sym(%s%s)", + (save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "", + ir_get_str(ctx, insn->val.name)); } else if (insn->op == IR_FUNC_ADDR) { fprintf(f, "func *"); ir_print_const(ctx, insn, f, true); @@ -140,6 +144,9 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth); } } + if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) { + fprintf(f, ", IRREDUCIBLE"); + } if (bb->predecessors_count) { uint32_t i; diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 2e006516df8..ae0eebadee5 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -458,6 +458,22 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) return v1->val.u64 == v2->val.u64; } +static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c) +{ + ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; + ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b]; + ir_insn *v3 = IR_IS_CONST_REF(c) ? 
&ctx->ir_base[c] : &_values[c]; + + IR_ASSERT(!IR_IS_SYM_CONST(v1->op)); + IR_ASSERT(!IR_IS_SYM_CONST(v2->op)); + IR_ASSERT(!IR_IS_SYM_CONST(v3->op)); + if (IR_IS_TYPE_SIGNED(v1->type)) { + return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64; + } else { + return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64; + } +} + #ifdef IR_SCCP_TRACE static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i) { @@ -676,6 +692,11 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi } } else if (use_insn->op == IR_CASE_DEFAULT) { use_case = use; + } else if (use_insn->op == IR_CASE_RANGE) { + if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) { + use_case = use; + break; + } } } if (use_case) { @@ -1732,7 +1753,20 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_ref *p, n, input; if (IR_IS_CONST_REF(ref)) { - return ir_const(ctx, insn->val, type); + ir_val val; + + switch (type) { + case IR_I8: val.i64 = insn->val.i8; break; + case IR_U8: val.u64 = insn->val.u8; break; + case IR_I16: val.i64 = insn->val.i16; break; + case IR_U16: val.u64 = insn->val.u16; break; + case IR_I32: val.i64 = insn->val.i32; break; + case IR_U32: val.u64 = insn->val.u32; break; + case IR_CHAR:val.i64 = insn->val.i8; break; + case IR_BOOL:val.u64 = insn->val.u8 != 0; break; + default: IR_ASSERT(0); val.u64 = 0; + } + return ir_const(ctx, val, type); } else { ir_bitqueue_add(worklist, ref); switch (insn->op) { @@ -2391,7 +2425,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, } start_ref = end->op1; start = &ctx->ir_base[start_ref]; - if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) { + if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) { return 0; } if (ctx->use_lists[start_ref].count != 1) { diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 76602c2b4bc..d9967d24dbe 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1569,6 +1569,20 @@ op2_const: constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; break; + case IR_SSE_SQRT: + case IR_SSE_RINT: + case IR_SSE_FLOOR: + case IR_SSE_CEIL: + case IR_SSE_TRUNC: + case IR_SSE_NEARBYINT: + insn = &ctx->ir_base[ref]; + flags = IR_USE_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + if (IR_IS_CONST_REF(insn->op3)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[n] = IR_TMP_REG(3, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; } constraints->tmps_count = n; @@ -2630,6 +2644,7 @@ store_int: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: + case IR_CASE_RANGE: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: @@ -6868,7 +6883,24 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *value = &ctx->ir_base[insn->op2]; + + if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) { + | fldz + } else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) { + | fld1 + } else { + int label = ir_const_label(ctx, insn->op2); + + if (type == IR_DOUBLE) { + | fld qword [=>label] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [=>label] + } + } + } else 
if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); @@ -8442,11 +8474,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { @@ -8471,11 +8507,15 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; - ir_reg def_reg = ctx->regs[def][0]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { @@ -8541,6 +8581,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) ir_val min, max; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; + bool has_case_range = 0; type = ctx->ir_base[insn->op2].type; IR_ASSERT(tmp_reg != IR_REG_NONE); @@ -8570,6 +8611,21 @@ max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); } count++; + } else if (use_insn->op == IR_CASE_RANGE) { + has_case_range = 1; + val = &ctx->ir_base[use_insn->op2]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + ir_insn *val2 = &ctx->ir_base[use_insn->op3]; + IR_ASSERT(!IR_IS_SYM_CONST(val2->op)); + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val2->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val2->val.u64); + } } else { IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); default_label = ir_skip_empty_target_blocks(ctx, use_block); @@ -8583,7 +8639,7 @@ } /* Generate a table jmp or a sequence of calls */ - if (count > 2 && (max.i64-min.i64) < count * 8) { + if (!has_case_range && count > 2 && (max.i64-min.i64) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); for (i = 0; i <= (max.i64 - min.i64); i++) { @@ -8747,6 +8803,42 @@ |.endif } | je =>label + } else if (use_insn->op == IR_CASE_RANGE) { + val = &ctx->ir_base[use_insn->op2]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + label = ir_skip_empty_target_blocks(ctx, use_block); + if (IR_IS_32BIT(type, val->val)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Ra(tmp_reg), val->val.i64 + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jl >1 + } else { + | jb >1 + } + val = &ctx->ir_base[use_insn->op3]; + IR_ASSERT(!IR_IS_SYM_CONST(val->op)); + label = ir_skip_empty_target_blocks(ctx, use_block); + if (IR_IS_32BIT(type, 
val->val)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Ra(tmp_reg), val->val.i64 + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jle =>label + } else { + | jbe =>label + } + |1: } } if (default_label) { @@ -9221,6 +9313,58 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) return; } + /* Move op2 to a tmp register before epilogue if it's in + * used_preserved_regs, because it will be overridden. */ + + ir_reg op2_reg = IR_REG_NONE; + ir_mem mem = IR_MEM_B(IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2)) { + op2_reg = ctx->regs[def][2]; + + ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER); + if (ctx->flags & IR_USE_FRAME_POINTER) { + preserved_regs |= IR_REGSET(IR_REG_FRAME_POINTER); + } + + bool is_spill_slot = op2_reg != IR_REG_NONE + && IR_REG_SPILLED(op2_reg) + && ctx->vregs[insn->op2]; + + if (op2_reg != IR_REG_NONE && !is_spill_slot) { + if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) { + ir_ref orig_op2_reg = op2_reg; + op2_reg = IR_REG_RAX; + + if (IR_REG_SPILLED(orig_op2_reg)) { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } else { + ir_type type = ctx->ir_base[insn->op2].type; + | ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg) + } + } else { + op2_reg = IR_REG_NUM(op2_reg); + } + } else { + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + IR_ASSERT(op2_reg == IR_REG_NONE); + mem = ir_fuse_load(ctx, def, insn->op2); + } else { + mem = ir_ref_spill_slot(ctx, insn->op2); + } + ir_reg base = IR_MEM_BASE(mem); + ir_reg index = IR_MEM_INDEX(mem); + if ((base != IR_REG_NONE && IR_REGSET_IN(preserved_regs, base)) || + (index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) { + op2_reg = IR_REG_RAX; + + ir_type type = ctx->ir_base[insn->op2].type; + ir_emit_load_mem_int(ctx, type, op2_reg, mem); + } else { + op2_reg = IR_REG_NONE; + } + } + } + ir_emit_epilogue(ctx); if (IR_IS_CONST_REF(insn->op2)) { @@ -9246,22 +9390,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.endif } } else { - ir_reg op2_reg = ctx->regs[def][2]; - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } + IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg)); | jmp Ra(op2_reg) } else { - ir_mem mem; - - if (ir_rule(ctx, insn->op2) & IR_FUSED) { - mem = ir_fuse_load(ctx, def, insn->op2); - } else { - mem = ir_ref_spill_slot(ctx, insn->op2); - } | ASM_TMEM_OP jmp, aword, mem } } @@ -10314,6 +10446,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: + case IR_CASE_RANGE: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN:
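
For context, the new CASE_RANGE projection introduced by this update is emitted through the ir_CASE_RANGE() builder macro declared in ir_builder.h above. The following is a minimal sketch of frontend usage, assuming the standard IR builder setup where the macros expand against a local `ctx`; the function name and constants are illustrative, not part of the patch. Both range bounds must be compile-time constants of the switch value's type, as the backends and SCCP assert.

#include "ir.h"
#include "ir_builder.h"

/* Builds the equivalent of: if (x >= 1 && x <= 10) return 1; return 0; */
static void build_range_switch(ir_ctx *ctx)
{
	ir_START();
	ir_ref x = ir_PARAM(IR_I32, "x", 1);
	ir_ref sw = ir_SWITCH(x);

	/* a single CASE_RANGE projection covers the whole interval 1..10 */
	ir_CASE_RANGE(sw, ir_CONST_I32(1), ir_CONST_I32(10));
	ir_RETURN(ir_CONST_I32(1));

	ir_CASE_DEFAULT(sw);
	ir_RETURN(ir_CONST_I32(0));
}

Note that ir_emit_switch() in both backends (see the !has_case_range guard above) falls back from a jump table to a compare-and-branch sequence whenever any CASE_RANGE projection is present, so ranges trade table density for two comparisons per range.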