diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 4f0ac2ee697..f7e529929d2 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -518,6 +518,9 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 				insn = &ctx->ir_base[insn->op3];
 				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
 				n = 1;
+			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
+				constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+				n = 1;
 			}
 			break;
 		case IR_LOAD_FP:
@@ -919,6 +922,7 @@ binop_fp:
 				return insn->op;
 			}
 		case IR_CALL:
+			ctx->flags |= IR_USE_FRAME_POINTER;
 			ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT;
 			return IR_CALL;
 		case IR_VAR:
@@ -935,7 +939,7 @@ binop_fp:
 					}
 				}
 				ctx->flags |= IR_USE_FRAME_POINTER;
-				ctx->flags2 |= IR_HAS_ALLOCA;
+				ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT;
 			}
 			return IR_ALLOCA;
 		case IR_LOAD:
@@ -3755,7 +3759,10 @@ static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
 	IR_ASSERT(op3_reg != IR_REG_NONE);
-	if (IR_REG_SPILLED(op3_reg) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
+	if (IR_REG_SPILLED(op3_reg)
+	 && !IR_IS_CONST_REF(insn->op3)
+	 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
+	 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
 		return; // fake store
 	}
 	if (IR_REG_SPILLED(op3_reg)) {
@@ -4041,12 +4048,8 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
 		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
 
-		if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-			/* Stack must be 16 byte aligned */
-			size = IR_ALIGNED_SIZE(size, 16);
-		} else {
-			size = IR_ALIGNED_SIZE(size, 8);
-		}
+		/* Stack must be 16 byte aligned */
+		size = IR_ALIGNED_SIZE(size, 16);
 		if (aarch64_may_encode_imm12(size)) {
 		|	sub sp, sp, #size
 		} else {
@@ -4057,7 +4060,7 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			ctx->call_stack_size += size;
 		}
 	} else {
-		int32_t alignment = (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) ? 16 : 8;
+		int32_t alignment = 16;
 		ir_reg op2_reg = ctx->regs[def][2];
 		ir_type type = ctx->ir_base[insn->op2].type;
 
@@ -4095,18 +4098,14 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
 		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
 
-		if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-			/* Stack must be 16 byte aligned */
-			size = IR_ALIGNED_SIZE(size, 16);
-		} else {
-			size = IR_ALIGNED_SIZE(size, 8);
-		}
+		/* Stack must be 16 byte aligned */
+		size = IR_ALIGNED_SIZE(size, 16);
 		|	add sp, sp, #size
 		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
 			ctx->call_stack_size -= size;
 		}
 	} else {
-//		int32_t alignment = (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) ? 16 : 8;
+//		int32_t alignment = 16;
 		ir_reg op2_reg = ctx->regs[def][2];
 		ir_type type = ctx->ir_base[insn->op2].type;
 
@@ -5766,7 +5765,6 @@ void ir_fix_stack_frame(ir_ctx *ctx)
 			ctx->stack_frame_alignment += sizeof(void*);
 		}
 	} else if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-		ctx->flags |= IR_USE_FRAME_POINTER;
 		/* Stack must be 16 byte aligned */
 		if (!(ctx->flags & IR_FUNCTION)) {
 			while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
index a7c0403502a..07789563e6c 100644
--- a/ext/opcache/jit/ir/ir_ra.c
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -1932,20 +1932,33 @@ int ir_coalesce(ir_ctx *ctx)
 				IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE);
 				if (input_insn->op2 == use
 				 && input_insn->op1 != use
-				 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
-				 && ctx->live_intervals[v2]->end == IR_USE_LIVE_POS_FROM_REF(input)) {
+				 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)) {
 					ir_live_range *r = &ctx->live_intervals[v2]->range;
 
-					while (r->next) {
+					do {
+						if (r->end == IR_USE_LIVE_POS_FROM_REF(input)) {
+							break;
+						}
 						r = r->next;
+					} while (r);
+					if (r) {
+						r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
+						if (!r->next) {
+							ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
+						}
+						if (ir_vregs_overlap(ctx, v1, v2)) {
+							r->end = IR_USE_LIVE_POS_FROM_REF(input);
+							if (!r->next) {
+								ctx->live_intervals[v2]->end = IR_USE_LIVE_POS_FROM_REF(input);
+							}
+						} else {
+							ir_swap_operands(ctx, input, input_insn);
+							IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
+							ir_vregs_coalesce(ctx, v1, v2, input, use);
+							compact = 1;
+							continue;
+						}
 					}
-					r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
-					ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
-					ir_swap_operands(ctx, input, input_insn);
-					IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
-					ir_vregs_coalesce(ctx, v1, v2, input, use);
-					compact = 1;
-					continue;
 				}
 			}
 #endif
@@ -2609,14 +2622,11 @@ static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_allo
 		return ir_allocate_small_spill_slot(ctx, size, data);
 	}
 
-	if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-		/* Stack must be 16 byte aligned */
-		size = IR_ALIGNED_SIZE(size, 16);
-	} else {
-		size = IR_ALIGNED_SIZE(size, 8);
-	}
-	ret = ctx->stack_frame_size;
-	ctx->stack_frame_size += size;
+	/* Align stack allocated data to 16 byte */
+	ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
+	ret = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
+	size = IR_ALIGNED_SIZE(size, 8);
+	ctx->stack_frame_size = ret + size;
 
 	return ret;
 }
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index fd7a8f55b3f..21b6335114d 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -967,6 +967,145 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
 	return ref;
 }
 
+static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
+{
+	ir_val new_val;
+
+	switch (val_insn->type) {
+		default:
+			IR_ASSERT(0);
+		case IR_I8:
+		case IR_U8:
+		case IR_BOOL:
+			if (op == IR_SEXT) {
+				new_val.i64 = (int64_t)val_insn->val.i8;
+			} else {
+				new_val.u64 = (uint64_t)val_insn->val.u8;
+			}
+			break;
+		case IR_I16:
+		case IR_U16:
+			if (op == IR_SEXT) {
+				new_val.i64 = (int64_t)val_insn->val.i16;
+			} else {
+				new_val.u64 = (uint64_t)val_insn->val.u16;
+			}
+			break;
+		case IR_I32:
+		case IR_U32:
+			if (op == IR_SEXT) {
+				new_val.i64 = (int64_t)val_insn->val.i32;
+			} else {
+				new_val.u64 = (uint64_t)val_insn->val.u32;
+			}
+			break;
+	}
+	return ir_const(ctx, new_val, type);
+}
+
+static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type)
+{
+	ir_ref new_ext_ref = ir_emit1(ctx, IR_OPTX(op, type, 1), src_ref);
+
+	ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
+	ctx->use_lists[new_ext_ref].count = 0;
+	ctx->use_lists[new_ext_ref].refs = IR_UNUSED;
+	ir_use_list_add(ctx, new_ext_ref, var_ref);
+	if (!IR_IS_CONST_REF(src_ref)) {
+		ir_use_list_replace_one(ctx, src_ref, var_ref, new_ext_ref);
+	}
+	return new_ext_ref;
+}
+
+static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
+{
+	ir_type type = insn->type;
+	ir_op op = insn->op;
+	ir_ref ref = insn->op1;
+	ir_insn *phi_insn = &ctx->ir_base[ref];
+	ir_insn *op_insn;
+	ir_use_list *use_list;
+	ir_ref n, *p, use, op_ref;
+
+	/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
+	if (phi_insn->op != IR_PHI
+	 || phi_insn->inputs_count != 3 /* (2 values) */
+	 || ctx->ir_base[phi_insn->op1].op != IR_LOOP_BEGIN) {
+		return 0;
+	}
+
+	op_ref = phi_insn->op3;
+	op_insn = &ctx->ir_base[op_ref];
+	if ((op_insn->op != IR_ADD && op_insn->op != IR_SUB && op_insn->op != IR_MUL)
+	 || op_insn->op1 != ref
+	 || op_insn->op2 == ref
+	 || ctx->use_lists[op_ref].count != 1) {
+		return 0;
+	}
+
+	/* Check if we may change the type of the induction variable */
+	use_list = &ctx->use_lists[ref];
+	n = use_list->count;
+	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+		use = *p;
+		if (use == op_ref || use == ext_ref) {
+			continue;
+		} else {
+			ir_insn *use_insn = &ctx->ir_base[use];
+
+			if ((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
+			 && use_insn->op1 == ref
+			 && use_insn->op2 != ref) {
+				continue;
+			} else if (use_insn->op == IR_IF) {
+				continue;
+			} else {
+				return 0;
+			}
+		}
+	}
+
+	phi_insn->type = insn->type;
+	op_insn->type = insn->type;
+
+	use_list = &ctx->use_lists[ref];
+	n = use_list->count;
+	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+		use = *p;
+		if (use == ext_ref) {
+			continue;
+		} else {
+			ir_insn *use_insn = &ctx->ir_base[use];
+
+			if (use_insn->op == IR_IF) {
+				continue;
+			}
+			IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
+				|| use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
+			 && use_insn->op1 == ref
+			 && use_insn->op2 != ref);
+			if (IR_IS_CONST_REF(use_insn->op2)
+			 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
+				ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
+			} else {
+				ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type);
+			}
+		}
+	}
+
+	ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist);
+
+	phi_insn = &ctx->ir_base[ref];
+	if (IR_IS_CONST_REF(phi_insn->op2)
+	 && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
+		ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
+	} else {
+		ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type);
+	}
+
+	return 1;
+}
+
 int ir_sccp(ir_ctx *ctx)
 {
 	ir_ref i, j, n, *p, use;
@@ -1025,12 +1164,18 @@ int ir_sccp(ir_ctx *ctx)
 				}
 				if (!may_benefit) {
 					IR_MAKE_BOTTOM(i);
-					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
+					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
+					 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
 						ir_bitqueue_add(&worklist2, i);
 					}
 				} else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
 					/* not changed */
 					continue;
+				} else if (_values[i].optx == IR_BOTTOM) {
+					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
+					 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
+						ir_bitqueue_add(&worklist2, i);
+					}
 				}
 			} else {
 				IR_MAKE_BOTTOM(i);
@@ -1298,15 +1443,17 @@
 						ir_ref ref = ir_promote_d2f(ctx, insn->op1, i);
 						insn->op1 = ref;
 						ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
+						break;
 					}
 				} else {
 					if (ir_may_promote_f2d(ctx, insn->op1)) {
 						ir_ref ref = ir_promote_f2d(ctx, insn->op1, i);
 						insn->op1 = ref;
 						ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
+						break;
 					}
 				}
-				break;
+				goto folding;
 			case IR_FP2INT:
 				if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
 					if (ir_may_promote_d2f(ctx, insn->op1)) {
@@ -1317,15 +1464,25 @@
 						insn->op1 = ir_promote_f2d(ctx, insn->op1, i);
 					}
 				}
-				break;
+				goto folding;
 			case IR_TRUNC:
 				if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) {
 					ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i);
 					insn->op1 = ref;
 					ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
+					break;
 				}
+				goto folding;
+			case IR_SEXT:
+			case IR_ZEXT:
+				if (ir_try_promote_ext(ctx, i, insn, &worklist2)) {
+					break;
+				}
+				goto folding;
+			case IR_PHI:
 				break;
 			default:
+folding:
 				ir_sccp_fold2(ctx, i, &worklist2);
 				break;
 			}
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index a975392d00f..e5822d8323b 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1285,6 +1285,9 @@ op2_const:
 		insn = &ctx->ir_base[ref];
 		if (IR_IS_CONST_REF(insn->op3)) {
 			n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints);
+		} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
+			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+			n++;
 		}
 		break;
 	case IR_STORE_INT:
@@ -2281,7 +2284,7 @@ binop_fp:
 				}
 			}
 			ctx->flags |= IR_USE_FRAME_POINTER;
-			ctx->flags2 |= IR_HAS_ALLOCA;
+			ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT;
 		}
 		return IR_ALLOCA;
 	case IR_VSTORE:
@@ -7431,7 +7434,9 @@ static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
 	if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg))
-	 && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
+	 && !IR_IS_CONST_REF(insn->op3)
+	 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
+	 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
 		return; // fake store
 	}
 	if (IR_IS_CONST_REF(insn->op3)) {
@@ -7458,7 +7463,9 @@ static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
 	if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg))
-	 && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
+	 && !IR_IS_CONST_REF(insn->op3)
+	 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
+	 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
 		return; // fake store
 	}
 	if (IR_IS_CONST_REF(insn->op3)) {
@@ -7763,18 +7770,14 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
 		IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64));
 
-		if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-			/* Stack must be 16 byte aligned */
-			size = IR_ALIGNED_SIZE(size, 16);
-		} else {
-			size = IR_ALIGNED_SIZE(size, 8);
-		}
+		/* Stack must be 16 byte aligned */
+		size = IR_ALIGNED_SIZE(size, 16);
 		|	ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size
 		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
 			ctx->call_stack_size += size;
 		}
 	} else {
-		int32_t alignment = (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) ? 16 : 8;
+		int32_t alignment = 16;
 		ir_reg op2_reg = ctx->regs[def][2];
 		ir_type type = ctx->ir_base[insn->op2].type;
 
@@ -7821,18 +7824,14 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
 		IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64));
 
-		if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
-			/* Stack must be 16 byte aligned */
-			size = IR_ALIGNED_SIZE(size, 16);
-		} else {
-			size = IR_ALIGNED_SIZE(size, 8);
-		}
+		/* Stack must be 16 byte aligned */
+		size = IR_ALIGNED_SIZE(size, 16);
 		|	ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size
 		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
 			ctx->call_stack_size -= size;
 		}
 	} else {
-//		int32_t alignment = (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) ? 16 : 8;
+//		int32_t alignment = 16;
 		ir_reg op2_reg = ctx->regs[def][2];
 		ir_type type = ctx->ir_base[insn->op2].type;