diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index d41c3803e51..521d33a2f6c 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -201,6 +201,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_flag_s3 (ir_op_flag_s | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x3 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS) #define ir_op_flag_a2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) @@ -433,8 +434,10 @@ static IR_NEVER_INLINE ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, while (ref) { insn = &ctx->ir_base[ref]; if (UNEXPECTED(insn->val.u64 >= val.u64)) { - if (insn->val.u64 == val.u64 && insn->optx == optx) { - return ref; + if (insn->val.u64 == val.u64) { + if (insn->optx == optx) { + return ref; + } } else { break; } @@ -779,7 +782,7 @@ restart: } while (1); ir_fold_restart: - if (!(ctx->flags & IR_OPT_IN_SCCP)) { + if (!(ctx->flags2 & IR_OPT_IN_SCCP)) { op1_insn = ctx->ir_base + op1; op2_insn = ctx->ir_base + op2; op3_insn = ctx->ir_base + op3; @@ -792,7 +795,7 @@ ir_fold_restart: return IR_FOLD_DO_RESTART; } ir_fold_cse: - if (!(ctx->flags & IR_OPT_IN_SCCP)) { + if (!(ctx->flags2 & IR_OPT_IN_SCCP)) { /* Local CSE */ ref = _ir_fold_cse(ctx, opt, op1, op2, op3); if (ref) { @@ -816,7 +819,7 @@ ir_fold_cse: return ref; } ir_fold_emit: - if (!(ctx->flags & IR_OPT_IN_SCCP)) { + if (!(ctx->flags2 & IR_OPT_IN_SCCP)) { return ir_emit(ctx, opt, op1, op2, op3); } else { ctx->fold_insn.optx = opt; @@ -826,14 +829,14 @@ ir_fold_emit: return IR_FOLD_DO_EMIT; } ir_fold_copy: - if (!(ctx->flags & IR_OPT_IN_SCCP)) { + if (!(ctx->flags2 & IR_OPT_IN_SCCP)) { return ref; } else { ctx->fold_insn.op1 = ref; return IR_FOLD_DO_COPY; } ir_fold_const: - if (!(ctx->flags & IR_OPT_IN_SCCP)) { + if (!(ctx->flags2 & IR_OPT_IN_SCCP)) { return ir_const(ctx, val, IR_OPT_TYPE(opt)); } else { ctx->fold_insn.type = IR_OPT_TYPE(opt); @@ -2320,3 +2323,27 @@ check_aliasing: } ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val); } + +void _ir_VA_START(ir_ctx *ctx, ir_ref list) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit2(ctx, IR_VA_START, ctx->control, list); +} + +void _ir_VA_END(ir_ctx *ctx, ir_ref list) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit2(ctx, IR_VA_END, ctx->control, list); +} + +void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_VA_COPY, ctx->control, dst, src); +} + +ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list); +} diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 18ac9e7a33f..a09bf82ec33 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -136,6 +136,18 @@ typedef enum _ir_type { IR_LAST_TYPE } ir_type; +#ifdef IR_64 +# define IR_SIZE_T IR_U64 +# define IR_SSIZE_T IR_I64 +# define IR_UINTPTR_T IR_U64 +# define IR_INTPTR_T IR_I64 +#else +# define IR_SIZE_T IR_U32 +# define IR_SSIZE_T IR_I32 +# define IR_UINTPTR_T IR_U32 +# 
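The new `_ir_VA_*` helpers all follow the existing control-chain pattern: assert an open control edge, then emit the node as the new `ctx->control`. A minimal sketch of driving them from a frontend, assuming the usual `_ir_CTX` builder setup and a SysV x86-64 `va_list` of 24 bytes (both assumptions, not part of this patch):

```c
/* Sketch: fetch the first I32 vararg of the function being built.
 * The 24-byte slot size is the SysV x86-64 sizeof(va_list) and is
 * an assumption here, as is the surrounding function setup. */
ir_ref list = ir_ALLOCA(ir_CONST_U32(24)); /* stack slot for the va_list */
ir_VA_START(list);
ir_ref first = ir_VA_ARG(list, IR_I32);    /* emits IR_OPT(IR_VA_ARG, IR_I32) */
ir_VA_END(list);
```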
define IR_INTPTR_T IR_I32
+#endif
+
 /* List of IR opcodes
  * ==================
  *
@@ -252,6 +264,9 @@ typedef enum _ir_type {
 _(ROL, d2, def, def, ___) /* rotate left */ \
 _(ROR, d2, def, def, ___) /* rotate right */ \
 _(BSWAP, d1, def, ___, ___) /* byte swap */ \
+ _(CTPOP, d1, def, ___, ___) /* count population */ \
+ _(CTLZ, d1, def, ___, ___) /* count leading zeros */ \
+ _(CTTZ, d1, def, ___, ___) /* count trailing zeros */ \
 \
 /* branch-less conditional ops */ \
 _(MIN, d2C, def, def, ___) /* min(op1, op2) */ \
@@ -262,6 +277,7 @@ typedef enum _ir_type {
 _(PHI, pN, reg, def, def) /* SSA Phi function */ \
 _(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \
 _(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \
+ _(FRAME_ADDR, d0, ___, ___, ___) /* function frame address */ \
 /* (USE, RENAME) */ \
 \
 /* data ops */ \
@@ -290,6 +306,12 @@ typedef enum _ir_type {
 _(TRAP, x1, src, ___, ___) /* DebugBreak */ \
 /* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \
 \
+ /* va_args */ \
+ _(VA_START, x2, src, def, ___) /* va_start(va_list) */ \
+ _(VA_END, x2, src, def, ___) /* va_end(va_list) */ \
+ _(VA_COPY, x3, src, def, def) /* va_copy(dst, src) */ \
+ _(VA_ARG, x2, src, def, ___) /* va_arg(va_list) */ \
+ \
 /* guards */ \
 _(GUARD, c3, src, def, def) /* IF without second successor */ \
 _(GUARD_NOT , c3, src, def, def) /* IF without second successor */ \
@@ -399,6 +421,7 @@ typedef union _ir_val {
 #define IR_CONST_EMIT (1<<0)
 #define IR_CONST_FASTCALL_FUNC (1<<1)
 #define IR_CONST_VARARG_FUNC (1<<2)
+#define IR_CONST_BUILTIN_FUNC (1<<3)
 
 /* IR Instruction */
 typedef struct _ir_insn {
@@ -473,38 +496,18 @@ void ir_strtab_free(ir_strtab *strtab);
 #define IR_SKIP_PROLOGUE (1<<6) /* Don't generate function prologue. */
 #define IR_USE_FRAME_POINTER (1<<7)
 #define IR_PREALLOCATED_STACK (1<<8)
-#define IR_HAS_ALLOCA (1<<9)
-#define IR_HAS_CALLS (1<<10)
-#define IR_NO_STACK_COMBINE (1<<11)
-#define IR_START_BR_TARGET (1<<12)
-#define IR_ENTRY_BR_TARGET (1<<13)
-#define IR_GEN_ENDBR (1<<14)
-#define IR_MERGE_EMPTY_ENTRIES (1<<15)
-
-#define IR_CFG_HAS_LOOPS (1<<16)
-#define IR_IRREDUCIBLE_CFG (1<<17)
+#define IR_NO_STACK_COMBINE (1<<9)
+#define IR_START_BR_TARGET (1<<10)
+#define IR_ENTRY_BR_TARGET (1<<11)
+#define IR_GEN_ENDBR (1<<12)
+#define IR_MERGE_EMPTY_ENTRIES (1<<13)
 
 #define IR_OPT_FOLDING (1<<18)
 #define IR_OPT_CFG (1<<19) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
 #define IR_OPT_CODEGEN (1<<20)
-#define IR_OPT_IN_SCCP (1<<21)
-#define IR_LINEAR (1<<22)
 #define IR_GEN_NATIVE (1<<23)
 #define IR_GEN_CODE (1<<24) /* C or LLVM */
 
-/* Temporary: SCCP -> CFG */
-#define IR_SCCP_DONE (1<<25)
-
-/* Temporary: Dominators -> Loops */
-#define IR_NO_LOOPS (1<<25)
-
-/* Temporary: Live Ranges */
-#define IR_LR_HAVE_DESSA_MOVES (1<<25)
-
-/* Temporary: Register Allocator */
-#define IR_RA_HAVE_SPLITS (1<<25)
-#define IR_RA_HAVE_SPILLS (1<<26)
-
 /* debug related */
 #ifdef IR_DEBUG
 # define IR_DEBUG_SCCP (1<<27)
@@ -537,6 +540,7 @@ struct _ir_ctx {
 ir_ref consts_count; /* number of constants stored in constants buffer */
 ir_ref consts_limit; /* size of allocated constants buffer (it's extended when overflow) */
 uint32_t flags; /* IR context flags (see IR_* defines above) */
+ uint32_t flags2; /* IR context private flags (see IR_* defines in ir_private.h) */
 ir_type ret_type; /* Function return type */
 uint32_t mflags; /* CPU specific flags (see IR_X86_... macros below) */
 int32_t status; /* non-zero error code (see IR_ERROR_...
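The three new bit-counting opcodes are defined with these reference semantics in mind (a sketch; the zero-input results shown match the lzcnt/tzcnt and AArch64 clz behaviour, while the x86 bsr/bsf fallback paths leave zero input undefined):

```c
#include <stdint.h>

/* Reference model for CTPOP/CTLZ/CTTZ at 32 bits (illustrative only). */
static uint32_t ref_ctpop32(uint32_t x) { return (uint32_t)__builtin_popcount(x); }
static uint32_t ref_ctlz32(uint32_t x)  { return x ? (uint32_t)__builtin_clz(x) : 32; }
static uint32_t ref_cttz32(uint32_t x)  { return x ? (uint32_t)__builtin_ctz(x) : 32; }
```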
macros), app may use negative codes */ @@ -766,6 +770,7 @@ struct _ir_loader { uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types); bool (*sym_dcl) (ir_loader *loader, const char *name, uint32_t flags, size_t size, bool has_data); bool (*sym_data) (ir_loader *loader, ir_type type, uint32_t count, const void *data); + bool (*sym_data_ref) (ir_loader *loader, ir_op op, const char *ref); bool (*sym_data_end) (ir_loader *loader); bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name); bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name); @@ -816,6 +821,7 @@ int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void # define IR_X86_SSE42 (1<<4) # define IR_X86_AVX (1<<5) # define IR_X86_AVX2 (1<<6) +# define IR_X86_BMI1 (1<<7) #endif uint32_t ir_cpuinfo(void); diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index a7cc1c71dba..76b03ebd51d 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -354,6 +354,12 @@ int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constrain n++; } break; + case IR_CTPOP: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_DOUBLE, IR_USE_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + break; case IR_BINOP_FP: case IR_MIN_MAX_INT: insn = &ctx->ir_base[ref]; @@ -649,6 +655,8 @@ binop_fp: return IR_BINOP_INT; case IR_BSWAP: case IR_NOT: + case IR_CTLZ: + case IR_CTTZ: IR_ASSERT(IR_IS_TYPE_INT(insn->type)); return IR_OP_INT; case IR_NEG: @@ -744,7 +752,7 @@ binop_fp: } break; case IR_CALL: - ctx->flags |= IR_HAS_CALLS; + ctx->flags2 |= IR_HAS_CALLS; return IR_CALL; case IR_VAR: return IR_SKIPPED | IR_VAR; @@ -752,7 +760,8 @@ binop_fp: return ctx->use_lists[ref].count > 0 ? 
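`IR_X86_BMI1` gates the `lzcnt`/`tzcnt`/`popcnt` selection later in this patch. Strictly speaking only `tzcnt` is BMI1 (`lzcnt` is ABM and `popcnt` has its own CPUID bit), but the three travel together on real hardware, so a single flag is a practical simplification. A sketch of how the bit could be detected, assuming GCC/clang's `<cpuid.h>` (`ir_cpuinfo()` is the library's actual entry point):

```c
#include <cpuid.h>

/* BMI1 is CPUID.(EAX=7,ECX=0):EBX bit 3 (sketch; not the patch's code). */
static int have_bmi1(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        return 0;
    }
    return (ebx >> 3) & 1;
}
```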
IR_PARAM : IR_SKIPPED | IR_PARAM;
 case IR_ALLOCA:
 if (ctx->flags & IR_FUNCTION) {
- ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA;
+ ctx->flags |= IR_USE_FRAME_POINTER;
+ ctx->flags2 |= IR_HAS_ALLOCA;
 }
 return IR_ALLOCA;
 case IR_LOAD:
@@ -1353,7 +1362,7 @@ static void ir_emit_epilogue(ir_ctx *ctx)
 }
 
 if (ctx->flags & IR_USE_FRAME_POINTER) {
- if (ctx->call_stack_size || (ctx->flags & IR_HAS_ALLOCA)) {
+ if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
 | mov sp, x29
 }
 | ldp x29, x30, [sp], # (ctx->stack_frame_size+16)
@@ -1874,6 +1883,21 @@ static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 | cmp Rw(op1_reg), #0
 | cneg Rw(def_reg), Rw(op1_reg), lt
 }
+ } else if (insn->op == IR_CTLZ) {
+ if (ir_type_size[type] == 1) {
+ | and Rw(def_reg), Rw(op1_reg), #0xff
+ | clz Rw(def_reg), Rw(def_reg)
+ | sub Rw(def_reg), Rw(def_reg), #24
+ } else if (ir_type_size[type] == 2) {
+ | and Rw(def_reg), Rw(op1_reg), #0xffff
+ | clz Rw(def_reg), Rw(def_reg)
+ | sub Rw(def_reg), Rw(def_reg), #16
+ } else {
+ | ASM_REG_REG_OP clz, type, def_reg, op1_reg
+ }
+ } else if (insn->op == IR_CTTZ) {
+ | ASM_REG_REG_OP rbit, insn->type, def_reg, op1_reg
+ | ASM_REG_REG_OP clz, insn->type, def_reg, def_reg
 } else {
 IR_ASSERT(insn->op == IR_BSWAP);
 | ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg
@@ -1883,6 +1907,59 @@
 }
 }
 
+static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+ ir_type type = insn->type;
+ ir_ref op1 = insn->op1;
+ ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+ ir_reg op1_reg = ctx->regs[def][1];
+ ir_reg tmp_reg = ctx->regs[def][2];
+ uint32_t code1 = 0x0e205800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // cnt vN.8b, vN.8b
+ uint32_t code2 = 0x0e31b800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // addv bN, vN.8b
+
+ IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
+
+ if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+ op1_reg = IR_REG_NUM(op1_reg);
+ ir_emit_load(ctx, type, op1_reg, op1);
+ }
+ switch (ir_type_size[insn->type]) {
+ default:
+ IR_ASSERT(0);
+ case 1:
+ | and Rw(def_reg), Rw(op1_reg), #0xff
+ | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
+ | .long code1 // cnt vN.8b, vN.8b
+ | .long code2 // addv bN, vN.8b
+ | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
+ break;
+ case 2:
+ | and Rw(def_reg), Rw(op1_reg), #0xffff
+ | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
+ | .long code1 // cnt vN.8b, vN.8b
+ | .long code2 // addv bN, vN.8b
+ | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
+ break;
+ case 4:
+ | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+ | .long code1 // cnt vN.8b, vN.8b
+ | .long code2 // addv bN, vN.8b
+ | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
+ break;
+ case 8:
+ | fmov Rd(tmp_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+ | .long code1 // cnt vN.8b, vN.8b
+ | .long code2 // addv bN, vN.8b
+ | fmov Rx(def_reg), Rd(tmp_reg-IR_REG_FP_FIRST)
+ break;
+ }
+ if (IR_REG_SPILLED(ctx->regs[def][0])) {
+ ir_emit_store(ctx, type, def, def_reg);
+ }
+}
+
 static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 {
 ir_backend_data *data = ctx->data;
@@ -3372,7 +3449,7 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 IR_ASSERT(IR_IS_TYPE_INT(val->type));
 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
 
- if (ctx->flags & IR_HAS_CALLS) {
+ if (ctx->flags2 & IR_HAS_CALLS) {
 /* Stack must be 16 byte aligned */
 size = 
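The AArch64 CTPOP lowering round-trips through a NEON register because the base ISA has no scalar popcount. Expressed with ACLE intrinsics, the emitted `fmov`/`cnt`/`addv`/`fmov` sequence computes the following (a reference sketch, not backend code):

```c
#include <arm_neon.h>
#include <stdint.h>

/* What the cnt+addv pair computes: per-byte popcounts of the 64-bit
 * value, then a horizontal add across the 8 lanes. */
static uint32_t neon_popcount64(uint64_t x)
{
    uint8x8_t v = vcreate_u8(x);  /* fmov dN, x<op1>      */
    v = vcnt_u8(v);               /* cnt  vN.8b, vN.8b    */
    return vaddv_u8(v);           /* addv bN, vN.8b; fmov */
}
```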
IR_ALIGNED_SIZE(size, 16); } else { @@ -3383,11 +3460,12 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) ctx->call_stack_size += size; } } else { - int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -3419,7 +3497,7 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(IR_IS_TYPE_INT(val->type)); IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); - if (ctx->flags & IR_HAS_CALLS) { + if (ctx->flags2 & IR_HAS_CALLS) { /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); } else { @@ -3430,7 +3508,7 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) ctx->call_stack_size -= size; } } else { -// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; +// int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; @@ -3447,6 +3525,34 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } +static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | mov Rx(def_reg), Rx(IR_REG_X29) + } else { + | add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, def_reg); + } +} + +static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ +} + +static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ +} + +static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ +} + static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; @@ -4886,12 +4992,12 @@ void ir_fix_stack_frame(ir_ctx *ctx) ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; - if ((ctx->flags & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { + if ((ctx->flags2 & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { ctx->stack_frame_size += sizeof(void*); ctx->stack_frame_alignment += sizeof(void*); } - } else if (ctx->flags & IR_HAS_CALLS) { + } else if (ctx->flags2 & IR_HAS_CALLS) { ctx->flags |= IR_USE_FRAME_POINTER; /* Stack must be 16 byte aligned */ if (!(ctx->flags & IR_FUNCTION)) { @@ -5018,6 +5124,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_PI: case IR_PHI: case IR_SNAPSHOT: + case IR_VA_END: break; case IR_MUL_PWR2: case IR_DIV_PWR2: @@ -5030,6 +5137,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_SHIFT_CONST: ir_emit_shift_const(ctx, i, insn); break; + case IR_CTPOP: + ir_emit_ctpop(ctx, i, insn); + break; case IR_OP_INT: ir_emit_op_int(ctx, i, insn); break; @@ -5182,9 +5292,21 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_ALLOCA: ir_emit_alloca(ctx, i, insn); break; + case IR_VA_START: + ir_emit_va_start(ctx, i, insn); + break; + case IR_VA_COPY: + ir_emit_va_copy(ctx, i, insn); + break; + case IR_VA_ARG: + ir_emit_va_arg(ctx, i, insn); + break; case IR_AFREE: ir_emit_afree(ctx, i, insn); break; + 
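The new `FRAME_ADDR` node resolves to the frame base: `x29` when a frame pointer exists, otherwise `sp` plus the fixed frame and call-area sizes. Conceptually it matches the GCC/clang builtin (a loose analogy, since the IR node is resolved per target at emit time):

```c
/* Rough C-level counterpart of the new FRAME_ADDR node (illustrative):
 * with IR_USE_FRAME_POINTER the node reads the saved frame pointer
 * (x29 / rbp); without one the backend folds the constant frame size
 * into the stack pointer instead. */
void *frame_base = __builtin_frame_address(0);
```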
case IR_FRAME_ADDR: + ir_emit_frame_addr(ctx, i); + break; case IR_EXITCALL: ir_emit_exitcall(ctx, i, insn); break; @@ -5198,6 +5320,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_TLS: ir_emit_tls(ctx, i, insn); break; + case IR_TRAP: + | brk; + break; default: IR_ASSERT(0 && "NIY rule/instruction"); dasm_free(&data.dasm_state); @@ -5337,9 +5462,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&data.dasm_state); - if (ctx->code_buffer == NULL) { - ir_mem_unmap(entry, size); - } ctx->data = NULL; ctx->status = IR_ERROR_LINK; return NULL; diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index c7d5abf5e46..bba207fb668 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -389,6 +389,42 @@ extern "C" { #define ir_BSWAP_I32(_op1) ir_UNARY_OP_I32(IR_BSWAP, (_op1)) #define ir_BSWAP_I64(_op1) ir_UNARY_OP_I64(IR_BSWAP, (_op1)) +#define ir_CTPOP(_type, _op1) ir_UNARY_OP(IR_CTPOP, (_type), (_op1)) +#define ir_CTPOP_8(_op1) ir_UNARY_OP_U8(IR_CTPOP, (_op1)) +#define ir_CTPOP_U16(_op1) ir_UNARY_OP_U16(IR_CTPOP, (_op1)) +#define ir_CTPOP_U32(_op1) ir_UNARY_OP_U32(IR_CTPOP, (_op1)) +#define ir_CTPOP_U64(_op1) ir_UNARY_OP_U64(IR_CTPOP, (_op1)) +#define ir_CTPOP_A(_op1) ir_UNARY_OP_A(IR_CTPOP, (_op1)) +#define ir_CTPOP_C(_op1) ir_UNARY_OP_C(IR_CTPOP, (_op1)) +#define ir_CTPOP_I8(_op1) ir_UNARY_OP_I8(IR_CTPOP, (_op1)) +#define ir_CTPOP_I16(_op1) ir_UNARY_OP_I16(IR_CTPOP, (_op1)) +#define ir_CTPOP_I32(_op1) ir_UNARY_OP_I32(IR_CTPOP, (_op1)) +#define ir_CTPOP_I64(_op1) ir_UNARY_OP_I64(IR_CTPOP, (_op1)) + +#define ir_CTLZ(_type, _op1) ir_UNARY_OP(IR_CTLZ, (_type), (_op1)) +#define ir_CTLZ_8(_op1) ir_UNARY_OP_U8(IR_CTLZ, (_op1)) +#define ir_CTLZ_U16(_op1) ir_UNARY_OP_U16(IR_CTLZ, (_op1)) +#define ir_CTLZ_U32(_op1) ir_UNARY_OP_U32(IR_CTLZ, (_op1)) +#define ir_CTLZ_U64(_op1) ir_UNARY_OP_U64(IR_CTLZ, (_op1)) +#define ir_CTLZ_A(_op1) ir_UNARY_OP_A(IR_CTLZ, (_op1)) +#define ir_CTLZ_C(_op1) ir_UNARY_OP_C(IR_CTLZ, (_op1)) +#define ir_CTLZ_I8(_op1) ir_UNARY_OP_I8(IR_CTLZ, (_op1)) +#define ir_CTLZ_I16(_op1) ir_UNARY_OP_I16(IR_CTLZ, (_op1)) +#define ir_CTLZ_I32(_op1) ir_UNARY_OP_I32(IR_CTLZ, (_op1)) +#define ir_CTLZ_I64(_op1) ir_UNARY_OP_I64(IR_CTLZ, (_op1)) + +#define ir_CTTZ(_type, _op1) ir_UNARY_OP(IR_CTTZ, (_type), (_op1)) +#define ir_CTTZ_8(_op1) ir_UNARY_OP_U8(IR_CTTZ, (_op1)) +#define ir_CTTZ_U16(_op1) ir_UNARY_OP_U16(IR_CTTZ, (_op1)) +#define ir_CTTZ_U32(_op1) ir_UNARY_OP_U32(IR_CTTZ, (_op1)) +#define ir_CTTZ_U64(_op1) ir_UNARY_OP_U64(IR_CTTZ, (_op1)) +#define ir_CTTZ_A(_op1) ir_UNARY_OP_A(IR_CTTZ, (_op1)) +#define ir_CTTZ_C(_op1) ir_UNARY_OP_C(IR_CTTZ, (_op1)) +#define ir_CTTZ_I8(_op1) ir_UNARY_OP_I8(IR_CTTZ, (_op1)) +#define ir_CTTZ_I16(_op1) ir_UNARY_OP_I16(IR_CTTZ, (_op1)) +#define ir_CTTZ_I32(_op1) ir_UNARY_OP_I32(IR_CTTZ, (_op1)) +#define ir_CTTZ_I64(_op1) ir_UNARY_OP_I64(IR_CTTZ, (_op1)) + #define ir_MIN(_type, _op1, _op2) ir_BINARY_OP(IR_MIN, (_type), (_op1), (_op2)) #define ir_MIN_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MIN, (_op1), (_op2)) #define ir_MIN_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MIN, (_op1), (_op2)) @@ -539,6 +575,13 @@ extern "C" { #define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset)) #define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0) +#define ir_FRAME_ADDR() ir_fold0(_ir_CTX, IR_OPT(IR_FRAME_ADDR, IR_ADDR)) + +#define ir_VA_START(_list) _ir_VA_START(_ir_CTX, _list) +#define ir_VA_END(_list) 
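The new typed macros mirror the existing unary-op families (compare the `ir_BSWAP_*` set above). Hypothetical builder usage, assuming an open function with `_ir_CTX` in scope and the usual parameter setup:

```c
/* Sketch: combine all three counts of a U32 parameter. */
ir_ref x  = ir_PARAM(IR_U32, "x", 1);
ir_ref p  = ir_CTPOP_U32(x);   /* set bits       */
ir_ref lz = ir_CTLZ_U32(x);    /* leading zeros  */
ir_ref tz = ir_CTTZ_U32(x);    /* trailing zeros */
ir_RETURN(ir_ADD_U32(p, ir_ADD_U32(lz, tz)));
```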
_ir_VA_END(_ir_CTX, _list) +#define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src) +#define ir_VA_ARG(_list, _type) _ir_VA_ARG(_ir_CTX, _type, _list) + #define ir_START() _ir_START(_ir_CTX) #define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num)) #define ir_BEGIN(_src) _ir_BEGIN(_ir_CTX, (_src)) @@ -603,6 +646,10 @@ ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg); void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val); ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr); void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val); +void _ir_VA_START(ir_ctx *ctx, ir_ref list); +void _ir_VA_END(ir_ctx *ctx, ir_ref list); +void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src); +ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list); void _ir_START(ir_ctx *ctx); void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num); void _ir_BEGIN(ir_ctx *ctx, ir_ref src); diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index b886319beb4..f22d9f032ad 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -231,7 +231,7 @@ next_successor: bb = blocks + 1; count = 0; /* SCCP already removed UNREACHABKE blocks, otherwise all blocks are marked as UNREACHABLE first */ - bb_init_falgs = (ctx->flags & IR_SCCP_DONE) ? 0 : IR_BB_UNREACHABLE; + bb_init_falgs = (ctx->flags2 & IR_SCCP_DONE) ? 0 : IR_BB_UNREACHABLE; IR_BITSET_FOREACH(bb_starts, len, start) { end = _blocks[start]; _blocks[start] = b; @@ -313,7 +313,7 @@ next_successor: ctx->cfg_edges = edges; ctx->cfg_map = _blocks; - if (!(ctx->flags & IR_SCCP_DONE)) { + if (!(ctx->flags2 & IR_SCCP_DONE)) { uint32_t reachable_count = 0; /* Mark reachable blocks */ @@ -600,7 +600,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) uint32_t *edges; bool changed; - ctx->flags &= ~IR_NO_LOOPS; + ctx->flags2 &= ~IR_NO_LOOPS; postnum = 1; compute_postnum(ctx, &postnum, 1); @@ -706,7 +706,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) ir_block *blocks, *bb; uint32_t *edges; - ctx->flags |= IR_NO_LOOPS; + ctx->flags2 |= IR_NO_LOOPS; /* Find immediate dominators */ blocks = ctx->cfg_blocks; @@ -726,7 +726,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) if (UNEXPECTED(idom > b)) { /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */ - ctx->flags &= ~IR_NO_LOOPS; + ctx->flags2 &= ~IR_NO_LOOPS; while (1) { k--; p++; @@ -753,7 +753,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) } } } else { - ctx->flags &= ~IR_NO_LOOPS; + ctx->flags2 &= ~IR_NO_LOOPS; } } bb->idom = idom; @@ -805,7 +805,7 @@ int ir_find_loops(ir_ctx *ctx) uint32_t *edges = ctx->cfg_edges; ir_worklist work; - if (ctx->flags & IR_NO_LOOPS) { + if (ctx->flags2 & IR_NO_LOOPS) { return 1; } @@ -908,13 +908,13 @@ next: if (UNEXPECTED(irreducible)) { // TODO: Support for irreducible loops ??? 
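For reference, the C semantics the four va nodes must reproduce (`VA_COPY(dst, src)` leaves `dst` positioned at the same remaining argument as `src`):

```c
#include <stdarg.h>

/* C behaviour mirrored by VA_COPY/VA_ARG/VA_END (reference sketch). */
static int peek_next_int(va_list src)
{
    va_list dst;
    int v;

    va_copy(dst, src);        /* VA_COPY(dst, src)   */
    v = va_arg(dst, int);     /* VA_ARG(dst, IR_I32) */
    va_end(dst);              /* VA_END(dst)         */
    return v;
}
```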
bb->flags |= IR_BB_IRREDUCIBLE_LOOP; - ctx->flags |= IR_IRREDUCIBLE_CFG; + ctx->flags2 |= IR_IRREDUCIBLE_CFG; while (ir_worklist_len(&work)) { ir_worklist_pop(&work); } } else if (ir_worklist_len(&work)) { bb->flags |= IR_BB_LOOP_HEADER; - ctx->flags |= IR_CFG_HAS_LOOPS; + ctx->flags2 |= IR_CFG_HAS_LOOPS; bb->loop_depth = 1; while (ir_worklist_len(&work)) { j = ir_worklist_pop(&work); @@ -942,7 +942,7 @@ next: } } - if (ctx->flags & IR_CFG_HAS_LOOPS) { + if (ctx->flags2 & IR_CFG_HAS_LOOPS) { for (n = 1; n < count; n++) { i = sorted_blocks[n]; ir_block *bb = &blocks[i]; diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c index 1dbe4a787e7..671d6e7e623 100644 --- a/ext/opcache/jit/ir/ir_check.c +++ b/ext/opcache/jit/ir/ir_check.c @@ -127,7 +127,7 @@ bool ir_check(const ir_ctx *ctx) } if (use >= i && !(insn->op == IR_PHI - && (!(ctx->flags & IR_LINEAR) || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN))) { + && (!(ctx->flags2 & IR_LINEAR) || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN))) { fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); ok = 0; } @@ -194,7 +194,7 @@ bool ir_check(const ir_ctx *ctx) break; } } - if ((ctx->flags & IR_LINEAR) + if ((ctx->flags2 & IR_LINEAR) && ctx->cfg_map && insn->op != IR_PHI && !ir_check_domination(ctx, use, i)) { diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c index 06c1bf65f33..cfd8b81a884 100644 --- a/ext/opcache/jit/ir/ir_dump.c +++ b/ext/opcache/jit/ir/ir_dump.c @@ -577,7 +577,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f) } else if (opnd_kind == IR_OPND_NUM) { fprintf(f, "%s%d", first ? "(" : ", ", ref); first = 0; - } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + } else if (j != n && + (IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) { fprintf(f, "%snull", first ? 
"(" : ", "); first = 0; } diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 694271a57c2..6161426e6ba 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -682,7 +682,7 @@ restart: ir_mem_free(_next); ctx->prev_ref = _prev; - ctx->flags |= IR_LINEAR; + ctx->flags2 |= IR_LINEAR; ir_truncate(ctx); return 1; @@ -694,6 +694,7 @@ restart: ir_init(&new_ctx, ctx->flags, consts_count, insns_count); new_ctx.insns_count = insns_count; + new_ctx.flags2 = ctx->flags2; new_ctx.ret_type = ctx->ret_type; new_ctx.mflags = ctx->mflags; new_ctx.spill_base = ctx->spill_base; @@ -867,7 +868,7 @@ restart: IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); memcpy(ctx, &new_ctx, sizeof(ir_ctx)); - ctx->flags |= IR_LINEAR; + ctx->flags2 |= IR_LINEAR; ir_mem_free(_next); diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 0f6267bd585..5010793c957 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -876,6 +876,27 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) return ir_insn_inputs_to_len(insn->inputs_count); } +/*** IR Context Private Flags (ir_ctx->flags2) ***/ +#define IR_CFG_HAS_LOOPS (1<<0) +#define IR_IRREDUCIBLE_CFG (1<<1) +#define IR_HAS_ALLOCA (1<<2) +#define IR_HAS_CALLS (1<<3) +#define IR_OPT_IN_SCCP (1<<4) +#define IR_LINEAR (1<<5) + +/* Temporary: SCCP -> CFG */ +#define IR_SCCP_DONE (1<<25) + +/* Temporary: Dominators -> Loops */ +#define IR_NO_LOOPS (1<<25) + +/* Temporary: Live Ranges */ +#define IR_LR_HAVE_DESSA_MOVES (1<<25) + +/* Temporary: Register Allocator */ +#define IR_RA_HAVE_SPLITS (1<<25) +#define IR_RA_HAVE_SPILLS (1<<26) + /*** IR Binding ***/ IR_ALWAYS_INLINE ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref) { diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index d3b9ac134a9..916653bdc99 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -593,7 +593,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) ir_bitqueue queue; ir_live_interval *ival; - if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) { + if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) { return 0; } @@ -606,7 +606,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) ctx->vars = IR_UNUSED; /* Compute Live Ranges */ - ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; + ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES; len = ir_bitset_len(ctx->vregs_count + 1); bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t)); @@ -1243,7 +1243,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) ir_list live_lists; ir_live_interval *ival; - if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) { + if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) { return 0; } @@ -1256,7 +1256,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) ctx->vars = IR_UNUSED; /* Compute Live Ranges */ - ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; + ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES; /* vregs + tmp + fixed + SRATCH + ALL */ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); @@ -1645,7 +1645,7 @@ static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to) if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) { ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY; ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES; - ctx->flags |= IR_LR_HAVE_DESSA_MOVES; + ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES; #if 0 fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to); #endif @@ -1980,7 +1980,7 @@ int 
ir_compute_dessa_moves(ir_ctx *ctx) int pred = ctx->cfg_edges[bb->predecessors + (j-2)]; ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY; ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES; - ctx->flags |= IR_LR_HAVE_DESSA_MOVES; + ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES; } } } @@ -2295,7 +2295,7 @@ static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *iva IR_LOG_LSRA_SPLIT(ival, pos); IR_ASSERT(pos > ival->range.start); - ctx->flags |= IR_RA_HAVE_SPLITS; + ctx->flags2 |= IR_RA_HAVE_SPLITS; p = &ival->range; prev = NULL; @@ -2883,7 +2883,7 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li if (!use_pos) { /* spill */ IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)"); - ctx->flags |= IR_RA_HAVE_SPILLS; + ctx->flags2 |= IR_RA_HAVE_SPILLS; return IR_REG_NONE; } next_use_pos = use_pos->pos; @@ -3333,7 +3333,7 @@ static int ir_linear_scan(ir_ctx *ctx) return 0; } - if (ctx->flags & IR_LR_HAVE_DESSA_MOVES) { + if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) { /* Add fixed intervals for temporary registers used for DESSA moves */ for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); @@ -3385,7 +3385,7 @@ static int ir_linear_scan(ir_ctx *ctx) } } - ctx->flags &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS); + ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS); #ifdef IR_DEBUG if (ctx->flags & IR_DEBUG_RA) { @@ -3499,7 +3499,7 @@ static int ir_linear_scan(ir_ctx *ctx) } #endif - if (ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) { + if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) { if (ctx->binding) { ir_assign_bound_spill_slots(ctx); @@ -3674,7 +3674,7 @@ static void assign_regs(ir_ctx *ctx) memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); } - if (!(ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) { + if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) { for (i = 1; i <= ctx->vregs_count; i++) { ival = ctx->live_intervals[i]; if (ival) { diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index d89ec6eebce..68d8aba1aa8 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -102,7 +102,8 @@ void ir_save(const ir_ctx *ctx, FILE *f) } else if (opnd_kind == IR_OPND_NUM) { fprintf(f, "%s%d", first ? "(" : ", ", ref); first = 0; - } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + } else if (j != n && + (IR_IS_REF_OPND_KIND(opnd_kind) || (opnd_kind == IR_OPND_UNUSED && p[n-j]))) { fprintf(f, "%snull", first ? "(" : ", "); first = 0; } diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 6c0297f2b14..808b0152e5f 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -545,7 +545,7 @@ int ir_sccp(ir_ctx *ctx) ir_bitqueue worklist; ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); - ctx->flags |= IR_OPT_IN_SCCP; + ctx->flags2 |= IR_OPT_IN_SCCP; /* A bit modified SCCP algorithm of M. N. Wegman and F. K. 
Zadeck */ ir_bitqueue_init(&worklist, ctx->insns_count); @@ -878,8 +878,8 @@ int ir_sccp(ir_ctx *ctx) ir_mem_free(_values); ir_bitqueue_free(&worklist); - ctx->flags &= ~IR_OPT_IN_SCCP; - ctx->flags |= IR_SCCP_DONE; + ctx->flags2 &= ~IR_OPT_IN_SCCP; + ctx->flags2 |= IR_SCCP_DONE; return 1; } diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 7cb4fe36755..68f2933a729 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -547,6 +547,7 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(RETURN_VOID) \ _(RETURN_INT) \ _(RETURN_FP) \ + _(BIT_COUNT) \ #define IR_RULE_ENUM(name) IR_ ## name, @@ -803,6 +804,28 @@ op2_const: case IR_OP_FP: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; break; + case IR_BIT_COUNT: + insn = &ctx->ir_base[ref]; + if (ir_type_size[insn->type] == 1) { + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + } else { + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + } + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_CTPOP: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + insn = &ctx->ir_base[ref]; + constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + if (ir_type_size[insn->type] == 8) { + constraints->tmp_regs[1] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + } + break; case IR_COPY_INT: case IR_COPY_FP: case IR_SEXT: @@ -1400,7 +1423,7 @@ binop_fp: } break; case IR_CALL: - ctx->flags |= IR_HAS_CALLS; + ctx->flags2 |= IR_HAS_CALLS; IR_FALLTHROUGH; case IR_TAILCALL: if (ir_in_same_block(ctx, insn->op2)) { @@ -1412,9 +1435,10 @@ binop_fp: case IR_PARAM: return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; case IR_ALLOCA: - /* alloca() may be use only in functions */ + /* alloca() may be used only in functions */ if (ctx->flags & IR_FUNCTION) { - ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; + ctx->flags |= IR_USE_FRAME_POINTER; + ctx->flags2 |= IR_HAS_ALLOCA; } return IR_ALLOCA; case IR_VSTORE: @@ -1890,6 +1914,13 @@ store_int: case IR_FP2FP: ir_match_fuse_load(ctx, insn->op1, ref); return insn->op; + case IR_CTLZ: + case IR_CTTZ: + ir_match_fuse_load(ctx, insn->op1, ref); + return IR_BIT_COUNT; + case IR_CTPOP: + ir_match_fuse_load(ctx, insn->op1, ref); + return (ctx->mflags & IR_X86_BMI1) ? 
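Since `ir_sccp()` now publishes its completion through `flags2`, note that the "temporary" bits moved into ir_private.h intentionally alias (several share `1<<25`): each is meaningful only between its producer pass and its consumer pass, so the bit can be recycled. A hypothetical accessor, purely to make that contract concrete (not part of the patch):

```c
/* Valid only between ir_sccp() (producer) and ir_build_cfg()
 * (consumer); afterwards bit 25 is reused as IR_NO_LOOPS, then
 * IR_LR_HAVE_DESSA_MOVES, then IR_RA_HAVE_SPLITS. */
static inline bool ir_sccp_already_pruned(const ir_ctx *ctx)
{
    return (ctx->flags2 & IR_SCCP_DONE) != 0;
}
```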
IR_BIT_COUNT : IR_CTPOP; default: break; } @@ -3155,6 +3186,366 @@ static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } +static void ir_emit_bit_count(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + switch (ir_type_size[insn->type]) { + default: + IR_ASSERT(0); + case 2: + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rw(def_reg), Rw(op1_reg) + } else { + | bsr Rw(def_reg), Rw(op1_reg) + | xor Rw(def_reg), 0xf + } + } else if (insn->op == IR_CTTZ) { + if (ctx->mflags & IR_X86_BMI1) { + | tzcnt Rw(def_reg), Rw(op1_reg) + } else { + | bsf Rw(def_reg), Rw(op1_reg) + } + } else { + IR_ASSERT(insn->op == IR_CTPOP); + | popcnt Rw(def_reg), Rw(op1_reg) + } + break; + case 1: + | movzx Rd(op1_reg), Rb(op1_reg) + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rd(def_reg), Rd(op1_reg) + | sub Rd(def_reg), 24 + } else { + | bsr Rd(def_reg), Rd(op1_reg) + | xor Rw(def_reg), 0x7 + } + break; + } + IR_FALLTHROUGH; + case 4: + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rd(def_reg), Rd(op1_reg) + } else { + | bsr Rd(def_reg), Rd(op1_reg) + | xor Rw(def_reg), 0x1f + } + } else if (insn->op == IR_CTTZ) { + if (ctx->mflags & IR_X86_BMI1) { + | tzcnt Rd(def_reg), Rd(op1_reg) + } else { + | bsf Rd(def_reg), Rd(op1_reg) + } + } else { + IR_ASSERT(insn->op == IR_CTPOP); + | popcnt Rd(def_reg), Rd(op1_reg) + } + break; +|.if X64 + case 8: + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rq(def_reg), Rq(op1_reg) + } else { + | bsr Rq(def_reg), Rq(op1_reg) + | xor Rw(def_reg), 0x3f + } + } else if (insn->op == IR_CTTZ) { + if (ctx->mflags & IR_X86_BMI1) { + | tzcnt Rq(def_reg), Rq(op1_reg) + } else { + | bsf Rq(def_reg), Rq(op1_reg) + } + } else { + IR_ASSERT(insn->op == IR_CTPOP); + | popcnt Rq(def_reg), Rq(op1_reg) + } + break; +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, op1, &op1_reg); + } + if (op1_reg != IR_REG_NONE) { + switch (ir_type_size[insn->type]) { + default: + IR_ASSERT(0); + case 2: + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rw(def_reg), word [Ra(op1_reg)+offset] + } else { + | bsr Rw(def_reg), word [Ra(op1_reg)+offset] + | xor Rw(def_reg), 0xf + } + } else if (insn->op == IR_CTTZ) { + if (ctx->mflags & IR_X86_BMI1) { + | tzcnt Rw(def_reg), word [Ra(op1_reg)+offset] + } else { + | bsf Rw(def_reg), word [Ra(op1_reg)+offset] + } + } else { + | popcnt Rw(def_reg), word [Ra(op1_reg)+offset] + } + break; + case 4: + if (insn->op == IR_CTLZ) { + if (ctx->mflags & IR_X86_BMI1) { + | lzcnt Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | bsr Rd(def_reg), dword [Ra(op1_reg)+offset] + | xor Rw(def_reg), 0x1f + } + } else if (insn->op == IR_CTTZ) { + if (ctx->mflags & IR_X86_BMI1) { + | tzcnt Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | bsf Rd(def_reg), dword [Ra(op1_reg)+offset] + } + } else { + | popcnt Rd(def_reg), dword [Ra(op1_reg)+offset] + } + break; +|.if X64 + case 8: + if (insn->op == 
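The non-BMI1 CTLZ fallback relies on `bsr` plus an XOR with width-1. That works because for a non-zero input `bsr` returns the index of the highest set bit, and with `idx <= width-1` (an all-ones mask) the subtraction `width-1 - idx` equals `idx ^ (width-1)`. In C:

```c
#include <stdint.h>

/* Sketch of the bsr/xor identity used here (caller ensures x != 0). */
static uint32_t ctlz32_via_bsr(uint32_t x)
{
    uint32_t idx = 31u - (uint32_t)__builtin_clz(x);  /* == bsr result */
    return idx ^ 31u;                                 /* == 31 - idx   */
}
```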
IR_CTLZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | lzcnt Rq(def_reg), qword [Ra(op1_reg)+offset]
+ } else {
+ | bsr Rq(def_reg), qword [Ra(op1_reg)+offset]
+ | xor Rw(def_reg), 0x3f
+ }
+ } else if (insn->op == IR_CTTZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | tzcnt Rq(def_reg), qword [Ra(op1_reg)+offset]
+ } else {
+ | bsf Rq(def_reg), qword [Ra(op1_reg)+offset]
+ }
+ } else {
+ | popcnt Rq(def_reg), qword [Ra(op1_reg)+offset]
+ }
+ break;
+|.endif
+ }
+ } else {
+ switch (ir_type_size[insn->type]) {
+ default:
+ IR_ASSERT(0);
+ case 2:
+ if (insn->op == IR_CTLZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | lzcnt Rw(def_reg), word [offset]
+ } else {
+ | bsr Rw(def_reg), word [offset]
+ | xor Rw(def_reg), 0xf
+ }
+ } else if (insn->op == IR_CTTZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | tzcnt Rw(def_reg), word [offset]
+ } else {
+ | bsf Rw(def_reg), word [offset]
+ }
+ } else {
+ | popcnt Rw(def_reg), word [offset]
+ }
+ break;
+ case 4:
+ if (insn->op == IR_CTLZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | lzcnt Rd(def_reg), dword [offset]
+ } else {
+ | bsr Rd(def_reg), dword [offset]
+ | xor Rw(def_reg), 0x1f
+ }
+ } else if (insn->op == IR_CTTZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | tzcnt Rd(def_reg), dword [offset]
+ } else {
+ | bsf Rd(def_reg), dword [offset]
+ }
+ } else {
+ | popcnt Rd(def_reg), dword [offset]
+ }
+ break;
+|.if X64
+ case 8:
+ if (insn->op == IR_CTLZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | lzcnt Rq(def_reg), qword [offset]
+ } else {
+ | bsr Rq(def_reg), qword [offset]
+ | xor Rw(def_reg), 0x3f
+ }
+ } else if (insn->op == IR_CTTZ) {
+ if (ctx->mflags & IR_X86_BMI1) {
+ | tzcnt Rq(def_reg), qword [offset]
+ } else {
+ | bsf Rq(def_reg), qword [offset]
+ }
+ } else {
+ | popcnt Rq(def_reg), qword [offset]
+ }
+ break;
+|.endif
+ }
+ }
+ }
+
+ if (IR_REG_SPILLED(ctx->regs[def][0])) {
+ ir_emit_store(ctx, type, def, def_reg);
+ }
+}
+
+static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+ ir_backend_data *data = ctx->data;
+ dasm_State **Dst = &data->dasm_state;
+ ir_type type = insn->type;
+ ir_ref op1 = insn->op1;
+ ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+ ir_reg op1_reg = ctx->regs[def][1];
+ ir_reg tmp_reg = ctx->regs[def][2];
+|.if X64
+|| ir_reg const_reg = ctx->regs[def][3];
+|.endif
+
+ IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
+ if (IR_IS_CONST_REF(op1) || op1_reg == IR_REG_NONE) {
+ ir_emit_load(ctx, type, def_reg, op1);
+ if (ir_type_size[insn->type] == 1) {
+ | movzx Rd(def_reg), Rb(def_reg)
+ } else if (ir_type_size[insn->type] == 2) {
+ | movzx Rd(def_reg), Rw(def_reg)
+ }
+ } else {
+ if (IR_REG_SPILLED(op1_reg)) {
+ op1_reg = IR_REG_NUM(op1_reg);
+ ir_emit_load(ctx, type, op1_reg, op1);
+ }
+ switch (ir_type_size[insn->type]) {
+ default:
+ IR_ASSERT(0);
+ case 1:
+ | movzx Rd(def_reg), Rb(op1_reg)
+ break;
+ case 2:
+ | movzx Rd(def_reg), Rw(op1_reg)
+ break;
+ case 4:
+ | mov Rd(def_reg), Rd(op1_reg)
+ break;
+|.if X64
+|| case 8:
+ | mov Rq(def_reg), Rq(op1_reg)
+|| break;
+|.endif
+ }
+ }
+ switch (ir_type_size[insn->type]) {
+ default:
+ IR_ASSERT(0);
+ case 1:
+ | mov Rd(tmp_reg), Rd(def_reg)
+ | shr Rd(def_reg), 1
+ | and Rd(def_reg), 0x55
+ | sub Rd(tmp_reg), Rd(def_reg)
+ | mov Rd(def_reg), Rd(tmp_reg)
+ | and Rd(def_reg), 0x33
+ | shr Rd(tmp_reg), 2
+ | and Rd(tmp_reg), 0x33
+ | add Rd(tmp_reg), Rd(def_reg)
+ | mov Rd(def_reg), Rd(tmp_reg)
+ | shr Rd(def_reg), 4
+ | add Rd(def_reg), Rd(tmp_reg)
+ | and Rd(def_reg), 0x0f
+ break;
+ case 2:
+ | mov Rd(tmp_reg), Rd(def_reg)
+ | shr Rd(def_reg), 1
+ | 
and Rd(def_reg), 0x5555 + | sub Rd(tmp_reg), Rd(def_reg) + | mov Rd(def_reg), Rd(tmp_reg) + | and Rd(def_reg), 0x3333 + | shr Rd(tmp_reg), 2 + | and Rd(tmp_reg), 0x3333 + | add Rd(tmp_reg), Rd(def_reg) + | mov Rd(def_reg), Rd(tmp_reg) + | shr Rd(def_reg), 4 + | add Rd(def_reg), Rd(tmp_reg) + | and Rd(def_reg), 0x0f0f + | mov Rd(tmp_reg), Rd(def_reg) + | shr Rd(tmp_reg), 8 + | and Rd(def_reg), 0x0f + | add Rd(def_reg), Rd(tmp_reg) + break; + case 4: + | mov Rd(tmp_reg), Rd(def_reg) + | shr Rd(def_reg), 1 + | and Rd(def_reg), 0x55555555 + | sub Rd(tmp_reg), Rd(def_reg) + | mov Rd(def_reg), Rd(tmp_reg) + | and Rd(def_reg), 0x33333333 + | shr Rd(tmp_reg), 2 + | and Rd(tmp_reg), 0x33333333 + | add Rd(tmp_reg), Rd(def_reg) + | mov Rd(def_reg), Rd(tmp_reg) + | shr Rd(def_reg), 4 + | add Rd(def_reg), Rd(tmp_reg) + | and Rd(def_reg), 0x0f0f0f0f + | imul Rd(def_reg), 0x01010101 + | shr Rd(def_reg), 24 + break; +|.if X64 +|| case 8: +|| IR_ASSERT(const_reg != IR_REG_NONE); + | mov Rq(tmp_reg), Rq(def_reg) + | shr Rq(def_reg), 1 + | mov64 Rq(const_reg), 0x5555555555555555 + | and Rq(def_reg), Rq(const_reg) + | sub Rq(tmp_reg), Rq(def_reg) + | mov Rq(def_reg), Rq(tmp_reg) + | mov64 Rq(const_reg), 0x3333333333333333 + | and Rq(def_reg), Rq(const_reg) + | shr Rq(tmp_reg), 2 + | and Rq(tmp_reg), Rq(const_reg) + | add Rq(tmp_reg), Rq(def_reg) + | mov Rq(def_reg), Rq(tmp_reg) + | shr Rq(def_reg), 4 + | add Rq(def_reg), Rq(tmp_reg) + | mov64 Rq(const_reg), 0x0f0f0f0f0f0f0f0f + | and Rq(def_reg), Rq(const_reg) + | mov64 Rq(const_reg), 0x0101010101010101 + | imul Rq(def_reg), Rq(const_reg) + | shr Rq(def_reg), 56 +|| break; +|.endif + } + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; @@ -5754,7 +6145,7 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); - if (ctx->flags & IR_HAS_CALLS) { + if (ctx->flags2 & IR_HAS_CALLS) { /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); } else { @@ -5765,11 +6156,12 @@ static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) ctx->call_stack_size += size; } } else { - int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); IR_ASSERT(def_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -5810,7 +6202,7 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); - if (ctx->flags & IR_HAS_CALLS) { + if (ctx->flags2 & IR_HAS_CALLS) { /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); } else { @@ -5821,7 +6213,7 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) ctx->call_stack_size -= size; } } else { -// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; +// int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 
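The register sequences above are the classic SWAR popcount; here is the 32-bit case in C for comparison (the 8/16-bit cases are truncated variants, and the 64-bit case is identical with the wide masks loaded through `mov64`):

```c
#include <stdint.h>

/* C equivalent of the emitted non-BMI1 32-bit CTPOP sequence. */
static uint32_t swar_popcount32(uint32_t x)
{
    x = x - ((x >> 1) & 0x55555555u);                 /* 2-bit sums        */
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); /* 4-bit sums        */
    x = (x + (x >> 4)) & 0x0f0f0f0fu;                 /* 8-bit sums        */
    return (x * 0x01010101u) >> 24;                   /* total in top byte */
}
```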
16 : 8; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; @@ -5837,6 +6229,22 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } +static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | mov Ra(def_reg), Ra(IR_REG_RBP) + } else { + | lea Ra(def_reg), [Ra(IR_REG_RSP)+(ctx->stack_frame_size + ctx->call_stack_size)] + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, def_reg); + } +} + static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; @@ -8102,7 +8510,7 @@ void ir_fix_stack_frame(ir_ctx *ctx) ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; - if (ctx->flags & IR_HAS_CALLS) { + if (ctx->flags2 & IR_HAS_CALLS) { /* Stack must be 16 byte aligned */ if (!(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { @@ -8509,6 +8917,12 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_SHIFT_CONST: ir_emit_shift_const(ctx, i, insn); break; + case IR_BIT_COUNT: + ir_emit_bit_count(ctx, i, insn); + break; + case IR_CTPOP: + ir_emit_ctpop(ctx, i, insn); + break; case IR_INC: case IR_DEC: case IR_OP_INT: @@ -8745,6 +9159,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_AFREE: ir_emit_afree(ctx, i, insn); break; + case IR_FRAME_ADDR: + ir_emit_frame_addr(ctx, i); + break; case IR_EXITCALL: ir_emit_exitcall(ctx, i, insn); break; diff --git a/ext/opcache/jit/ir/y.txt b/ext/opcache/jit/ir/y.txt deleted file mode 100644 index b125f850050..00000000000 --- a/ext/opcache/jit/ir/y.txt +++ /dev/null @@ -1,6 +0,0 @@ -llvm.floor.f64 -llvm.fmuladd.f64 -llvm.memcpy.p0.p0.i64 -llvm.memset.p0.i64 -llvm.va_end -llvm.va_start
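Finally, a hypothetical builder-level use of the new FRAME_ADDR node (the offset, load type, and surrounding function are illustrative assumptions, not taken from this patch):

```c
/* Sketch: address a slot relative to the frame base. */
ir_ref base = ir_FRAME_ADDR();                    /* IR_ADDR-typed node */
ir_ref slot = ir_SUB_A(base, ir_CONST_ADDR(16));  /* 16 bytes below it  */
ir_ref val  = ir_LOAD_I64(slot);
```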