diff --git a/klee/lib/Expr/Expr.cpp b/klee/lib/Expr/Expr.cpp
index 9a47fb919..3dc91b358 100644
--- a/klee/lib/Expr/Expr.cpp
+++ b/klee/lib/Expr/Expr.cpp
@@ -1162,6 +1162,11 @@ static ref<Expr> LShrExpr_create(const ref<Expr> &l, const ref<Expr> &r) {
   if (l->getWidth() == Expr::Bool) { // l & !r
     return AndExpr::create(l, Expr::createIsZero(r));
   } else {
+    // Shifting by 0 is a no-op
+    ConstantExpr *ce = dyn_cast<ConstantExpr>(r);
+    if (ce && ce->getAPValue() == 0) {
+      return l;
+    }
     return LShrExpr::alloc(l, r);
   }
 }
@@ -1170,6 +1175,11 @@ static ref<Expr> AShrExpr_create(const ref<Expr> &l, const ref<Expr> &r) {
   if (l->getWidth() == Expr::Bool) { // l
     return l;
   } else {
+    // Shifting by 0 is a no-op
+    ConstantExpr *ce = dyn_cast<ConstantExpr>(r);
+    if (ce && ce->getAPValue() == 0) {
+      return l;
+    }
     return AShrExpr::alloc(l, r);
   }
 }
diff --git a/libcpu/include/cpu/exec.h b/libcpu/include/cpu/exec.h
index de635437b..8ab0a85a1 100644
--- a/libcpu/include/cpu/exec.h
+++ b/libcpu/include/cpu/exec.h
@@ -26,35 +26,13 @@
 #include
 #include
 #include
+#include <tcg/accel/getpc.h>
 #include

 #ifdef __cplusplus
 extern "C" {
 #endif

-/* The return address may point to the start of the next instruction.
-   Subtracting one gets us the call instruction itself. */
-#if defined(CONFIG_TCG_INTERPRETER)
-/* Alpha and SH4 user mode emulations and Softmmu call GETPC().
-   For all others, GETPC remains undefined (which makes TCI a little faster. */
-#if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4)
-extern void *tci_tb_ptr;
-#define GETPC() tci_tb_ptr
-#endif
-#elif defined(__s390__) && !defined(__s390x__)
-#define GETPC() ((void *) (((uintptr_t) __builtin_return_address(0) & 0x7fffffffUL) - 1))
-#elif defined(__arm__)
-/* Thumb return addresses have the low bit set, so we need to subtract two.
-   This is still safe in ARM mode because instructions are 4 bytes. */
-#define GETPC() ((void *) ((uintptr_t) __builtin_return_address(0) - 2))
-#else
-#if defined(SYMBEX_LLVM_LIB)
-#define GETPC() 0
-#else
-#define GETPC() (((uintptr_t) __builtin_return_address(0) - 1))
-#endif
-#endif
-
 /* The true return address will often point to a host insn that is part of
    the next translated guest insn.  Adjust the address backward to point to
    the middle of the call insn.  Subtracting one would do the job except for
diff --git a/libcpu/src/CMakeLists.txt b/libcpu/src/CMakeLists.txt
index 99a086a9c..ad53e1fb8 100644
--- a/libcpu/src/CMakeLists.txt
+++ b/libcpu/src/CMakeLists.txt
@@ -16,7 +16,7 @@ target_include_directories (cpu PUBLIC
     ${CMAKE_BINARY_DIR}/include
 )

-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -D_GNU_SOURCE -DNEED_CPU_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DTARGET_PHYS_ADDR_BITS=64")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -D_GNU_SOURCE -DNEED_CPU_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DTARGET_PHYS_ADDR_BITS=64 -DCOMPILING_PER_TARGET")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -fPIC -Werror -fno-omit-frame-pointer")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing -Wno-sign-compare -Wno-missing-field-initializers -Wno-mismatched-tags -Wno-deprecated-declarations -Wno-initializer-overrides -Wno-zero-length-array")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexceptions -Wno-gnu-folding-constant")
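Note on the KLEE hunks above: both right-shift constructors gain the same constant-folding rule, a shift by a literal zero returns the operand instead of allocating a new node. A minimal self-contained model of that rewrite (illustration only; these are stand-in types, not KLEE's Expr classes):

```cpp
// Model of the simplification added above: when the shift amount is a
// known constant 0, return the operand unchanged instead of building a
// new shift node, exactly the shape of LShrExpr_create/AShrExpr_create.
#include <cassert>
#include <memory>

struct Expr { virtual ~Expr() = default; };
struct Const : Expr { unsigned long long v; explicit Const(unsigned long long v) : v(v) {} };
struct LShr : Expr {
    std::shared_ptr<Expr> l, r;
    LShr(std::shared_ptr<Expr> l, std::shared_ptr<Expr> r) : l(l), r(r) {}
};

// Analogue of LShrExpr_create: fold "x >> 0" to "x".
std::shared_ptr<Expr> make_lshr(std::shared_ptr<Expr> l, std::shared_ptr<Expr> r) {
    if (auto *c = dynamic_cast<Const *>(r.get()); c && c->v == 0) {
        return l; // shifting by 0 is a no-op
    }
    return std::make_shared<LShr>(l, r);
}

int main() {
    auto x = std::make_shared<Const>(42);
    assert(make_lshr(x, std::make_shared<Const>(0)) == x); // folded away
}
```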
diff --git a/libcpu/src/precise-pc.c b/libcpu/src/precise-pc.c
index 4653069c4..d19ffba36 100644
--- a/libcpu/src/precise-pc.c
+++ b/libcpu/src/precise-pc.c
@@ -68,7 +68,7 @@ static target_long decode_sleb128(uint8_t **pp) {

 /* Encode the data collected about the instructions while compiling TB.
    Place the data at BLOCK, and return the number of bytes consumed.

-   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
+   The logical table consists of INSN_START_WORDS uint64_t's,
    which come from the target's insn_start data, followed by a uintptr_t
    which comes from the host pc of the end of the code implementing the insn.
@@ -87,13 +87,13 @@ int encode_search(TCGContext *tcg_ctx, TranslationBlock *tb, uint8_t *block) {
     for (i = 0, n = tb->icount; i < n; ++i) {
         uint64_t prev, curr;

-        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+        for (j = 0; j < INSN_START_WORDS; ++j) {
             if (i == 0) {
                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
             } else {
-                prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
+                prev = insn_data[(i - 1) * INSN_START_WORDS + j];
             }
-            curr = insn_data[i * TARGET_INSN_START_WORDS + j];
+            curr = insn_data[i * INSN_START_WORDS + j];
             p = encode_sleb128(p, curr - prev);
         }
         prev = (i == 0 ? 0 : insn_end_off[i - 1]);
@@ -113,7 +113,7 @@ int encode_search(TCGContext *tcg_ctx, TranslationBlock *tb, uint8_t *block) {
 }

 int tb_get_instruction_size(TranslationBlock *tb, uint64_t pc) {
-    target_ulong data[TARGET_INSN_START_WORDS] = {tb->pc};
+    target_ulong data[INSN_START_WORDS] = {tb->pc};
     uint8_t *p = tb->tc.ptr + tb->tc.size;
     int i, j, num_insns = tb->icount;
@@ -122,7 +122,7 @@ int tb_get_instruction_size(TranslationBlock *tb, uint64_t pc) {
     }

     for (i = 0; i < num_insns; ++i) {
-        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+        for (j = 0; j < INSN_START_WORDS; ++j) {
             data[j] += decode_sleb128(&p);
         }
         decode_sleb128(&p);
@@ -130,7 +130,7 @@ int tb_get_instruction_size(TranslationBlock *tb, uint64_t pc) {
         if (i == num_insns - 1) {
             return tb->size - (pc - tb->pc);
         } else {
-            for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+            for (j = 0; j < INSN_START_WORDS; ++j) {
                 data[j] += decode_sleb128(&p);
             }
             return data[0] - pc;
@@ -155,7 +155,7 @@ static int tb_find_guest_pc(TranslationBlock *tb, uintptr_t searched_host_pc, ta
     // Reconstruct the stored insn data while looking for the point at
     // which the end of the insn exceeds the searched_pc.
     for (i = 0; i < num_insns; ++i) {
-        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+        for (j = 0; j < INSN_START_WORDS; ++j) {
             data[j] += decode_sleb128(&p);
         }
         host_pc += decode_sleb128(&p);
@@ -171,7 +171,7 @@ static int tb_find_guest_pc(TranslationBlock *tb, uintptr_t searched_host_pc, ta
  * icount should be recalculated.
  */
 static int cpu_restore_state_from_tb(CPUArchState *env, TranslationBlock *tb, uintptr_t searched_pc) {
-    target_ulong data[TARGET_INSN_START_WORDS] = {tb->pc};
+    target_ulong data[INSN_START_WORDS] = {tb->pc};

     if (tb_find_guest_pc(tb, searched_pc, data) < 0) {
         return -1;
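For context on the precise-pc.c changes above: the search table stores each instruction's words as signed-LEB128 deltas against the previous instruction, which is why every lookup walks the table with decode_sleb128. A hedged sketch of the standard sleb128 decoder (same shape as decode_sleb128; written fresh here, not copied from libcpu):

```cpp
// Standard signed-LEB128 decode: 7 payload bits per byte, high bit is the
// continuation flag, bit 6 of the last byte carries the sign.
#include <cassert>
#include <cstdint>

static int64_t sleb128_decode(const uint8_t **pp) {
    const uint8_t *p = *pp;
    int64_t val = 0;
    int shift = 0;
    uint8_t byte;
    do {
        byte = *p++;
        val |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < 64 && (byte & 0x40)) {
        val |= -(INT64_C(1) << shift); // sign-extend the final group
    }
    *pp = p;
    return val;
}

int main() {
    const uint8_t enc[] = {0x7f}; // -1 encoded in one byte
    const uint8_t *p = enc;
    assert(sleb128_decode(&p) == -1);
}
```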
diff --git a/libcpu/src/target-i386/translate.c b/libcpu/src/target-i386/translate.c
index f3071054d..91fc70226 100644
--- a/libcpu/src/target-i386/translate.c
+++ b/libcpu/src/target-i386/translate.c
@@ -318,7 +318,7 @@ static inline void gen_op_andl_A0_ffff(void) {
 // a cpu_restore_state() call.
 static inline void gen_save_eip(DisasContext *s, target_ulong pc) {
     tcg_gen_movi_tl(cpu_tmp0, pc - s->cs_base);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUArchState, eip));
+    tcg_gen_st_tl(cpu_tmp0, tcg_env, offsetof(CPUArchState, eip));
 }

 #ifdef CONFIG_SYMBEX
@@ -486,10 +486,10 @@ static inline void gen_op_addl_T0_T1(void) {
 }

 static inline void gen_op_jmp_T0(DisasContext *s) {
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, eip));
+    tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, eip));

 #ifdef ENABLE_PRECISE_EXCEPTION_DEBUGGING
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, precise_eip));
+    tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, precise_eip));
 #endif
 }
@@ -550,11 +550,11 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg) {
 }

 static inline void gen_op_movl_A0_seg(int reg) {
-    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
+    tcg_gen_ld32u_tl(cpu_A0, tcg_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
 }

 static inline void gen_op_addl_A0_seg(DisasContext *s, int reg) {
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
+    tcg_gen_ld_tl(cpu_tmp0, tcg_env, offsetof(CPUX86State, segs[reg].base));
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
         tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
@@ -570,11 +570,11 @@ static inline void gen_op_addl_A0_seg(DisasContext *s, int reg) {

 #ifdef TARGET_X86_64
 static inline void gen_op_movq_A0_seg(int reg) {
-    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
+    tcg_gen_ld_tl(cpu_A0, tcg_env, offsetof(CPUX86State, segs[reg].base));
 }

 static inline void gen_op_addq_A0_seg(int reg) {
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
+    tcg_gen_ld_tl(cpu_tmp0, tcg_env, offsetof(CPUX86State, segs[reg].base));
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 }
@@ -673,10 +673,10 @@ static inline void gen_op_st_T1_A0(int idx) {

 static inline void gen_jmp_im(DisasContext *s, target_ulong pc) {
     tcg_gen_movi_tl(cpu_tmp0, pc);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, eip));
+    tcg_gen_st_tl(cpu_tmp0, tcg_env, offsetof(CPUX86State, eip));

 #ifdef ENABLE_PRECISE_EXCEPTION_DEBUGGING
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, precise_eip));
+    tcg_gen_st_tl(cpu_tmp0, tcg_env, offsetof(CPUX86State, precise_eip));
 #endif
 }
@@ -735,7 +735,7 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s) {
 }

 static inline void gen_op_movl_T0_Dshift(int ot) {
-    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
+    tcg_gen_ld32s_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, df));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
 };
@@ -2446,14 +2446,14 @@ static void gen_setcc(DisasContext *s, int b) {
 }

 static inline void gen_op_movl_T0_seg(int seg_reg) {
-    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, segs[seg_reg].selector));
+    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, segs[seg_reg].selector));
 }

 static inline void gen_op_movl_seg_T0_vm(int seg_reg) {
     tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, segs[seg_reg].selector));
+    tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, segs[seg_reg].selector));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, segs[seg_reg].base));
+    tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, segs[seg_reg].base));
 }

 /* move T0 to seg_reg and compute if the CPU state may change. Never
@@ -2817,53 +2817,53 @@ static void gen_jmp(DisasContext *s, target_ulong eip) {
 static inline void gen_ldq_env_A0(int idx, int offset) {
     int mem_index = (idx >> 2) - 1;
     tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, offset);
 }

 static inline void gen_stq_env_A0(int idx, int offset) {
     int mem_index = (idx >> 2) - 1;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, offset);
     tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
 }

 static inline void gen_ldo_env_A0(int idx, int offset) {
     int mem_index = (idx >> 2) - 1;
     tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
     tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
 }

 static inline void gen_sto_env_A0(int idx, int offset) {
     int mem_index = (idx >> 2) - 1;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
     tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
     tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
 }

 static inline void gen_op_movo(int d_offset, int s_offset) {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, s_offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, d_offset);
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, s_offset + 8);
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, d_offset + 8);
 }

 static inline void gen_op_movq(int d_offset, int s_offset) {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, s_offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, d_offset);
 }

 static inline void gen_op_movl(int d_offset, int s_offset) {
-    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
-    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+    tcg_gen_ld_i32(cpu_tmp2_i32, tcg_env, s_offset);
+    tcg_gen_st_i32(cpu_tmp2_i32, tcg_env, d_offset);
 }

 static inline void gen_op_movq_env_0(int d_offset) {
     tcg_gen_movi_i64(cpu_tmp1_i64, 0);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, d_offset);
 }

 #define SSE_SPECIAL ((void *) 1)
@@ -3179,7 +3179,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             if (b1 & 1) {
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
             }
             break;
@@ -3187,12 +3187,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
-                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, fpregs[reg].mmx));
+                tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, fpregs[reg].mmx));
             } else
 #endif
             {
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, fpregs[reg].mmx));
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
             }
@@ -3201,13 +3201,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[reg]));
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[reg]));
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
             }
@@ -3218,8 +3218,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offsetof(CPUX86State, fpregs[rm].mmx));
-                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offsetof(CPUX86State, fpregs[reg].mmx));
+                tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env, offsetof(CPUX86State, fpregs[rm].mmx));
+                tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, offsetof(CPUX86State, fpregs[reg].mmx));
             }
             break;
         case 0x010: /* movups */
@@ -3240,11 +3240,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
                 gen_op_movl_T0_0();
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(1)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(2)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(3)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(1)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(2)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)),
@@ -3256,8 +3256,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State, xmm_regs[reg].XMM_Q(0)));
                 gen_op_movl_T0_0();
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(2)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(3)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(2)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State, xmm_regs[reg].XMM_Q(0)),
@@ -3340,7 +3340,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 goto illegal_op;
             field_length = cpu_ldub_code(s->env, s->pc++) & 0x3F;
             bit_index = cpu_ldub_code(s->env, s->pc++) & 0x3F;
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[reg]));
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[reg]));
             if (b1 == 1)
                 gen_helper_extrq_i(cpu_ptr0, tcg_constant_i32(bit_index), tcg_constant_i32(field_length));
             else
@@ -3349,24 +3349,24 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, offsetof(CPUX86State, fpregs[reg].mmx));
+                tcg_gen_ld_i64(cpu_T[0], tcg_env, offsetof(CPUX86State, fpregs[reg].mmx));
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, fpregs[reg].mmx.MMX_L(0)));
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_Q(0)));
+                tcg_gen_ld_i64(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_Q(0)));
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
@@ -3407,7 +3407,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3455,15 +3455,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             val = cpu_ldub_code(s->env, s->pc++);
             if (is_xmm) {
                 gen_op_movl_T0_im(val);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
                 gen_op_movl_T0_0();
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_t0.XMM_L(1)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_t0.XMM_L(1)));
                 op1_offset = offsetof(CPUX86State, xmm_t0);
             } else {
                 gen_op_movl_T0_im(val);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mmx_t0.MMX_L(0)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, mmx_t0.MMX_L(0)));
                 gen_op_movl_T0_0();
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mmx_t0.MMX_L(1)));
+                tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State, mmx_t0);
             }
             sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
@@ -3476,20 +3476,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State, fpregs[rm].mmx);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op1_offset);
             ((void (*)(TCGv_ptr, TCGv_ptr)) sse_op2)(cpu_ptr0, cpu_ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[rm]));
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[rm]));
             gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[rm]));
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[rm]));
             gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
@@ -3506,8 +3506,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 op2_offset = offsetof(CPUX86State, fpregs[rm].mmx);
             }
             op1_offset = offsetof(CPUX86State, xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
             switch (b >> 8) {
                 case 0x0:
                     gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
@@ -3523,7 +3523,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
             gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State, xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
             sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
             if (ot == OT_LONG) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3546,8 +3546,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 op2_offset = offsetof(CPUX86State, xmm_regs[rm]);
             }
             op1_offset = offsetof(CPUX86State, fpregs[reg & 7].mmx);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
             switch (b) {
                 case 0x02c:
                     gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
@@ -3574,7 +3574,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                     gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
                     gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
+                    tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State, xmm_t0);
             } else {
@@ -3582,7 +3582,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 op2_offset = offsetof(CPUX86State, xmm_regs[rm]);
             }
             sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 + (b & 1) * 4];
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op2_offset);
             if (ot == OT_LONG) {
                 ((void (*)(TCGv_i32, TCGv_ptr)) sse_op2)(cpu_tmp2_i32, cpu_ptr0);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
@@ -3598,10 +3598,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             val = cpu_ldub_code(s->env, s->pc++);
             if (b1) {
                 val &= 7;
-                tcg_gen_st16_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_W(val)));
+                tcg_gen_st16_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_W(val)));
             } else {
                 val &= 3;
-                tcg_gen_st16_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, fpregs[reg].mmx.MMX_W(val)));
+                tcg_gen_st16_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, fpregs[reg].mmx.MMX_W(val)));
             }
             break;
         case 0xc5: /* pextrw */
@@ -3613,11 +3613,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[rm].XMM_W(val)));
+                tcg_gen_ld16u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[rm].XMM_W(val)));
             } else {
                 val &= 3;
                 rm = (modrm & 7);
-                tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, fpregs[rm].mmx.MMX_W(val)));
+                tcg_gen_ld16u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_op_mov_reg_T0(ot, reg);
@@ -3650,11 +3650,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 goto illegal_op;
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, xmm_regs[rm]));
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, xmm_regs[rm]));
                 gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
             } else {
                 rm = (modrm & 7);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State, fpregs[rm].mmx));
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, offsetof(CPUX86State, fpregs[rm].mmx));
                 gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
             }
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
@@ -3702,12 +3702,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                     case 0x34: /* pmovsxwq, pmovzxwq */
                         tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, (s->mem_index >> 2) - 1);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset + offsetof(XMMReg, XMM_L(0)));
+                        tcg_gen_st_i32(cpu_tmp2_i32, tcg_env, op2_offset + offsetof(XMMReg, XMM_L(0)));
                         break;
                     case 0x22:
                     case 0x32: /* pmovsxbq, pmovzxbq */
                         tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0, (s->mem_index >> 2) - 1);
-                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset + offsetof(XMMReg, XMM_W(0)));
+                        tcg_gen_st16_tl(cpu_tmp0, tcg_env, op2_offset + offsetof(XMMReg, XMM_W(0)));
                         break;
                     case 0x2a: /* movntqda */
                         gen_ldo_env_A0(s->mem_index, op1_offset);
@@ -3729,8 +3729,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             if (sse_op2 == SSE_SPECIAL)
                 goto illegal_op;

-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
             ((void (*)(TCGv_ptr, TCGv_ptr)) sse_op2)(cpu_ptr0, cpu_ptr1);

             if (b == 0x17)
@@ -3791,14 +3791,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
             val = cpu_ldub_code(s->env, s->pc++);
             switch (b) {
                 case 0x14: /* pextrb */
-                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_B(val & 15)));
+                    tcg_gen_ld8u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_B(val & 15)));
                     if (mod == 3)
                         gen_op_mov_reg_T0(ot, rm);
                     else
                         tcg_gen_qemu_st8(cpu_T[0], cpu_A0, (s->mem_index >> 2) - 1);
                     break;
                 case 0x15: /* pextrw */
-                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_W(val & 7)));
+                    tcg_gen_ld16u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_W(val & 7)));
                     if (mod == 3)
                         gen_op_mov_reg_T0(ot, rm);
                     else
@@ -3806,7 +3806,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                     break;
                 case 0x16:
                     if (ot == OT_LONG) { /* pextrd */
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(cpu_tmp2_i32, tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(val & 3)));
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         if (mod == 3)
@@ -3815,7 +3815,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                             tcg_gen_qemu_st32(cpu_T[0], cpu_A0, (s->mem_index >> 2) - 1);
                     } else { /* pextrq */
 #ifdef TARGET_X86_64
-                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_ld_i64(cpu_tmp1_i64, tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_Q(val & 1)));
                         if (mod == 3)
                             gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
@@ -3827,7 +3827,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                     }
                     break;
                 case 0x17: /* extractps */
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(val & 3)));
+                    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_L(val & 3)));
                     if (mod == 3)
                         gen_op_mov_reg_T0(ot, rm);
                     else
@@ -3838,29 +3838,29 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                         gen_op_mov_TN_reg(OT_LONG, 0, rm);
                     else
                         tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0, (s->mem_index >> 2) - 1);
-                    tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, xmm_regs[reg].XMM_B(val & 15)));
+                    tcg_gen_st8_tl(cpu_tmp0, tcg_env, offsetof(CPUX86State, xmm_regs[reg].XMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
                     if (mod == 3) {
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(cpu_tmp2_i32, tcg_env,
                                        offsetof(CPUX86State, xmm_regs[rm].XMM_L((val >> 6) & 3)));
                     } else {
                         tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, (s->mem_index >> 2) - 1);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
                     }
-                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                    tcg_gen_st_i32(cpu_tmp2_i32, tcg_env,
                                    offsetof(CPUX86State, xmm_regs[reg].XMM_L((val >> 4) & 3)));
                     if ((val >> 0) & 1)
-                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), cpu_env,
+                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(0)));
                     if ((val >> 1) & 1)
-                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), cpu_env,
+                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(1)));
                     if ((val >> 2) & 1)
-                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), cpu_env,
+                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(2)));
                     if ((val >> 3) & 1)
-                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), cpu_env,
+                        tcg_gen_st_i32(tcg_constant_i32(0 /*float32_zero*/), tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(3)));
                     break;
                 case 0x22:
@@ -3870,7 +3870,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                         else
                             tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, (s->mem_index >> 2) - 1);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_st_i32(cpu_tmp2_i32, tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_L(val & 3)));
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
@@ -3878,7 +3878,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                             gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                         else
                             tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, (s->mem_index >> 2) - 1);
-                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_st_i64(cpu_tmp1_i64, tcg_env,
                                        offsetof(CPUX86State, xmm_regs[reg].XMM_Q(val & 1)));
 #else
                         goto illegal_op;
@@ -3918,8 +3918,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 val |= 1 << 8;
             }

-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+            tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
             ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32)) sse_op2)(cpu_ptr0, cpu_ptr1, tcg_constant_i32(val));
             break;
         default:
@@ -3946,7 +3946,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 if (b1 == 2) { /* 32 bit access */
                     gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
+                    tcg_gen_st32_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, xmm_t0.XMM_L(0)));
                 } else { /* 64 bit access */
                     gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State, xmm_t0.XMM_D(0)));
@@ -3977,15 +3977,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 sse_op2 = sse_op_table5[val];
                 if (!sse_op2)
                     goto illegal_op;
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-                tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+                tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
                 ((void (*)(TCGv_ptr, TCGv_ptr)) sse_op2)(cpu_ptr0, cpu_ptr1);
                 break;
             case 0x70: /* pshufx insn */
             case 0xc6: /* pshufx insn */
                 val = cpu_ldub_code(s->env, s->pc++);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-                tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+                tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
                 ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32)) sse_op2)(cpu_ptr0, cpu_ptr1, tcg_constant_i32(val));
                 break;
             case 0xc2:
@@ -3994,8 +3994,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 if (val >= 8)
                     goto illegal_op;
                 sse_op2 = sse_op_table4[val][b1];
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-                tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+                tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
                 ((void (*)(TCGv_ptr, TCGv_ptr)) sse_op2)(cpu_ptr0, cpu_ptr1);
                 break;
             case 0xf7:
@@ -4014,13 +4014,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) {
                 }
                 gen_add_A0_ds_seg(s);

-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-                tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+                tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
                 ((void (*)(TCGv_ptr, TCGv_ptr, TCGv)) sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
                 break;
             default:
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-                tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+                tcg_gen_addi_ptr(cpu_ptr0, tcg_env, op1_offset);
+                tcg_gen_addi_ptr(cpu_ptr1, tcg_env, op2_offset);
                 ((void (*)(TCGv_ptr, TCGv_ptr)) sse_op2)(cpu_ptr0, cpu_ptr1);
                 break;
         }
@@ -4047,7 +4047,7 @@ void instr_gen_flags_update(void *context) {
 void instr_gen_pc_update(void *context, target_ulong pc, target_ulong cs_base);
 void instr_gen_pc_update(void *context, target_ulong pc, target_ulong cs_base) {
     tcg_gen_movi_tl(cpu_tmp0, pc - cs_base);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUArchState, eip));
+    tcg_gen_st_tl(cpu_tmp0, tcg_env, offsetof(CPUArchState, eip));

     instr_gen_flags_update(context);
 }
@@ -4067,7 +4067,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {

 #ifdef ENABLE_PRECISE_EXCEPTION_DEBUGGING
     tcg_gen_movi_tl(cpu_tmp0, pc_start - s->cs_base);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUArchState, precise_eip));
+    tcg_gen_st_tl(cpu_tmp0, tcg_env, offsetof(CPUArchState, precise_eip));
 #endif

     s->pc = pc_start;
@@ -4637,7 +4637,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
             gen_movtl_T1_im(next_eip);

 #if !defined(STATIC_TRANSLATOR)
-            tcg_gen_st_tl(cpu_T[1], cpu_env, offsetof(CPUArchState, return_address));
+            tcg_gen_st_tl(cpu_T[1], tcg_env, offsetof(CPUArchState, return_address));
 #endif

             gen_push_T1(s);
@@ -6281,7 +6281,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
             SET_TB_CALL_EIP(tval);
             gen_movtl_T0_im(next_eip);
 #if !defined(STATIC_TRANSLATOR)
-            tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUArchState, return_address));
+            tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUArchState, return_address));
 #endif
             gen_push_T0(s);
             gen_jmp(s, tval);
@@ -6497,11 +6497,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
             break;
         case 0xfc: /* cld */
             tcg_gen_movi_i32(cpu_tmp2_i32, 1);
-            tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+            tcg_gen_st_i32(cpu_tmp2_i32, tcg_env, offsetof(CPUX86State, df));
             break;
         case 0xfd: /* std */
             tcg_gen_movi_i32(cpu_tmp2_i32, -1);
-            tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+            tcg_gen_st_i32(cpu_tmp2_i32, tcg_env, offsetof(CPUX86State, df));
             break;

         /************************/
@@ -7003,7 +7003,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                     if (!s->pe || s->vm86)
                         goto illegal_op;
                     gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, ldt.selector));
+                    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, ldt.selector));
                     ot = OT_WORD;
                     if (mod == 3)
                         ot += s->dflag;
@@ -7026,7 +7026,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                    if (!s->pe || s->vm86)
                        goto illegal_op;
                    gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
-                   tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, tr.selector));
+                   tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, tr.selector));
                    ot = OT_WORD;
                    if (mod == 3)
                        ot += s->dflag;
@@ -7073,10 +7073,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                         goto illegal_op;
                     gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
                     gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
+                    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, gdt.limit));
                     gen_op_st_T0_A0(OT_WORD + s->mem_index);
                     gen_add_A0_im(s, 2);
-                    tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
+                    tcg_gen_ld_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, gdt.base));
                     if (!s->dflag)
                         gen_op_andl_T0_im(0xffffff);
                     gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
@@ -7120,10 +7120,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                 } else { /* sidt */
                     gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
                     gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
+                    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, idt.limit));
                     gen_op_st_T0_A0(OT_WORD + s->mem_index);
                     gen_add_A0_im(s, 2);
-                    tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
+                    tcg_gen_ld_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, idt.base));
                     if (!s->dflag)
                         gen_op_andl_T0_im(0xffffff);
                     gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
@@ -7224,20 +7224,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                     if (!s->dflag)
                         gen_op_andl_T0_im(0xffffff);
                     if (op == 2) {
-                        tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
-                        tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State, gdt.limit));
+                        tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, gdt.base));
+                        tcg_gen_st32_tl(cpu_T[1], tcg_env, offsetof(CPUX86State, gdt.limit));
                     } else {
-                        tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
-                        tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State, idt.limit));
+                        tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, idt.base));
+                        tcg_gen_st32_tl(cpu_T[1], tcg_env, offsetof(CPUX86State, idt.limit));
                     }
                 }
                 break;
             case 4: /* smsw */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
 #if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, cr[0]) + 4);
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, cr[0]) + 4);
 #else
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, cr[0]));
+                tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, cr[0]));
 #endif
                 gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
                 break;
@@ -7273,10 +7273,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, segs[R_GS].base));
-                    tcg_gen_ld_tl(cpu_T[1], cpu_env, offsetof(CPUX86State, kernelgsbase));
-                    tcg_gen_st_tl(cpu_T[1], cpu_env, offsetof(CPUX86State, segs[R_GS].base));
-                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, kernelgsbase));
+                    tcg_gen_ld_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, segs[R_GS].base));
+                    tcg_gen_ld_tl(cpu_T[1], tcg_env, offsetof(CPUX86State, kernelgsbase));
+                    tcg_gen_st_tl(cpu_T[1], tcg_env, offsetof(CPUX86State, segs[R_GS].base));
+                    tcg_gen_st_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, kernelgsbase));
                 }
             } else
 #endif
@@ -7498,7 +7498,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, dr[reg]));
+                tcg_gen_ld_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, dr[reg]));
                 gen_op_mov_reg_T0(ot, rm);
             }
@@ -7572,7 +7572,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                     gen_helper_ldmxcsr(cpu_tmp2_i32);
                 } else {
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+                    tcg_gen_ld32u_tl(cpu_T[0], tcg_env, offsetof(CPUX86State, mxcsr));
                     gen_op_st_T0_A0(OT_LONG + s->mem_index);
                 }
                 break;
@@ -7673,37 +7673,37 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) {
 }

 void optimize_flags_init(void) {
-    cpu_cc_op = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, cc_op), "cc_op");
-    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src), "cc_src");
-    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst), "cc_dst");
-    cpu_cc_tmp = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_tmp), "cc_tmp");
+    cpu_cc_op = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, cc_op), "cc_op");
+    cpu_cc_src = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_src), "cc_src");
+    cpu_cc_dst = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_dst), "cc_dst");
+    cpu_cc_tmp = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_tmp), "cc_tmp");

 #ifdef TARGET_X86_64
-    cpu_regs[R_EAX] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_EAX]), "rax");
-    cpu_regs[R_ECX] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_ECX]), "rcx");
-    cpu_regs[R_EDX] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_EDX]), "rdx");
-    cpu_regs[R_EBX] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_EBX]), "rbx");
-    cpu_regs[R_ESP] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_ESP]), "rsp");
-    cpu_regs[R_EBP] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_EBP]), "rbp");
-    cpu_regs[R_ESI] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_ESI]), "rsi");
-    cpu_regs[R_EDI] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[R_EDI]), "rdi");
-    cpu_regs[8] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[8]), "r8");
-    cpu_regs[9] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[9]), "r9");
-    cpu_regs[10] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[10]), "r10");
-    cpu_regs[11] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[11]), "r11");
-    cpu_regs[12] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[12]), "r12");
-    cpu_regs[13] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[13]), "r13");
-    cpu_regs[14] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[14]), "r14");
-    cpu_regs[15] = tcg_global_mem_new_i64(cpu_env, offsetof(CPUX86State, regs[15]), "r15");
+    cpu_regs[R_EAX] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_EAX]), "rax");
+    cpu_regs[R_ECX] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_ECX]), "rcx");
+    cpu_regs[R_EDX] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_EDX]), "rdx");
+    cpu_regs[R_EBX] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_EBX]), "rbx");
+    cpu_regs[R_ESP] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_ESP]), "rsp");
+    cpu_regs[R_EBP] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_EBP]), "rbp");
+    cpu_regs[R_ESI] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_ESI]), "rsi");
+    cpu_regs[R_EDI] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[R_EDI]), "rdi");
+    cpu_regs[8] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[8]), "r8");
+    cpu_regs[9] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[9]), "r9");
+    cpu_regs[10] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[10]), "r10");
+    cpu_regs[11] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[11]), "r11");
+    cpu_regs[12] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[12]), "r12");
+    cpu_regs[13] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[13]), "r13");
+    cpu_regs[14] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[14]), "r14");
+    cpu_regs[15] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, regs[15]), "r15");
 #else
-    cpu_regs[R_EAX] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_EAX]), "eax");
-    cpu_regs[R_ECX] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_ECX]), "ecx");
-    cpu_regs[R_EDX] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_EDX]), "edx");
-    cpu_regs[R_EBX] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_EBX]), "ebx");
-    cpu_regs[R_ESP] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_ESP]), "esp");
-    cpu_regs[R_EBP] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_EBP]), "ebp");
-    cpu_regs[R_ESI] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_ESI]), "esi");
-    cpu_regs[R_EDI] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUX86State, regs[R_EDI]), "edi");
+    cpu_regs[R_EAX] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_EAX]), "eax");
+    cpu_regs[R_ECX] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_ECX]), "ecx");
+    cpu_regs[R_EDX] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_EDX]), "edx");
+    cpu_regs[R_EBX] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_EBX]), "ebx");
+    cpu_regs[R_ESP] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_ESP]), "esp");
+    cpu_regs[R_EBP] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_EBP]), "ebp");
+    cpu_regs[R_ESI] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_ESI]), "esi");
+    cpu_regs[R_EDI] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, regs[R_EDI]), "edi");
 #endif
 }
@@ -7713,7 +7713,7 @@ static inline void gen_tb_start(TranslationBlock *tb) {
     tcg_ctx->exitreq_label = gen_new_label();
     exit_request = tcg_temp_new_i32();
-    tcg_gen_ld_i32(exit_request, cpu_env, offsetof(CPUArchState, exit_request));
+    tcg_gen_ld_i32(exit_request, tcg_env, offsetof(CPUArchState, exit_request));

     tcg_gen_brcondi_i32(TCG_COND_NE, exit_request, 0, tcg_ctx->exitreq_label);
 }
@@ -7821,7 +7821,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, TranslationB
        temporary one in case we regenerate LLVM bitcode */
 #ifndef STATIC_TRANSLATOR
     tcg_gen_movi_i64(cpu_tmp1_i64, (uint64_t) tb);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offsetof(CPUArchState, se_current_tb));
+    tcg_gen_st_i64(cpu_tmp1_i64, tcg_env, offsetof(CPUArchState, se_current_tb));

     if (unlikely(*g_sqi.events.on_translate_block_start_signals_count && dc->instrument)) {
         g_sqi.events.on_translate_block_start(dc, tb, pc_start);
diff --git a/libcpu/src/translate-all.c b/libcpu/src/translate-all.c
index d73679d57..8cf02e6fd 100644
--- a/libcpu/src/translate-all.c
+++ b/libcpu/src/translate-all.c
@@ -177,7 +177,6 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb) {
     tcg_ctx->tlb_fast_offset = offsetof(CPUX86State, tlb_table); // (int) offsetof(ArchCPU, neg.tlb.f) - (int) offsetof(ArchCPU, env);
 #endif
-    tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;

 #ifdef TCG_GUEST_DEFAULT_MO
     tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
 #else
diff --git a/libs2ecore/src/CMakeLists.txt b/libs2ecore/src/CMakeLists.txt
index 1951efea8..b27b16148 100644
--- a/libs2ecore/src/CMakeLists.txt
+++ b/libs2ecore/src/CMakeLists.txt
@@ -60,7 +60,7 @@ set(WERROR_FLAGS "-Werror -Wno-zero-length-array -Wno-c99-extensions \
                   -Wno-deprecated-declarations -Wno-initializer-overrides \
                   -Wno-zero-length-array")

-set(COMMON_FLAGS "-D__STDC_FORMAT_MACROS -D_GNU_SOURCE -DNEED_CPU_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DTARGET_PHYS_ADDR_BITS=64")
+set(COMMON_FLAGS "-D__STDC_FORMAT_MACROS -D_GNU_SOURCE -DNEED_CPU_H -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DTARGET_PHYS_ADDR_BITS=64 -DCOMPILING_PER_TARGET")
 set(COMMON_FLAGS "${COMMON_FLAGS} -Wall -fPIC -fno-strict-aliasing -fexceptions -std=c++20")

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WERROR_FLAGS} ${COMMON_FLAGS}")
diff --git a/libs2ecore/src/CorePluginInterface.cpp b/libs2ecore/src/CorePluginInterface.cpp
index 950fc2a3d..923f448b9 100644
--- a/libs2ecore/src/CorePluginInterface.cpp
+++ b/libs2ecore/src/CorePluginInterface.cpp
@@ -102,10 +102,10 @@ static void s2e_tcg_instrument_code(ExecutionSignal *signal, uint64_t pc, uint64
     if (nextpc != (uint64_t) -1) {
 #if TCG_TARGET_REG_BITS == 64 && defined(TARGET_X86_64)
         TCGv_i64 tpc = tcg_constant_i64((tcg_target_ulong) nextpc);
-        tcg_gen_st_i64(tpc, cpu_env, offsetof(CPUX86State, eip));
+        tcg_gen_st_i64(tpc, tcg_env, offsetof(CPUX86State, eip));
 #else
         TCGv_i32 tpc = tcg_constant_i32((tcg_target_ulong) nextpc);
-        tcg_gen_st_i32(tpc, cpu_env, offsetof(CPUX86State, eip));
+        tcg_gen_st_i32(tpc, tcg_env, offsetof(CPUX86State, eip));
 #endif
     }
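The new header added below replaces the per-architecture GETPC() ladder deleted from exec.h with a single definition based on __builtin_extract_return_addr. A hedged sketch of how GETPC() is consumed (the usual QEMU-style convention; restore_state here is a stand-in, not a function in this patch):

```cpp
#include <cstdint>
#include <cstdio>

// Same definition the new header installs for the common (non-TCI) case.
#define GETPC() ((uintptr_t) __builtin_extract_return_addr(__builtin_return_address(0)))

// Stub standing in for cpu_restore_state(); real code maps host_pc back to
// a guest pc via the sleb128 search table shown earlier.
static void restore_state(uintptr_t host_pc) {
    printf("unwinding from host pc %#lx\n", (unsigned long) host_pc);
}

// Convention: GETPC() must be evaluated in the outermost helper, before any
// further calls, so the return address still points into translated code.
void helper_raise_fault(void) {
    restore_state(GETPC());
}

int main() { helper_raise_fault(); }
```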
diff --git a/libtcg/include/tcg/accel/getpc.h b/libtcg/include/tcg/accel/getpc.h
new file mode 100644
index 000000000..35e9c1c7a
--- /dev/null
+++ b/libtcg/include/tcg/accel/getpc.h
@@ -0,0 +1,21 @@
+/*
+ * Get host pc for helper unwinding.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef ACCEL_TCG_GETPC_H
+#define ACCEL_TCG_GETPC_H
+
+/* GETPC is the true target of the return instruction that we'll execute. */
+#ifdef CONFIG_TCG_INTERPRETER
+extern __thread uintptr_t tci_tb_ptr;
+#define GETPC() tci_tb_ptr
+#elif defined(SYMBEX_LLVM_LIB)
+#define GETPC() 0
+#else
+#define GETPC() ((uintptr_t) __builtin_extract_return_addr(__builtin_return_address(0)))
+#endif
+
+#endif /* ACCEL_TCG_GETPC_H */
diff --git a/libtcg/include/tcg/accel/plugin-helpers.h b/libtcg/include/tcg/accel/plugin-helpers.h
deleted file mode 100644
index 8e685e065..000000000
--- a/libtcg/include/tcg/accel/plugin-helpers.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifdef CONFIG_PLUGIN
-DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, ptr)
-DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, i32, i64, ptr)
-#endif
diff --git a/libtcg/include/tcg/accel/tcg-runtime.h b/libtcg/include/tcg/accel/tcg-runtime.h
index 4033bdce7..d53cd1db2 100644
--- a/libtcg/include/tcg/accel/tcg-runtime.h
+++ b/libtcg/include/tcg/accel/tcg-runtime.h
@@ -54,6 +54,12 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64,
 #if HAVE_CMPXCHG128
 DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG, i128, env, i64, i128, i128, i32)
 DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG, i128, env, i64, i128, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_xchgo_be, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_xchgo_le, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_ando_be, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_ando_le, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_oro_be, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_oro_le, TCG_CALL_NO_WG, i128, env, i64, i128, i32)
 #endif

 DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo, TCG_CALL_NO_WG, i128, env, i64, i128, i128, i32)
@@ -275,4 +281,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

+DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/libtcg/include/tcg/exec/helper-gen-common.h b/libtcg/include/tcg/exec/helper-gen-common.h
index 449c33ef7..196225274 100644
--- a/libtcg/include/tcg/exec/helper-gen-common.h
+++ b/libtcg/include/tcg/exec/helper-gen-common.h
@@ -11,8 +11,4 @@
 #include "helper-gen.h.inc"
 #undef HELPER_H

-#define HELPER_H "tcg/accel/plugin-helpers.h"
-#include "helper-gen.h.inc"
-#undef HELPER_H
-
 #endif /* HELPER_GEN_COMMON_H */
diff --git a/libtcg/include/tcg/exec/helper-gen.h.inc b/libtcg/include/tcg/exec/helper-gen.h.inc
index e414e247f..cb0214678 100644
--- a/libtcg/include/tcg/exec/helper-gen.h.inc
+++ b/libtcg/include/tcg/exec/helper-gen.h.inc
@@ -8,13 +8,14 @@

 #include "tcg/tcg.h"
 #include "tcg/helper-info.h"
-#include "helper-head.h"
+#include "helper-head.h.inc"

 #define DEF_HELPER_FLAGS_0(name, flags, ret) \
 extern TCGHelperInfo glue(helper_info_, name); \
 static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \
 { \
-    tcg_gen_call0(&glue(helper_info_, name), dh_retvar(ret)); \
+    tcg_gen_call0(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret)); \
 }

 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
@@ -22,7 +23,8 @@ extern TCGHelperInfo glue(helper_info_, name); \
 static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1)) \
 { \
-    tcg_gen_call1(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call1(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1)); \
 }
@@ -31,7 +33,8 @@ extern TCGHelperInfo glue(helper_info_, name); \
 static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1), dh_arg_decl(t2, 2)) \
 { \
-    tcg_gen_call2(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call2(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2)); \
 }
@@ -40,7 +43,8 @@ extern TCGHelperInfo glue(helper_info_, name); \
 static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \
 { \
-    tcg_gen_call3(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call3(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3)); \
 }
@@ -50,7 +54,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), \
     dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \
 { \
-    tcg_gen_call4(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call4(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2), \
                   dh_arg(t3, 3), dh_arg(t4, 4)); \
 }
@@ -61,7 +66,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \
     dh_arg_decl(t4, 4), dh_arg_decl(t5, 5)) \
 { \
-    tcg_gen_call5(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call5(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
                   dh_arg(t4, 4), dh_arg(t5, 5)); \
 }
@@ -72,7 +78,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \
     dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6)) \
 { \
-    tcg_gen_call6(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call6(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
                   dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6)); \
 }
@@ -84,7 +91,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
     dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \
     dh_arg_decl(t7, 7)) \
 { \
-    tcg_gen_call7(&glue(helper_info_, name), dh_retvar(ret), \
+    tcg_gen_call7(glue(helper_info_,name).func, \
+                  &glue(helper_info_,name), dh_retvar(ret), \
                   dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
                   dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \
                   dh_arg(t7, 7)); \
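The helper-gen changes above track an upstream TCG API change: tcg_gen_callN now takes the helper's entry point as an explicit first argument alongside the TCGHelperInfo descriptor, instead of fetching it out of the descriptor at call-generation time. A self-contained model of that shape (illustrative only; these are not the real TCG types):

```cpp
// Model of the new calling convention: the raw function pointer travels
// next to the descriptor, so call sites can substitute a different entry
// point while keeping the same metadata.
#include <cassert>

struct HelperInfo {
    void (*func)(int); // host entry point
    const char *name;  // plus flags/typemask in the real TCGHelperInfo
};

static int last;
static void helper_inc(int x) { last = x + 1; }

HelperInfo helper_info_inc = {helper_inc, "inc"};

// Analogue of tcg_gen_call1(info.func, &info, ...).
static void gen_call1(void (*func)(int), const HelperInfo *info, int arg) {
    (void) info; // descriptor still consulted for call flags in real code
    func(arg);
}

int main() {
    gen_call1(helper_info_inc.func, &helper_info_inc, 41);
    assert(last == 42);
}
```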
+78,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6)) \ { \ - tcg_gen_call6(&glue(helper_info_, name), dh_retvar(ret), \ + tcg_gen_call6(glue(helper_info_,name).func, \ + &glue(helper_info_,name), dh_retvar(ret), \ dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6)); \ } @@ -84,7 +91,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \ dh_arg_decl(t7, 7)) \ { \ - tcg_gen_call7(&glue(helper_info_, name), dh_retvar(ret), \ + tcg_gen_call7(glue(helper_info_,name).func, \ + &glue(helper_info_,name), dh_retvar(ret), \ dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \ dh_arg(t7, 7)); \ diff --git a/libtcg/include/tcg/exec/helper-head.h b/libtcg/include/tcg/exec/helper-head.h deleted file mode 100644 index 70083ae91..000000000 --- a/libtcg/include/tcg/exec/helper-head.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Helper file for declaring TCG helper functions. - * Used by other helper files. - */ - -#ifndef EXEC_HELPER_HEAD_H -#define EXEC_HELPER_HEAD_H - -#include "fpu/softfloat-types.h" - -#define HELPER(name) glue(helper_, name) - -/* Some types that make sense in C, but not for TCG. */ -#define dh_alias_i32 i32 -#define dh_alias_s32 i32 -#define dh_alias_int i32 -#define dh_alias_i64 i64 -#define dh_alias_s64 i64 -#define dh_alias_i128 i128 -#define dh_alias_f16 i32 -#define dh_alias_f32 i32 -#define dh_alias_f64 i64 -#define dh_alias_ptr ptr -#define dh_alias_cptr ptr -#define dh_alias_env ptr -#define dh_alias_void void -#define dh_alias_noreturn noreturn -#define dh_alias(t) glue(dh_alias_, t) - -#define dh_ctype_i32 uint32_t -#define dh_ctype_s32 int32_t -#define dh_ctype_int int -#define dh_ctype_i64 uint64_t -#define dh_ctype_s64 int64_t -#define dh_ctype_i128 Int128 -#define dh_ctype_f16 uint32_t -#define dh_ctype_f32 float32 -#define dh_ctype_f64 float64 -#define dh_ctype_ptr void * -#define dh_ctype_cptr const void * -#define dh_ctype_env CPUArchState * -#define dh_ctype_void void -#define dh_ctype_noreturn QEMU_NORETURN void -#define dh_ctype(t) dh_ctype_##t - -#ifdef NEED_CPU_H -#ifdef TARGET_LONG_BITS -#if TARGET_LONG_BITS == 32 -#define dh_alias_tl i32 -#define dh_typecode_tl dh_typecode_i32 -#else -#define dh_alias_tl i64 -#define dh_typecode_tl dh_typecode_i64 -#endif -#endif -#define dh_ctype_tl target_ulong -#endif - -/* We can't use glue() here because it falls foul of C preprocessor - recursive expansion rules. 
*/ -#define dh_retvar_decl0_void void -#define dh_retvar_decl0_noreturn void -#define dh_retvar_decl0_i32 TCGv_i32 retval -#define dh_retvar_decl0_i64 TCGv_i64 retval -#define dh_retval_decl0_i128 TCGv_i128 retval -#define dh_retvar_decl0_ptr TCGv_ptr retval -#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t)) - -#define dh_retvar_decl_void -#define dh_retvar_decl_noreturn -#define dh_retvar_decl_i32 TCGv_i32 retval, -#define dh_retvar_decl_i64 TCGv_i64 retval, -#define dh_retvar_decl_i128 TCGv_i128 retval, -#define dh_retvar_decl_ptr TCGv_ptr retval, -#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) - -#define dh_retvar_void NULL -#define dh_retvar_noreturn NULL -#define dh_retvar_i32 tcgv_i32_temp(retval) -#define dh_retvar_i64 tcgv_i64_temp(retval) -#define dh_retvar_i128 tcgv_i128_temp(retval) -#define dh_retvar_ptr tcgv_ptr_temp(retval) -#define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) - -#define dh_typecode_void 0 -#define dh_typecode_noreturn 0 -#define dh_typecode_i32 2 -#define dh_typecode_s32 3 -#define dh_typecode_i64 4 -#define dh_typecode_s64 5 -#define dh_typecode_ptr 6 -#define dh_typecode_i128 7 -#define dh_typecode_int dh_typecode_s32 -#define dh_typecode_f16 dh_typecode_i32 -#define dh_typecode_f32 dh_typecode_i32 -#define dh_typecode_f64 dh_typecode_i64 -#define dh_typecode_cptr dh_typecode_ptr -#define dh_typecode_env dh_typecode_ptr -#define dh_typecode(t) dh_typecode_##t - -#define dh_callflag_i32 0 -#define dh_callflag_i64 0 -#define dh_callflag_i128 0 -#define dh_callflag_ptr 0 -#define dh_callflag_void 0 -#define dh_callflag_noreturn TCG_CALL_NO_RETURN -#define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) - -#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) - -#define dh_arg(t, n) glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) - -#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) - -#define DEF_HELPER_0(name, ret) DEF_HELPER_FLAGS_0(name, 0, ret) -#define DEF_HELPER_1(name, ret, t1) DEF_HELPER_FLAGS_1(name, 0, ret, t1) -#define DEF_HELPER_2(name, ret, t1, t2) DEF_HELPER_FLAGS_2(name, 0, ret, t1, t2) -#define DEF_HELPER_3(name, ret, t1, t2, t3) DEF_HELPER_FLAGS_3(name, 0, ret, t1, t2, t3) -#define DEF_HELPER_4(name, ret, t1, t2, t3, t4) DEF_HELPER_FLAGS_4(name, 0, ret, t1, t2, t3, t4) -#define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5) -#define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6) -#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) - -/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ - -#endif /* EXEC_HELPER_HEAD_H */ diff --git a/libtcg/include/tcg/exec/helper-head.h.inc b/libtcg/include/tcg/exec/helper-head.h.inc new file mode 100644 index 000000000..ff19ba355 --- /dev/null +++ b/libtcg/include/tcg/exec/helper-head.h.inc @@ -0,0 +1,149 @@ +/* + * Helper file for declaring TCG helper functions. + * Used by other helper files. + */ + +#ifndef EXEC_HELPER_HEAD_H +#define EXEC_HELPER_HEAD_H + +#include + +#define HELPER(name) glue(helper_, name) + +/* Some types that make sense in C, but not for TCG. 
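The dh_typemask packing shown in the deleted header above (and retained unchanged in the replacement below) deserves a worked example: each argument slot gets a 3-bit type code, with slot 0 reserved for the return type. For a hypothetical helper declared as DEF_HELPER_FLAGS_2(name, flags, i32, env, i64), the typemask is assembled from the dh_typecode values in the table:

    /* slot 0: return i32 -> code 2
     * slot 1: env (ptr)  -> code 6
     * slot 2: i64        -> code 4
     */
    unsigned typemask = (2 << (0 * 3))   /* 0x002 */
                      | (6 << (1 * 3))   /* 0x030 */
                      | (4 << (2 * 3));  /* 0x100 */
    /* typemask == 0x132 */

With MAX_CALL_IARGS of 7 plus the return slot, 8 slots at 3 bits each fit comfortably in a 32-bit mask.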
*/ +#define dh_alias_i32 i32 +#define dh_alias_s32 i32 +#define dh_alias_int i32 +#define dh_alias_i64 i64 +#define dh_alias_s64 i64 +#define dh_alias_i128 i128 +#define dh_alias_f16 i32 +#define dh_alias_f32 i32 +#define dh_alias_f64 i64 +#define dh_alias_ptr ptr +#define dh_alias_cptr ptr +#define dh_alias_env ptr +#define dh_alias_fpst ptr +#define dh_alias_void void +#define dh_alias_noreturn noreturn +#define dh_alias(t) glue(dh_alias_, t) + +#define dh_ctype_i32 uint32_t +#define dh_ctype_s32 int32_t +#define dh_ctype_int int +#define dh_ctype_i64 uint64_t +#define dh_ctype_s64 int64_t +#define dh_ctype_i128 Int128 +#define dh_ctype_f16 uint32_t +#define dh_ctype_f32 float32 +#define dh_ctype_f64 float64 +#define dh_ctype_ptr void * +#define dh_ctype_cptr const void * +#define dh_ctype_env CPUArchState * +#define dh_ctype_fpst float_status * +#define dh_ctype_void void +#define dh_ctype_noreturn G_NORETURN void +#define dh_ctype(t) dh_ctype_##t + +#ifdef COMPILING_PER_TARGET +# ifdef TARGET_LONG_BITS +# if TARGET_LONG_BITS == 32 +# define dh_alias_tl i32 +# define dh_typecode_tl dh_typecode_i32 +# else +# define dh_alias_tl i64 +# define dh_typecode_tl dh_typecode_i64 +# endif +# endif +# define dh_ctype_tl target_ulong +#endif /* COMPILING_PER_TARGET */ + +#if __SIZEOF_POINTER__ == 4 +# define dh_alias_vaddr i32 +# define dh_typecode_vaddr dh_typecode_i32 +#elif __SIZEOF_POINTER__ == 8 +# define dh_alias_vaddr i64 +# define dh_typecode_vaddr dh_typecode_i64 +#else +# error "sizeof pointer is different from {4,8}" +#endif /* __SIZEOF_POINTER__ */ +# define dh_ctype_vaddr uintptr_t + +/* We can't use glue() here because it falls foul of C preprocessor + recursive expansion rules. */ +#define dh_retvar_decl0_void void +#define dh_retvar_decl0_noreturn void +#define dh_retvar_decl0_i32 TCGv_i32 retval +#define dh_retvar_decl0_i64 TCGv_i64 retval +#define dh_retval_decl0_i128 TCGv_i128 retval +#define dh_retvar_decl0_ptr TCGv_ptr retval +#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t)) + +#define dh_retvar_decl_void +#define dh_retvar_decl_noreturn +#define dh_retvar_decl_i32 TCGv_i32 retval, +#define dh_retvar_decl_i64 TCGv_i64 retval, +#define dh_retvar_decl_i128 TCGv_i128 retval, +#define dh_retvar_decl_ptr TCGv_ptr retval, +#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) + +#define dh_retvar_void NULL +#define dh_retvar_noreturn NULL +#define dh_retvar_i32 tcgv_i32_temp(retval) +#define dh_retvar_i64 tcgv_i64_temp(retval) +#define dh_retvar_i128 tcgv_i128_temp(retval) +#define dh_retvar_ptr tcgv_ptr_temp(retval) +#define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) + +#define dh_typecode_void 0 +#define dh_typecode_noreturn 0 +#define dh_typecode_i32 2 +#define dh_typecode_s32 3 +#define dh_typecode_i64 4 +#define dh_typecode_s64 5 +#define dh_typecode_ptr 6 +#define dh_typecode_i128 7 +#define dh_typecode_int dh_typecode_s32 +#define dh_typecode_f16 dh_typecode_i32 +#define dh_typecode_f32 dh_typecode_i32 +#define dh_typecode_f64 dh_typecode_i64 +#define dh_typecode_cptr dh_typecode_ptr +#define dh_typecode_env dh_typecode_ptr +#define dh_typecode_fpst dh_typecode_ptr +#define dh_typecode(t) dh_typecode_##t + +#define dh_callflag_i32 0 +#define dh_callflag_i64 0 +#define dh_callflag_i128 0 +#define dh_callflag_ptr 0 +#define dh_callflag_void 0 +#define dh_callflag_noreturn TCG_CALL_NO_RETURN +#define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) + +#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) + +#define dh_arg(t, n) \ + glue(glue(tcgv_, 
dh_alias(t)), _temp)(glue(arg, n)) + +#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) + +#define DEF_HELPER_0(name, ret) \ + DEF_HELPER_FLAGS_0(name, 0, ret) +#define DEF_HELPER_1(name, ret, t1) \ + DEF_HELPER_FLAGS_1(name, 0, ret, t1) +#define DEF_HELPER_2(name, ret, t1, t2) \ + DEF_HELPER_FLAGS_2(name, 0, ret, t1, t2) +#define DEF_HELPER_3(name, ret, t1, t2, t3) \ + DEF_HELPER_FLAGS_3(name, 0, ret, t1, t2, t3) +#define DEF_HELPER_4(name, ret, t1, t2, t3, t4) \ + DEF_HELPER_FLAGS_4(name, 0, ret, t1, t2, t3, t4) +#define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) \ + DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5) +#define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \ + DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6) +#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ + DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) + +/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ + +#endif /* EXEC_HELPER_HEAD_H */ diff --git a/libtcg/include/tcg/exec/helper-info.c.inc b/libtcg/include/tcg/exec/helper-info.c.inc index 88b774ff9..c816ed87f 100644 --- a/libtcg/include/tcg/exec/helper-info.c.inc +++ b/libtcg/include/tcg/exec/helper-info.c.inc @@ -7,7 +7,7 @@ #include #include -#include "helper-head.h" +#include "tcg/exec/helper-head.h.inc" /* * Need one more level of indirection before stringification diff --git a/libtcg/include/tcg/exec/helper-proto-common.h b/libtcg/include/tcg/exec/helper-proto-common.h index e6a566690..80b6afe77 100644 --- a/libtcg/include/tcg/exec/helper-proto-common.h +++ b/libtcg/include/tcg/exec/helper-proto-common.h @@ -13,8 +13,6 @@ #include "tcg/exec/helper-proto.h.inc" #undef HELPER_H -#define HELPER_H "tcg/accel/plugin-helpers.h" -#include "tcg/exec/helper-proto.h.inc" -#undef HELPER_H +#include "tcg/accel/getpc.h" #endif /* HELPER_PROTO_COMMON_H */ diff --git a/libtcg/include/tcg/exec/helper-proto.h.inc b/libtcg/include/tcg/exec/helper-proto.h.inc index 67df93c72..80cb6973a 100644 --- a/libtcg/include/tcg/exec/helper-proto.h.inc +++ b/libtcg/include/tcg/exec/helper-proto.h.inc @@ -5,7 +5,7 @@ * Define HELPER_H for the header file to be expanded. */ -#include "helper-head.h" +#include "helper-head.h.inc" /* * Work around an issue with --enable-lto, in which GCC's ipa-split pass diff --git a/libtcg/include/tcg/exec/plugin-gen.h b/libtcg/include/tcg/exec/plugin-gen.h new file mode 100644 index 000000000..10b39717a --- /dev/null +++ b/libtcg/include/tcg/exec/plugin-gen.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * plugin-gen.h - TCG-dependent definitions for generating plugin code + * + * This header should be included only from plugin.c and C files that emit + * TCG code. 
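The .h.inc renames above all serve the same X-macro discipline: a consumer defines HELPER_H to name a list of DEF_HELPER_* entries, includes one of the .h.inc files (which supplies DEF_HELPER_FLAGS_n definitions appropriate to that consumer: C prototypes, inline generators, or info structures), and undefines it again. A minimal sketch of the round trip, with a hypothetical helper list my-helpers.h (the list file name is an assumption for illustration):

    /* my-helpers.h (hypothetical):
     *   DEF_HELPER_FLAGS_2(myop, TCG_CALL_NO_RWG, i32, env, i32)
     */

    /* Expand the list once as C prototypes for the implementations... */
    #define HELPER_H "my-helpers.h"
    #include "tcg/exec/helper-proto.h.inc"
    /* ...yields: uint32_t helper_myop(CPUArchState *, uint32_t); */
    #undef HELPER_H

    /* ...and once more as inline code generators. */
    #define HELPER_H "my-helpers.h"
    #include "tcg/exec/helper-gen.h.inc"
    /* ...yields: static inline void gen_helper_myop(...) */
    #undef HELPER_H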
+ */ +#ifndef QEMU_PLUGIN_GEN_H +#define QEMU_PLUGIN_GEN_H + +#include + +struct DisasContextBase; +typedef struct CPUState CPUState; + +#ifdef CONFIG_PLUGIN + +bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db); +void plugin_gen_tb_end(CPUState *cpu, size_t num_insns); +void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db); +void plugin_gen_insn_end(void); + +void plugin_gen_disable_mem_helpers(void); + +#else /* !CONFIG_PLUGIN */ + +static inline bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db) { + return false; +} + +static inline void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db) { +} + +static inline void plugin_gen_insn_end(void) { +} + +static inline void plugin_gen_tb_end(CPUState *cpu, size_t num_insns) { +} + +static inline void plugin_gen_disable_mem_helpers(void) { +} + +#endif /* CONFIG_PLUGIN */ + +#endif /* QEMU_PLUGIN_GEN_H */ diff --git a/libtcg/include/tcg/helper-info.h b/libtcg/include/tcg/helper-info.h index 1c9da3475..7016eb875 100644 --- a/libtcg/include/tcg/helper-info.h +++ b/libtcg/include/tcg/helper-info.h @@ -1,5 +1,5 @@ /* - * TCG Helper Infomation Structure + * TCG Helper Information Structure * * Copyright (c) 2023 Linaro Ltd * @@ -12,12 +12,23 @@ #include #ifdef CONFIG_TCG_INTERPRETER +/* + * MacOSX 15 uses an old version of libffi which contains + * #if FFI_GO_CLOSURES + * but does not define that in <ffitarget.h>, included from <ffi.h>. + * This was fixed upstream with + * https://github.com/libffi/libffi/commit/c23e9a1c + * We don't care about go closures one way or the other; + * just suppress the warning. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wundef" #include <ffi.h> +#pragma GCC diagnostic pop #endif +#include "i386/tcg-target-reg-bits.h" -#ifdef __cplusplus -extern "C" { -#endif +#define MAX_CALL_IARGS 7 /* * Describe the calling convention of a given argument type. @@ -67,8 +78,6 @@ struct TCGHelperInfo { TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)]; }; -#ifdef __cplusplus -} -#endif +typedef struct TCGHelperInfo TCGHelperInfo; #endif /* TCG_HELPER_INFO_H */ diff --git a/libtcg/include/tcg/helper-tcg.h b/libtcg/include/tcg/helper-tcg.h deleted file mode 100644 index 25d5dbcf0..000000000 --- a/libtcg/include/tcg/helper-tcg.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Helper file for declaring TCG helper functions. - This one defines data structures private to tcg.c. */ - -#ifndef HELPER_TCG_H -#define HELPER_TCG_H - -#include "helper-head.h" - -/* Need one more level of indirection before stringification - to get all the macros expanded first.
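The indirection this deleted header's comment describes is the standard preprocessor idiom: the # operator stringifies its argument exactly as written, so a separate macro layer is needed whenever the argument should itself be macro-expanded before stringification. A minimal, self-contained illustration (names hypothetical):

    #define str_raw(s) #s          /* stringify verbatim           */
    #define str(s)     str_raw(s)  /* expand s, then stringify it  */

    #define NAME foo
    const char *a = str_raw(NAME); /* "NAME" */
    const char *b = str(NAME);     /* "foo"  */

In the helper tables the argument is a macro parameter, so routing it through str() gives it one expansion step before # applies.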
*/ -#define str(s) #s - -#define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ - {.func = HELPER(NAME), .name = str(NAME), .flags = FLAGS | dh_callflag(ret), .typemask = dh_typemask(ret, 0)}, - -#define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1)}, - -#define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2)}, - -#define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2) | dh_typemask(t3, 3)}, - -#define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = \ - dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4)}, - -#define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2) | dh_typemask(t3, 3) | \ - dh_typemask(t4, 4) | dh_typemask(t5, 5)}, - -#define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS | dh_callflag(ret), \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2) | dh_typemask(t3, 3) | \ - dh_typemask(t4, 4) | dh_typemask(t5, 5) | dh_typemask(t6, 6)}, - -#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ - {.func = HELPER(NAME), \ - .name = str(NAME), \ - .flags = FLAGS, \ - .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) | dh_typemask(t2, 2) | dh_typemask(t3, 3) | \ - dh_typemask(t4, 4) | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7)}, - -#include -// #include - -#undef str -#undef DEF_HELPER_FLAGS_0 -#undef DEF_HELPER_FLAGS_1 -#undef DEF_HELPER_FLAGS_2 -#undef DEF_HELPER_FLAGS_3 -#undef DEF_HELPER_FLAGS_4 -#undef DEF_HELPER_FLAGS_5 -#undef DEF_HELPER_FLAGS_6 -#undef DEF_HELPER_FLAGS_7 - -#endif /* HELPER_TCG_H */ diff --git a/libtcg/include/tcg/i386/tcg-target-has.h b/libtcg/include/tcg/i386/tcg-target-has.h new file mode 100644 index 000000000..51da8a323 --- /dev/null +++ b/libtcg/include/tcg/i386/tcg-target-has.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define target-specific opcode support + * Copyright (c) 2008 Fabrice Bellard + */ + +#ifndef TCG_TARGET_HAS_H +#define TCG_TARGET_HAS_H + +#include + +#define have_bmi1 (cpuinfo & CPUINFO_BMI1) +#define have_popcnt (cpuinfo & CPUINFO_POPCNT) +#define have_avx1 (cpuinfo & CPUINFO_AVX1) +#define have_avx2 (cpuinfo & CPUINFO_AVX2) +#define have_movbe (cpuinfo & CPUINFO_MOVBE) + +/* + * There are interesting instructions in AVX512, so long as we have AVX512VL, + * which indicates support for EVEX on sizes smaller than 512 bits. 
+ */ +#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && (cpuinfo & CPUINFO_AVX512F)) +#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl) +#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl) +#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) + +/* optional instructions */ +#if TCG_TARGET_REG_BITS == 64 +/* Keep 32-bit values zero-extended in a register. */ +#define TCG_TARGET_HAS_extr_i64_i32 1 +#endif + +#define TCG_TARGET_HAS_qemu_ldst_i128 (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)) + +#define TCG_TARGET_HAS_tst 1 + +/* We do not support older SSE systems, only beginning with AVX1. */ +#define TCG_TARGET_HAS_v64 have_avx1 +#define TCG_TARGET_HAS_v128 have_avx1 +#define TCG_TARGET_HAS_v256 have_avx2 + +#define TCG_TARGET_HAS_andc_vec 1 +#define TCG_TARGET_HAS_orc_vec have_avx512vl +#define TCG_TARGET_HAS_nand_vec have_avx512vl +#define TCG_TARGET_HAS_nor_vec have_avx512vl +#define TCG_TARGET_HAS_eqv_vec have_avx512vl +#define TCG_TARGET_HAS_not_vec have_avx512vl +#define TCG_TARGET_HAS_neg_vec 0 +#define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec have_avx512vl +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec have_avx512vl +#define TCG_TARGET_HAS_shi_vec 1 +#define TCG_TARGET_HAS_shs_vec 1 +#define TCG_TARGET_HAS_shv_vec have_avx2 +#define TCG_TARGET_HAS_mul_vec 1 +#define TCG_TARGET_HAS_sat_vec 1 +#define TCG_TARGET_HAS_minmax_vec 1 +#define TCG_TARGET_HAS_bitsel_vec have_avx512vl +#define TCG_TARGET_HAS_cmpsel_vec 1 +#define TCG_TARGET_HAS_tst_vec have_avx512bw + +#define TCG_TARGET_deposit_valid(type, ofs, len) \ + (((ofs) == 0 && ((len) == 8 || (len) == 16)) || (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8)) + +/* + * Check for the possibility of low byte/word extraction, high-byte extraction + * and zero-extending 32-bit right-shift. + * + * We cannot sign-extend from high byte to 64-bits without using the + * REX prefix that explicitly excludes access to the high-byte registers. + */ +static inline bool tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len) { + switch (ofs) { + case 0: + switch (len) { + case 8: + case 16: + return true; + case 32: + return type == TCG_TYPE_I64; + } + return false; + case 8: + return len == 8 && type == TCG_TYPE_I32; + } + return false; +} +#define TCG_TARGET_sextract_valid tcg_target_sextract_valid + +static inline bool tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len) { + if (type == TCG_TYPE_I64 && ofs + len == 32) { + return true; + } + switch (ofs) { + case 0: + return len == 8 || len == 16; + case 8: + return len == 8; + } + return false; +} +#define TCG_TARGET_extract_valid tcg_target_extract_valid + +#endif diff --git a/libtcg/include/tcg/i386/tcg-target-mo.h b/libtcg/include/tcg/i386/tcg-target-mo.h new file mode 100644 index 000000000..7567dc724 --- /dev/null +++ b/libtcg/include/tcg/i386/tcg-target-mo.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define target-specific memory model + * Copyright (c) 2008 Fabrice Bellard + */ + +#ifndef TCG_TARGET_MO_H +#define TCG_TARGET_MO_H + +/* + * This defines the natural memory order supported by this architecture + * before guarantees made by various barrier instructions. + * + * The x86 has a pretty strong memory ordering which only really + * allows for some stores to be re-ordered after loads. 
+ */ +#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) + +#endif diff --git a/libtcg/include/tcg/i386/tcg-target-opc.h.inc b/libtcg/include/tcg/i386/tcg-target-opc.h.inc new file mode 100644 index 000000000..8a5cb34db --- /dev/null +++ b/libtcg/include/tcg/i386/tcg-target-opc.h.inc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019 Linaro + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Target-specific opcodes for host vector expansion. These will be + * emitted by tcg_expand_vec_op. For those familiar with GCC internals, + * consider these to be UNSPEC with names. + */ + +DEF(x86_shufps_vec, 1, 2, 1, TCG_OPF_VECTOR) +DEF(x86_blend_vec, 1, 2, 1, TCG_OPF_VECTOR) +DEF(x86_packss_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(x86_packus_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(x86_psrldq_vec, 1, 1, 1, TCG_OPF_VECTOR) +DEF(x86_vperm2i128_vec, 1, 2, 1, TCG_OPF_VECTOR) +DEF(x86_punpckl_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(x86_punpckh_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(x86_vpshldi_vec, 1, 2, 1, TCG_OPF_VECTOR) +DEF(x86_vpshldv_vec, 1, 3, 0, TCG_OPF_VECTOR) +DEF(x86_vpshrdv_vec, 1, 3, 0, TCG_OPF_VECTOR) +DEF(x86_vgf2p8affineqb_vec, 1, 2, 1, TCG_OPF_VECTOR) diff --git a/libtcg/include/tcg/i386/tcg-target.h b/libtcg/include/tcg/i386/tcg-target.h index 980e2f997..fefa2dc17 100644 --- a/libtcg/include/tcg/i386/tcg-target.h +++ b/libtcg/include/tcg/i386/tcg-target.h @@ -25,8 +25,6 @@ #ifndef I386_TCG_TARGET_H #define I386_TCG_TARGET_H -#include "tcg/utils/host/x86_64/cpuinfo.h" - #define TCG_TARGET_INSN_UNIT_SIZE 1 #ifdef __x86_64__ @@ -90,160 +88,4 @@ typedef enum { TCG_REG_CALL_STACK = TCG_REG_ESP } TCGReg; -/* used for function call generation */ -#define TCG_TARGET_STACK_ALIGN 16 -#if defined(_WIN64) -#define TCG_TARGET_CALL_STACK_OFFSET 32 -#else -#define TCG_TARGET_CALL_STACK_OFFSET 0 -#endif -#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL -#if defined(_WIN64) -#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF -#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC -#elif TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL -#else -#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF -#endif - -#define have_bmi1 (cpuinfo & CPUINFO_BMI1) -#define have_popcnt (cpuinfo & CPUINFO_POPCNT) -#define have_avx1 (cpuinfo & CPUINFO_AVX1) -#define have_avx2 (cpuinfo & CPUINFO_AVX2) -#define have_movbe 
(cpuinfo & CPUINFO_MOVBE) - -/* - * There are interesting instructions in AVX512, so long as we have AVX512VL, - * which indicates support for EVEX on sizes smaller than 512 bits. - */ -#define have_avx512vl ((cpuinfo & CPUINFO_AVX512VL) && (cpuinfo & CPUINFO_AVX512F)) -#define have_avx512bw ((cpuinfo & CPUINFO_AVX512BW) && have_avx512vl) -#define have_avx512dq ((cpuinfo & CPUINFO_AVX512DQ) && have_avx512vl) -#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) - -/* optional instructions */ -#define TCG_TARGET_HAS_div2_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 have_bmi1 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 have_popcnt -#define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_extract_i32 1 -#define TCG_TARGET_HAS_sextract_i32 1 -#define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 - -#if TCG_TARGET_REG_BITS == 64 -/* Keep 32-bit values zero-extended in a register. */ -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 -#define TCG_TARGET_HAS_div2_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 have_bmi1 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 have_popcnt -#define TCG_TARGET_HAS_deposit_i64 1 -#define TCG_TARGET_HAS_extract_i64 1 -#define TCG_TARGET_HAS_sextract_i64 0 -#define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_movcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 -#else -#define TCG_TARGET_HAS_qemu_st8_i32 1 -#endif - -#define TCG_TARGET_HAS_qemu_ldst_i128 (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)) - -/* We do not support older SSE systems, only beginning with AVX1. 
*/ -#define TCG_TARGET_HAS_v64 have_avx1 -#define TCG_TARGET_HAS_v128 have_avx1 -#define TCG_TARGET_HAS_v256 have_avx2 - -#define TCG_TARGET_HAS_andc_vec 1 -#define TCG_TARGET_HAS_orc_vec have_avx512vl -#define TCG_TARGET_HAS_nand_vec have_avx512vl -#define TCG_TARGET_HAS_nor_vec have_avx512vl -#define TCG_TARGET_HAS_eqv_vec have_avx512vl -#define TCG_TARGET_HAS_not_vec have_avx512vl -#define TCG_TARGET_HAS_neg_vec 0 -#define TCG_TARGET_HAS_abs_vec 1 -#define TCG_TARGET_HAS_roti_vec have_avx512vl -#define TCG_TARGET_HAS_rots_vec 0 -#define TCG_TARGET_HAS_rotv_vec have_avx512vl -#define TCG_TARGET_HAS_shi_vec 1 -#define TCG_TARGET_HAS_shs_vec 1 -#define TCG_TARGET_HAS_shv_vec have_avx2 -#define TCG_TARGET_HAS_mul_vec 1 -#define TCG_TARGET_HAS_sat_vec 1 -#define TCG_TARGET_HAS_minmax_vec 1 -#define TCG_TARGET_HAS_bitsel_vec have_avx512vl -#define TCG_TARGET_HAS_cmpsel_vec -1 - -#define TCG_TARGET_deposit_i32_valid(ofs, len) \ - (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || ((ofs) == 0 && (len) == 16)) -#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid - -/* Check for the possibility of high-byte extraction and, for 64-bit, - zero-extending 32-bit right-shift. */ -#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8) -#define TCG_TARGET_extract_i64_valid(ofs, len) (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) - -/* This defines the natural memory order supported by this - * architecture before guarantees made by various barrier - * instructions. - * - * The x86 has a pretty strong memory ordering which only really - * allows for some stores to be re-ordered after loads. - */ -#include "tcg/tcg-mo.h" - -#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) -#define TCG_TARGET_NEED_LDST_LABELS -#define TCG_TARGET_NEED_POOL_LABELS - #endif diff --git a/libtcg/include/tcg/insn-start-words.h b/libtcg/include/tcg/insn-start-words.h index 08b3fabba..c52aec50a 100644 --- a/libtcg/include/tcg/insn-start-words.h +++ b/libtcg/include/tcg/insn-start-words.h @@ -1,17 +1,12 @@ /* SPDX-License-Identifier: MIT */ /* - * Define TARGET_INSN_START_WORDS + * Define INSN_START_WORDS + Copyright (c) 2008 Fabrice Bellard */ -#ifndef TARGET_INSN_START_WORDS +#ifndef TCG_INSN_START_WORDS +#define TCG_INSN_START_WORDS -#include "cpu.h" +#define INSN_START_WORDS 3 -#ifndef TARGET_INSN_START_EXTRA_WORDS -#define TARGET_INSN_START_WORDS 1 -#else -#define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS) -#endif - -#endif /* TARGET_INSN_START_WORDS */ +#endif /* TCG_INSN_START_WORDS */ diff --git a/libtcg/include/tcg/perf.h b/libtcg/include/tcg/perf.h new file mode 100644 index 000000000..5fa21aba8 --- /dev/null +++ b/libtcg/include/tcg/perf.h @@ -0,0 +1,47 @@ +/* + * Linux perf perf-<pid>.map and jit-<pid>.dump integration. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TCG_PERF_H +#define TCG_PERF_H + +#include +#include + +typedef struct TranslationBlock TranslationBlock; + +#if false +/* Start writing perf-<pid>.map. */ +void perf_enable_perfmap(void); + +/* Start writing jit-<pid>.dump. */ +void perf_enable_jitdump(void); + +/* Add information about TCG prologue to profiler maps. */ +void perf_report_prologue(const void *start, size_t size); + +/* Add information about JITted guest code to profiler maps. */ +void perf_report_code(uint64_t guest_pc, TranslationBlock *tb, const void *start); + +/* Stop writing perf-<pid>.map and/or jit-<pid>.dump.
*/ +void perf_exit(void); +#else +static inline void perf_enable_perfmap(void) { +} + +static inline void perf_enable_jitdump(void) { +} + +static inline void perf_report_prologue(const void *start, size_t size) { +} + +static inline void perf_report_code(uint64_t guest_pc, TranslationBlock *tb, const void *start) { +} + +static inline void perf_exit(void) { +} +#endif + +#endif diff --git a/libtcg/include/tcg/qemu-plugin.h b/libtcg/include/tcg/qemu-plugin.h new file mode 100644 index 000000000..d3008890c --- /dev/null +++ b/libtcg/include/tcg/qemu-plugin.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2017, Emilio G. Cota + * Copyright (C) 2019, Linaro + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef QEMU_PLUGIN_H +#define QEMU_PLUGIN_H + +enum qemu_plugin_mem_rw { + QEMU_PLUGIN_MEM_R = 1, + QEMU_PLUGIN_MEM_W, + QEMU_PLUGIN_MEM_RW, +}; + +#endif \ No newline at end of file diff --git a/libtcg/include/tcg/tcg-cond.h b/libtcg/include/tcg/tcg-cond.h index c5edb630b..d202d0c5b 100644 --- a/libtcg/include/tcg/tcg-cond.h +++ b/libtcg/include/tcg/tcg-cond.h @@ -35,26 +35,34 @@ extern "C" { * Conditions. Note that these are laid out for easy manipulation by * the functions below: * bit 0 is used for inverting; - * bit 1 is signed, - * bit 2 is unsigned, - * bit 3 is used with bit 0 for swapping signed/unsigned. + * bit 1 is used for conditions that need swapping (signed/unsigned). + * bit 2 is used with bit 1 for swapping. + * bit 3 is used for unsigned conditions. */ typedef enum { /* non-signed */ TCG_COND_NEVER = 0 | 0 | 0 | 0, TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + + /* equality */ TCG_COND_EQ = 8 | 0 | 0 | 0, TCG_COND_NE = 8 | 0 | 0 | 1, + + /* "test" i.e. and then compare vs 0 */ + TCG_COND_TSTEQ = 8 | 4 | 0 | 0, + TCG_COND_TSTNE = 8 | 4 | 0 | 1, + /* signed */ TCG_COND_LT = 0 | 0 | 2 | 0, TCG_COND_GE = 0 | 0 | 2 | 1, - TCG_COND_LE = 8 | 0 | 2 | 0, - TCG_COND_GT = 8 | 0 | 2 | 1, + TCG_COND_GT = 0 | 4 | 2 | 0, + TCG_COND_LE = 0 | 4 | 2 | 1, + /* unsigned */ - TCG_COND_LTU = 0 | 4 | 0 | 0, - TCG_COND_GEU = 0 | 4 | 0 | 1, - TCG_COND_LEU = 8 | 4 | 0 | 0, - TCG_COND_GTU = 8 | 4 | 0 | 1, + TCG_COND_LTU = 8 | 0 | 2 | 0, + TCG_COND_GEU = 8 | 0 | 2 | 1, + TCG_COND_GTU = 8 | 4 | 2 | 0, + TCG_COND_LEU = 8 | 4 | 2 | 1, } TCGCond; /* Invert the sense of the comparison. */ @@ -64,22 +72,42 @@ static inline TCGCond tcg_invert_cond(TCGCond c) { /* Swap the operands in a comparison. */ static inline TCGCond tcg_swap_cond(TCGCond c) { - return c & 6 ? (TCGCond) (c ^ 9) : c; + return (TCGCond) (c ^ ((c & 2) << 1)); +} + +/* Must a comparison be considered signed? */ +static inline bool is_signed_cond(TCGCond c) { + return (c & (8 | 2)) == 2; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) { + return (c & (8 | 2)) == (8 | 2); +} + +/* Must a comparison be considered a test? */ +static inline bool is_tst_cond(TCGCond c) { + return (c | 1) == TCG_COND_TSTNE; } /* Create an "unsigned" version of a "signed" comparison. */ static inline TCGCond tcg_unsigned_cond(TCGCond c) { - return c & 2 ? (TCGCond) (c ^ 6) : c; + return is_signed_cond(c) ? (TCGCond) (c + 8) : c; } /* Create a "signed" version of an "unsigned" comparison. */ static inline TCGCond tcg_signed_cond(TCGCond c) { - return c & 4 ? (TCGCond) (c ^ 6) : c; + return is_unsigned_cond(c) ? (TCGCond) (c - 8) : c; } -/* Must a comparison be considered unsigned? 
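The TCGCond re-encoding above is easiest to read as a bit layout: bit 0 (value 1) inverts, bit 1 (value 2) marks an ordered comparison, bit 2 (value 4) selects the swapped form, and bit 3 (value 8) marks equality and test conditions and, together with bit 1, unsigned comparisons. A few spot checks against the helpers defined in this hunk (a worked decoding, not part of the patch):

    /* TCG_COND_LT  = 2  (0|0|2|0)    TCG_COND_GT  = 6  (0|4|2|0)
     * TCG_COND_LTU = 10 (8|0|2|0)    TCG_COND_GTU = 14 (8|4|2|0)
     */
    assert(tcg_invert_cond(TCG_COND_EQ) == TCG_COND_NE);    /* 8 ^ 1 == 9  */
    assert(tcg_swap_cond(TCG_COND_LT) == TCG_COND_GT);      /* 2 ^ ((2 & 2) << 1) == 6 */
    assert(tcg_swap_cond(TCG_COND_EQ) == TCG_COND_EQ);      /* bit 1 clear: symmetric  */
    assert(tcg_unsigned_cond(TCG_COND_LT) == TCG_COND_LTU); /* signed + 8 == unsigned  */
    assert(is_tst_cond(TCG_COND_TSTEQ) && !is_tst_cond(TCG_COND_EQ));

This is why tcg_unsigned_cond and tcg_signed_cond become a simple +8/-8 rather than the old bit-twiddling.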
*/ -static inline bool is_unsigned_cond(TCGCond c) { - return (c & 4) != 0; +/* Create the eq/ne version of a tsteq/tstne comparison. */ +static inline TCGCond tcg_tst_eqne_cond(TCGCond c) { + return is_tst_cond(c) ? (TCGCond) (c - 4) : c; +} + +/* Create the lt/ge version of a tstne/tsteq comparison of the sign. */ +static inline TCGCond tcg_tst_ltge_cond(TCGCond c) { + return is_tst_cond(c) ? (TCGCond) (c ^ 0xf) : c; } /* @@ -92,7 +120,7 @@ static inline TCGCond tcg_high_cond(TCGCond c) { case TCG_COND_LE: case TCG_COND_GEU: case TCG_COND_LEU: - return (TCGCond) (c ^ 8); + return (TCGCond) (c ^ (4 | 1)); default: return c; } diff --git a/libtcg/include/tcg/tcg-has.h b/libtcg/include/tcg/tcg-has.h new file mode 100644 index 000000000..37aed6cc3 --- /dev/null +++ b/libtcg/include/tcg/tcg-has.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define target-specific opcode support + * Copyright (c) 2024 Linaro, Ltd. + */ + +#ifndef TCG_HAS_H +#define TCG_HAS_H + +#include "i386/tcg-target-has.h" + +#if TCG_TARGET_REG_BITS == 32 +/* Turn some undef macros into false macros. */ +#define TCG_TARGET_HAS_extr_i64_i32 0 +#endif + +#if !defined(TCG_TARGET_HAS_v64) && !defined(TCG_TARGET_HAS_v128) && !defined(TCG_TARGET_HAS_v256) +#define TCG_TARGET_MAYBE_vec 0 +#define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_neg_vec 0 +#define TCG_TARGET_HAS_not_vec 0 +#define TCG_TARGET_HAS_andc_vec 0 +#define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 0 +#define TCG_TARGET_HAS_eqv_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 +#define TCG_TARGET_HAS_shi_vec 0 +#define TCG_TARGET_HAS_shs_vec 0 +#define TCG_TARGET_HAS_shv_vec 0 +#define TCG_TARGET_HAS_mul_vec 0 +#define TCG_TARGET_HAS_sat_vec 0 +#define TCG_TARGET_HAS_minmax_vec 0 +#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_cmpsel_vec 0 +#define TCG_TARGET_HAS_tst_vec 0 +#else +#define TCG_TARGET_MAYBE_vec 1 +#endif +#ifndef TCG_TARGET_HAS_v64 +#define TCG_TARGET_HAS_v64 0 +#endif +#ifndef TCG_TARGET_HAS_v128 +#define TCG_TARGET_HAS_v128 0 +#endif +#ifndef TCG_TARGET_HAS_v256 +#define TCG_TARGET_HAS_v256 0 +#endif + +#endif diff --git a/libtcg/include/tcg/tcg-internal.h b/libtcg/include/tcg/tcg-internal.h index 6e00bbd9c..03d88a725 100644 --- a/libtcg/include/tcg/tcg-internal.h +++ b/libtcg/include/tcg/tcg-internal.h @@ -38,7 +38,7 @@ extern TCGContext **tcg_ctxs; extern unsigned int tcg_cur_ctxs; extern unsigned int tcg_max_ctxs; -void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus); +void tcg_region_init(size_t tb_size, int splitwx, unsigned max_threads); bool tcg_region_alloc(TCGContext *s); void tcg_region_initial_alloc(TCGContext *s); void tcg_region_prologue_set(TCGContext *s); @@ -48,7 +48,7 @@ static inline void *tcg_call_func(const TCGOp *op) { } static inline const TCGHelperInfo *tcg_call_info(const TCGOp *op) { - return (const TCGHelperInfo *) (uintptr_t) op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; + return (TCGHelperInfo *) (uintptr_t) op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; } static inline unsigned tcg_call_flags(TCGOp *op) { @@ -63,8 +63,8 @@ static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) { return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN); } #else -extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); -extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); +TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path 
is reachable"); +TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); #endif static inline TCGv_i64 TCGV128_LOW(TCGv_i128 t) { @@ -80,6 +80,30 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) { bool tcg_target_has_memory_bswap(MemOp memop); +TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind); + +/* + * Locate or create a read-only temporary that is a constant. + * This kind of temporary need not be freed, but for convenience + * will be silently ignored by tcg_temp_free_*. + */ +TCGTemp *tcg_constant_internal(TCGType type, int64_t val); + +TCGOp *tcg_gen_op1(TCGOpcode, TCGType, TCGArg); +TCGOp *tcg_gen_op2(TCGOpcode, TCGType, TCGArg, TCGArg); +TCGOp *tcg_gen_op3(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg); +TCGOp *tcg_gen_op4(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg); +TCGOp *tcg_gen_op5(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); +TCGOp *tcg_gen_op6(TCGOpcode, TCGType, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); + +void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); +void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); +void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); +void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e); + +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode, TCGType, unsigned nargs); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode, TCGType, unsigned nargs); + #ifdef __cplusplus } #endif diff --git a/libtcg/include/tcg/tcg-memop.h b/libtcg/include/tcg/tcg-memop.h index cb9a9dc31..bf2d8ef37 100644 --- a/libtcg/include/tcg/tcg-memop.h +++ b/libtcg/include/tcg/tcg-memop.h @@ -39,7 +39,7 @@ typedef enum MemOp { MO_LE = 0, MO_BE = MO_BSWAP, #endif -#ifdef NEED_CPU_H +#ifdef COMPILING_PER_TARGET #if TARGET_BIG_ENDIAN MO_TE = MO_BE, #else @@ -76,6 +76,16 @@ typedef enum MemOp { MO_ALIGN_64 = 6 << MO_ASHIFT, MO_ALIGN = MO_AMASK, + /* + * MO_ALIGN_TLB_ONLY: + * Apply MO_AMASK only along the TCG slow path if TLB_CHECK_ALIGNED + * is set; otherwise unaligned access is permitted. + * This is used by target/arm, where unaligned accesses are + * permitted for pages marked Normal but aligned accesses are + * required for pages marked Device. + */ + MO_ALIGN_TLB_ONLY = 1 << 8, + /* * MO_ATOM_* describes the atomicity requirements of the operation: * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it @@ -95,8 +105,12 @@ typedef enum MemOp { * Depending on alignment, one or both will be single-copy atomic. * This is the atomicity e.g. of Arm FEAT_LSE2 LDP. * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts - * by the alignment. E.g. if the address is 0 mod 4, then each - * 4-byte subobject is single-copy atomic. + * by the alignment. E.g. if an 8-byte value is accessed at an + * address which is 0 mod 8, then the whole 8-byte access is + * single-copy atomic; otherwise, if it is accessed at 0 mod 4 + * then each 4-byte subobject is single-copy atomic; otherwise + * if it is accessed at 0 mod 2 then the four 2-byte subobjects + * are single-copy atomic. * This is the atomicity e.g. of IBM Power. * MO_ATOM_NONE: the operation has no atomicity requirements. * @@ -104,7 +118,7 @@ typedef enum MemOp { * size of the operation, if aligned. This retains the behaviour * from before this field was introduced. 
*/ - MO_ATOM_SHIFT = 8, + MO_ATOM_SHIFT = 9, MO_ATOM_IFALIGN = 0 << MO_ATOM_SHIFT, MO_ATOM_IFALIGN_PAIR = 1 << MO_ATOM_SHIFT, MO_ATOM_WITHIN16 = 2 << MO_ATOM_SHIFT, @@ -139,7 +153,7 @@ typedef enum MemOp { MO_BESL = MO_BE | MO_SL, MO_BESQ = MO_BE | MO_SQ, -#ifdef NEED_CPU_H +#ifdef COMPILING_PER_TARGET MO_TEUW = MO_TE | MO_UW, MO_TEUL = MO_TE | MO_UL, MO_TEUQ = MO_TE | MO_UQ, @@ -160,15 +174,42 @@ static inline unsigned memop_size(MemOp op) { /* Size in bytes to MemOp. */ static inline MemOp size_memop(unsigned size) { #ifdef CONFIG_DEBUG_TCG - /* Power of 2 up to 8. */ - assert((size & (size - 1)) == 0 && size >= 1 && size <= 8); + /* Power of 2 up to 1024 */ + assert(is_power_of_2(size) && size >= 1 && size <= (1 << MO_SIZE)); #endif return (MemOp) ctz32(size); } -/* Big endianness from MemOp. */ -static inline bool memop_big_endian(MemOp op) { - return (op & MO_BSWAP) == MO_BE; +/** + * memop_tlb_alignment_bits: + * @memop: MemOp value + * + * Extract the alignment size for use with TLB_CHECK_ALIGNED. + */ +static inline unsigned memop_tlb_alignment_bits(MemOp memop, bool tlb_check) { + unsigned a = memop & MO_AMASK; + + if (a == MO_UNALN || (!tlb_check && (memop & MO_ALIGN_TLB_ONLY))) { + /* No alignment required. */ + a = 0; + } else if (a == MO_ALIGN) { + /* A natural alignment requirement. */ + a = memop & MO_SIZE; + } else { + /* A specific alignment requirement. */ + a = a >> MO_ASHIFT; + } + return a; +} + +/** + * memop_alignment_bits: + * @memop: MemOp value + * + * Extract the alignment size from the memop. + */ +static inline unsigned memop_alignment_bits(MemOp memop) { + return memop_tlb_alignment_bits(memop, false); } #ifdef __cplusplus diff --git a/libtcg/include/tcg/tcg-op-common.h b/libtcg/include/tcg/tcg-op-common.h index 4f25b52c3..514f0d8c6 100644 --- a/libtcg/include/tcg/tcg-op-common.h +++ b/libtcg/include/tcg/tcg-op-common.h @@ -16,162 +16,26 @@ extern "C" { #endif -/* Basic output routines. Not for general consumption. 
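Before the long tcg-op-common.h listing, a few worked cases for the MemOp helpers just above: memop_tlb_alignment_bits returns an alignment exponent (log2 of the required alignment in bytes), and the new MO_ALIGN_TLB_ONLY bit only takes effect on the TLB slow path. These follow from this header's definitions (a sketch, not part of the patch):

    /* Natural alignment: an 8-byte op must be 8-byte aligned. */
    memop_alignment_bits(MO_UQ | MO_ALIGN);                 /* -> 3 */

    /* Explicit alignment: a 4-byte op must be 16-byte aligned,
     * since MO_ALIGN_16 == 4 << MO_ASHIFT. */
    memop_alignment_bits(MO_UL | MO_ALIGN_16);              /* -> 4 */

    /* TLB-only alignment: ignored off the slow path... */
    memop_alignment_bits(MO_UQ | MO_ALIGN | MO_ALIGN_TLB_ONLY);           /* -> 0 */
    /* ...but enforced when TLB_CHECK_ALIGNED is being applied. */
    memop_tlb_alignment_bits(MO_UQ | MO_ALIGN | MO_ALIGN_TLB_ONLY, true); /* -> 3 */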
*/ - -void tcg_gen_op1(TCGOpcode, TCGArg); -void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg); -void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg); -void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); -void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); -void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); - -void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); -void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); -void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); - -static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) { - tcg_gen_op1(opc, tcgv_i32_arg(a1)); -} - -static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) { - tcg_gen_op1(opc, tcgv_i64_arg(a1)); -} - -static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1) { - tcg_gen_op1(opc, a1); -} - -static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) { - tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); -} - -static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) { - tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); -} - -static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) { - tcg_gen_op2(opc, tcgv_i32_arg(a1), a2); -} - -static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) { - tcg_gen_op2(opc, tcgv_i64_arg(a1), a2); -} - -static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) { - tcg_gen_op2(opc, a1, a2); -} - -static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3) { - tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); -} - -static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3) { - tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); -} - -static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3) { - tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); -} - -static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3) { - tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); -} - -static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, TCGv_ptr base, TCGArg offset) { - tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); -} - -static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, TCGv_ptr base, TCGArg offset) { - tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); -} - -static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4) { - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4)); -} - -static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4) { - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4)); -} - -static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGArg a4) { - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4); -} - -static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGArg a4) { - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4); -} - -static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3, TCGArg a4) { - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); -} - -static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3, 
TCGArg a4) { - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); -} - -static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) { - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5)); -} - -static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) { - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5)); -} - -static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) { - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5); -} - -static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) { - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5); -} - -static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGArg a4, TCGArg a5) { - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4, a5); -} - -static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGArg a4, TCGArg a5) { - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4, a5); -} - -static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5, - TCGv_i32 a6) { - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), - tcgv_i32_arg(a6)); -} - -static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5, - TCGv_i64 a6) { - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), - tcgv_i64_arg(a6)); -} - -static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5, - TCGArg a6) { - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6); -} - -static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5, - TCGArg a6) { - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6); -} - -static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGArg a5, - TCGArg a6) { - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6); -} - -static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGArg a5, - TCGArg a6) { - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6); -} +TCGv_i32 tcg_constant_i32(int32_t val); +TCGv_i64 tcg_constant_i64(int64_t val); +TCGv_vaddr tcg_constant_vaddr(uintptr_t val); +TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val); +TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val); + +TCGv_i32 tcg_temp_new_i32(void); +TCGv_i64 tcg_temp_new_i64(void); +TCGv_ptr tcg_temp_new_ptr(void); +TCGv_i128 tcg_temp_new_i128(void); +TCGv_vec tcg_temp_new_vec(TCGType type); +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match); + +TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name); +TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t 
off, const char *name); +TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name); /* Generic ops. */ -static inline void gen_set_label(TCGLabel *l) { - l->present = 1; - tcg_gen_op1(INDEX_op_set_label, label_arg(l)); -} - +void gen_set_label(TCGLabel *l); void tcg_gen_br(TCGLabel *l); void tcg_gen_mb(TCGBar); @@ -195,7 +59,7 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx); * * See tcg/README for more info about this TCG operation. * - * NOTE: In softmmu emulation, direct jumps with goto_tb are only safe within + * NOTE: In system emulation, direct jumps with goto_tb are only safe within * the pages this TB resides in because we don't take care of direct jumps when * address mapping changes, e.g. in tlb_flush(). In user mode, there's only a * static address translation, so the destination address is always valid, TBs @@ -215,13 +79,8 @@ void tcg_gen_goto_tb(unsigned idx); */ void tcg_gen_lookup_and_goto_ptr(void); -static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr) { - tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr); -} - -static inline void tcg_gen_plugin_cb_end(void) { - tcg_emit_op(INDEX_op_plugin_cb_end, 0); -} +void tcg_gen_plugin_cb(unsigned from); +void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo); /* 32 bit ops */ @@ -264,9 +123,12 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2); void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci); void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); @@ -274,6 +136,7 @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc); void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg); @@ -286,99 +149,30 @@ void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); /* Replicate a value of size @vece from @in to all the lanes in @out */ void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in); -static inline void tcg_gen_discard_i32(TCGv_i32 arg) { - tcg_gen_op1_i32(INDEX_op_discard, arg); -} - -static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (ret != arg) { - tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); - } -} - -static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, 
offset); -} - -static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); -} - -static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_neg_i32) { - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); - } else { - tcg_gen_subfi_i32(ret, 0, arg); - } -} - -static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_not_i32) { - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); - } else { - tcg_gen_xori_i32(ret, arg, -1); - } -} +void tcg_gen_discard_i32(TCGv_i32 arg); +void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg); + +void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); + +void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); + +void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, 
TCGv_i32 arg2); +void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg); /* 64 bit ops */ @@ -421,9 +215,12 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2); void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci); void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); @@ -434,6 +231,7 @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc); void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg); @@ -448,97 +246,6 @@ void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); /* Replicate a value of size @vece from @in to all the lanes in @out */ void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in); -#if TCG_TARGET_REG_BITS == 64 -static inline void tcg_gen_discard_i64(TCGv_i64 arg) { - tcg_gen_op1_i64(INDEX_op_discard, arg); -} - -static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (ret != arg) { - tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); - } -} - -static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - 
tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); -} - -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); -} -#else /* TCG_TARGET_REG_BITS == 32 */ void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); @@ -563,15 +270,7 @@ void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -#endif /* TCG_TARGET_REG_BITS */ - -static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_HAS_neg_i64) { - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); - } else { - tcg_gen_subfi_i64(ret, 0, arg); - } -} +void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg); /* Size changing operations. 
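These convert between the i32 and i64 value types: ext_i32_i64 sign-extends and extu_i32_i64 zero-extends a 32-bit value into 64 bits, extrl_i64_i32/extrh_i64_i32 take the low/high 32-bit half of a 64-bit value, and the extr/concat helpers below split or join 32-bit pairs.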
*/ @@ -582,14 +281,16 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); +void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi); -void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src); void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg); void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi); -static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) { - tcg_gen_deposit_i64(ret, lo, hi, 32, 32); -} +/* 128 bit ops */ + +void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src); +void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset); +void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset); /* Local load/store bit ops */ @@ -612,6 +313,7 @@ void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128, TCGv_i1 void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGArg, MemOp, TCGType); void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGArg, MemOp, TCGType); +void tcg_gen_atomic_xchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128, TCGArg, MemOp, TCGType); void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGArg, MemOp, TCGType); void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGArg, MemOp, TCGType); @@ -647,6 +349,9 @@ void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGArg, Me void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGArg, MemOp, TCGType); void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGArg, MemOp, TCGType); +void tcg_gen_atomic_fetch_and_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128, TCGArg, MemOp, TCGType); +void tcg_gen_atomic_fetch_or_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128, TCGArg, MemOp, TCGType); + /* Vector ops */ void tcg_gen_mov_vec(TCGv_vec, TCGv_vec); @@ -713,6 +418,9 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define NAT TCGv_i64 #endif +TCGv_ptr tcg_constant_ptr_int(intptr_t x); +#define tcg_constant_ptr(X) tcg_constant_ptr_int((intptr_t) (X)) + static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o) { glue(tcg_gen_ld_, PTR)((NAT) r, a, o); } diff --git a/libtcg/include/tcg/tcg-op.h b/libtcg/include/tcg/tcg-op.h index d76f7365d..c1bc187af 100644 --- a/libtcg/include/tcg/tcg-op.h +++ b/libtcg/include/tcg/tcg-op.h @@ -26,20 +26,27 @@ extern "C" { #endif -#ifndef TARGET_INSN_START_EXTRA_WORDS +#if INSN_START_WORDS != 3 +#error Mismatch with insn-start-words.h +#endif + +#if TARGET_INSN_START_EXTRA_WORDS == 0 static inline void tcg_gen_insn_start(target_ulong pc) { - TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS); + TCGOp *op = tcg_emit_op(INDEX_op_insn_start, INSN_START_WORDS * 64 / TCG_TARGET_REG_BITS); tcg_set_insn_start_param(op, 0, pc); + tcg_set_insn_start_param(op, 1, 0); + tcg_set_insn_start_param(op, 2, 0); } #elif TARGET_INSN_START_EXTRA_WORDS == 1 static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1) { - TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS); + TCGOp *op = tcg_emit_op(INDEX_op_insn_start, INSN_START_WORDS * 64 / TCG_TARGET_REG_BITS); tcg_set_insn_start_param(op, 0, pc); tcg_set_insn_start_param(op, 1, a1); + tcg_set_insn_start_param(op, 2, 0); } #elif TARGET_INSN_START_EXTRA_WORDS == 2 static inline void 
tcg_gen_insn_start(target_ulong pc, target_ulong a1, target_ulong a2) { - TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS); + TCGOp *op = tcg_emit_op(INDEX_op_insn_start, INSN_START_WORDS * 64 / TCG_TARGET_REG_BITS); tcg_set_insn_start_param(op, 0, pc); tcg_set_insn_start_param(op, 1, a1); tcg_set_insn_start_param(op, 2, a2); @@ -52,7 +59,6 @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1, target_u typedef TCGv_i32 TCGv; #define tcg_temp_new() tcg_temp_new_i32() #define tcg_global_mem_new tcg_global_mem_new_i32 -#define tcg_temp_free tcg_temp_free_i32 #define tcgv_tl_temp tcgv_i32_temp #define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32 #define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32 @@ -60,7 +66,6 @@ typedef TCGv_i32 TCGv; typedef TCGv_i64 TCGv; #define tcg_temp_new() tcg_temp_new_i64() #define tcg_global_mem_new tcg_global_mem_new_i64 -#define tcg_temp_free tcg_temp_free_i64 #define tcgv_tl_temp tcgv_i64_temp #define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64 #define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64 @@ -156,13 +161,16 @@ DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128) DEF_ATOMIC2(tcg_gen_atomic_xchg, i32) DEF_ATOMIC2(tcg_gen_atomic_xchg, i64) +DEF_ATOMIC2(tcg_gen_atomic_xchg, i128) DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32) DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64) DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32) DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64) +DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i128) DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32) DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64) +DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i128) DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32) DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64) DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32) @@ -232,6 +240,8 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 #define tcg_gen_setcond_tl tcg_gen_setcond_i64 #define tcg_gen_setcondi_tl tcg_gen_setcondi_i64 +#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i64 +#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i64 #define tcg_gen_mul_tl tcg_gen_mul_i64 #define tcg_gen_muli_tl tcg_gen_muli_i64 #define tcg_gen_div_tl tcg_gen_div_i64 @@ -251,6 +261,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_ext16s_tl tcg_gen_ext16s_i64 #define tcg_gen_ext32u_tl tcg_gen_ext32u_i64 #define tcg_gen_ext32s_tl tcg_gen_ext32s_i64 +#define tcg_gen_ext_tl tcg_gen_ext_i64 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 @@ -283,6 +294,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_add2_tl tcg_gen_add2_i64 #define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_addcio_tl tcg_gen_addcio_i64 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 @@ -349,6 +361,8 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 #define tcg_gen_setcond_tl tcg_gen_setcond_i32 #define tcg_gen_setcondi_tl tcg_gen_setcondi_i32 +#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i32 +#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i32 #define tcg_gen_mul_tl tcg_gen_mul_i32 #define tcg_gen_muli_tl tcg_gen_muli_i32 #define tcg_gen_div_tl tcg_gen_div_i32 @@ -368,6 +382,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_ext16s_tl tcg_gen_ext16s_i32 #define tcg_gen_ext32u_tl tcg_gen_mov_i32 #define tcg_gen_ext32s_tl tcg_gen_mov_i32 +#define 
tcg_gen_ext_tl tcg_gen_ext_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S) #define tcg_gen_bswap_tl tcg_gen_bswap32_i32 @@ -398,6 +413,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_add2_tl tcg_gen_add2_i32 #define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_addcio_tl tcg_gen_addcio_i32 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 diff --git a/libtcg/include/tcg/tcg-opc.h b/libtcg/include/tcg/tcg-opc.h index d5f61fcaa..e7589ceb6 100644 --- a/libtcg/include/tcg/tcg-opc.h +++ b/libtcg/include/tcg/tcg-opc.h @@ -33,264 +33,160 @@ DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) /* variable number of parameters */ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) -DEF(br, 0, 0, 1, TCG_OPF_BB_END) - -#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0) -#if TCG_TARGET_REG_BITS == 32 -#define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT -#else -#define IMPL64 TCG_OPF_64BIT -#endif - -DEF(mb, 0, 0, 1, 0) - -DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) -DEF(setcond_i32, 1, 2, 1, 0) -DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) -/* load/store */ -DEF(ld8u_i32, 1, 1, 1, 0) -DEF(ld8s_i32, 1, 1, 1, 0) -DEF(ld16u_i32, 1, 1, 1, 0) -DEF(ld16s_i32, 1, 1, 1, 0) -DEF(ld_i32, 1, 1, 1, 0) -DEF(st8_i32, 0, 2, 1, 0) -DEF(st16_i32, 0, 2, 1, 0) -DEF(st_i32, 0, 2, 1, 0) -/* arith */ -DEF(add_i32, 1, 2, 0, 0) -DEF(sub_i32, 1, 2, 0, 0) -DEF(mul_i32, 1, 2, 0, 0) -DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) -DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) -DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) -DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) -DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) -DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) -DEF(and_i32, 1, 2, 0, 0) -DEF(or_i32, 1, 2, 0, 0) -DEF(xor_i32, 1, 2, 0, 0) -/* shifts/rotates */ -DEF(shl_i32, 1, 2, 0, 0) -DEF(shr_i32, 1, 2, 0, 0) -DEF(sar_i32, 1, 2, 0, 0) -DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) -DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) -DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) -DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) -DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) -DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) - -DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) - -DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) -DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) -DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) -DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) -DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) -DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) -DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) - -DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) -DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) -DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) -DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) -DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) -DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) -DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) -DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) -DEF(andc_i32, 1, 2, 0, 
IMPL(TCG_TARGET_HAS_andc_i32)) -DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32)) -DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) -DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) -DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) -DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32)) -DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32)) -DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32)) - -DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) -DEF(setcond_i64, 1, 2, 1, IMPL64) -DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) -/* load/store */ -DEF(ld8u_i64, 1, 1, 1, IMPL64) -DEF(ld8s_i64, 1, 1, 1, IMPL64) -DEF(ld16u_i64, 1, 1, 1, IMPL64) -DEF(ld16s_i64, 1, 1, 1, IMPL64) -DEF(ld32u_i64, 1, 1, 1, IMPL64) -DEF(ld32s_i64, 1, 1, 1, IMPL64) -DEF(ld_i64, 1, 1, 1, IMPL64) -DEF(st8_i64, 0, 2, 1, IMPL64) -DEF(st16_i64, 0, 2, 1, IMPL64) -DEF(st32_i64, 0, 2, 1, IMPL64) -DEF(st_i64, 0, 2, 1, IMPL64) -/* arith */ -DEF(add_i64, 1, 2, 0, IMPL64) -DEF(sub_i64, 1, 2, 0, IMPL64) -DEF(mul_i64, 1, 2, 0, IMPL64) -DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) -DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) -DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) -DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) -DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) -DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) -DEF(and_i64, 1, 2, 0, IMPL64) -DEF(or_i64, 1, 2, 0, IMPL64) -DEF(xor_i64, 1, 2, 0, IMPL64) -/* shifts/rotates */ -DEF(shl_i64, 1, 2, 0, IMPL64) -DEF(shr_i64, 1, 2, 0, IMPL64) -DEF(sar_i64, 1, 2, 0, IMPL64) -DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) -DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) -DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64)) -DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64)) -DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64)) +DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) +DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT) + +DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) + +DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) + +DEF(add, 1, 2, 0, TCG_OPF_INT) +DEF(and, 1, 2, 0, TCG_OPF_INT) +DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(bswap16, 1, 1, 1, TCG_OPF_INT) +DEF(bswap32, 1, 1, 1, TCG_OPF_INT) +DEF(bswap64, 1, 1, 1, TCG_OPF_INT) +DEF(clz, 1, 2, 0, TCG_OPF_INT) +DEF(ctpop, 1, 1, 0, TCG_OPF_INT) +DEF(ctz, 1, 2, 0, TCG_OPF_INT) +DEF(deposit, 1, 2, 2, TCG_OPF_INT) +DEF(divs, 1, 2, 0, TCG_OPF_INT) +DEF(divs2, 2, 3, 0, TCG_OPF_INT) +DEF(divu, 1, 2, 0, TCG_OPF_INT) +DEF(divu2, 2, 3, 0, TCG_OPF_INT) +DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(extract, 1, 1, 2, TCG_OPF_INT) +DEF(extract2, 1, 2, 1, TCG_OPF_INT) +DEF(ld8u, 1, 1, 1, TCG_OPF_INT) +DEF(ld8s, 1, 1, 1, TCG_OPF_INT) +DEF(ld16u, 1, 1, 1, TCG_OPF_INT) +DEF(ld16s, 1, 1, 1, TCG_OPF_INT) +DEF(ld32u, 1, 1, 1, TCG_OPF_INT) +DEF(ld32s, 1, 1, 1, TCG_OPF_INT) +DEF(ld, 1, 1, 1, TCG_OPF_INT) +DEF(movcond, 1, 4, 1, TCG_OPF_INT) +DEF(mul, 1, 2, 0, TCG_OPF_INT) +DEF(muls2, 2, 2, 0, TCG_OPF_INT) +DEF(mulsh, 1, 2, 0, TCG_OPF_INT) +DEF(mulu2, 2, 2, 0, TCG_OPF_INT) +DEF(muluh, 1, 2, 0, TCG_OPF_INT) +DEF(nand, 1, 2, 0, TCG_OPF_INT) +DEF(neg, 1, 1, 0, TCG_OPF_INT) +DEF(negsetcond, 1, 2, 1, TCG_OPF_INT) +DEF(nor, 1, 2, 0, TCG_OPF_INT) +DEF(not, 1, 1, 0, TCG_OPF_INT) +DEF(or, 1, 2, 0, TCG_OPF_INT) +DEF(orc, 1, 2, 0, TCG_OPF_INT) +DEF(rems, 1, 2, 0, TCG_OPF_INT) 
+DEF(remu, 1, 2, 0, TCG_OPF_INT) +DEF(rotl, 1, 2, 0, TCG_OPF_INT) +DEF(rotr, 1, 2, 0, TCG_OPF_INT) +DEF(sar, 1, 2, 0, TCG_OPF_INT) +DEF(setcond, 1, 2, 1, TCG_OPF_INT) +DEF(sextract, 1, 1, 2, TCG_OPF_INT) +DEF(shl, 1, 2, 0, TCG_OPF_INT) +DEF(shr, 1, 2, 0, TCG_OPF_INT) +DEF(st8, 0, 2, 1, TCG_OPF_INT) +DEF(st16, 0, 2, 1, TCG_OPF_INT) +DEF(st32, 0, 2, 1, TCG_OPF_INT) +DEF(st, 0, 2, 1, TCG_OPF_INT) +DEF(sub, 1, 2, 0, TCG_OPF_INT) +DEF(xor, 1, 2, 0, TCG_OPF_INT) + +DEF(addco, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addc1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addci, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(addcio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) + +DEF(subbo, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subb1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subbi, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(subbio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) + +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) +DEF(setcond2_i32, 1, 4, 1, 0) /* size changing ops */ -DEF(ext_i32_i64, 1, 1, 0, IMPL64) -DEF(extu_i32_i64, 1, 1, 0, IMPL64) -DEF(extrl_i64_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_extrl_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) -DEF(extrh_i64_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) - -DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64) -DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) -DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) -DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) -DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) -DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) -DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) -DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) -DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) -DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) -DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) -DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) -DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) -DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64)) -DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) -DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) -DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) -DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64)) -DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64)) -DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64)) - -DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) -DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) -DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) -DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) -DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64)) -DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64)) +DEF(ext_i32_i64, 1, 1, 0, 0) +DEF(extu_i32_i64, 1, 1, 0, 0) +DEF(extrl_i64_i32, 1, 1, 0, 0) +DEF(extrh_i64_i32, 1, 1, 0, 0) #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) -/* There are tcg_ctx->insn_start_words here, not just one. 
*/ -DEF(insn_start, 0, 0, DATA64_ARGS, TCG_OPF_NOT_PRESENT) +DEF(insn_start, 0, 0, DATA64_ARGS * INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) -DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) -DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT) -DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT) - -/* Replicate ld/st ops for 32 and 64-bit guest addresses. */ -DEF(qemu_ld_a32_i32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_a32_i32, 0, 1 + 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) -DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) - -DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) -DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(plugin_mem_cb, 0, 1, 1, TCG_OPF_NOT_PRESENT) -/* Only used by i386 to cope with stupid register constraints. */ -DEF(qemu_st8_a32_i32, 0, 1 + 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_HAS_qemu_st8_i32)) -DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_HAS_qemu_st8_i32)) - -/* Only for 64-bit hosts at the moment. */ -DEF(qemu_ld_a32_i128, 2, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT | IMPL(TCG_TARGET_HAS_qemu_ldst_i128)) -DEF(qemu_ld_a64_i128, 2, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT | IMPL(TCG_TARGET_HAS_qemu_ldst_i128)) -DEF(qemu_st_a32_i128, 0, 3, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT | IMPL(TCG_TARGET_HAS_qemu_ldst_i128)) -DEF(qemu_st_a64_i128, 0, 3, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT | IMPL(TCG_TARGET_HAS_qemu_ldst_i128)) +DEF(qemu_ld, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_ld2, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st2, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) /* Host vector support.
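These mirror the integer opcodes on host SIMD registers; with the compile-time IMPLVEC/TCG_TARGET_HAS_*_vec masks removed below, per-tuple availability is presumably queried at runtime through tcg_can_emit_vec_op() and tcg_op_supported() instead.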
*/ -#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) - DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) -DEF(dup_vec, 1, 1, 0, IMPLVEC) -DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32)) - -DEF(ld_vec, 1, 1, 1, IMPLVEC) -DEF(st_vec, 0, 2, 1, IMPLVEC) -DEF(dupm_vec, 1, 1, 1, IMPLVEC) - -DEF(add_vec, 1, 2, 0, IMPLVEC) -DEF(sub_vec, 1, 2, 0, IMPLVEC) -DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec)) -DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec)) -DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec)) -DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) -DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) -DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) -DEF(ussub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) -DEF(smin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) -DEF(umin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) -DEF(smax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) -DEF(umax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec)) - -DEF(and_vec, 1, 2, 0, IMPLVEC) -DEF(or_vec, 1, 2, 0, IMPLVEC) -DEF(xor_vec, 1, 2, 0, IMPLVEC) -DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec)) -DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec)) -DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec)) -DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec)) -DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec)) -DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) - -DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) -DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) -DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) -DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec)) - -DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) -DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) -DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) -DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec)) - -DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) -DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) - -DEF(cmp_vec, 1, 2, 1, IMPLVEC) - -DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec)) -DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec)) +DEF(dup_vec, 1, 1, 0, TCG_OPF_VECTOR) +DEF(dup2_vec, 1, 2, 0, TCG_OPF_VECTOR) + +DEF(ld_vec, 1, 1, 1, TCG_OPF_VECTOR) +DEF(st_vec, 0, 2, 1, TCG_OPF_VECTOR) +DEF(dupm_vec, 1, 1, 1, TCG_OPF_VECTOR) + +DEF(add_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(sub_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(mul_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(neg_vec, 1, 1, 0, TCG_OPF_VECTOR) +DEF(abs_vec, 1, 1, 0, TCG_OPF_VECTOR) +DEF(ssadd_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(usadd_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(sssub_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(ussub_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(smin_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(umin_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(smax_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(umax_vec, 1, 2, 0, TCG_OPF_VECTOR) + +DEF(and_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(or_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(xor_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(andc_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(orc_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(nand_vec, 1, 2, 0, 
TCG_OPF_VECTOR) +DEF(nor_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(eqv_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(not_vec, 1, 1, 0, TCG_OPF_VECTOR) + +DEF(shli_vec, 1, 1, 1, TCG_OPF_VECTOR) +DEF(shri_vec, 1, 1, 1, TCG_OPF_VECTOR) +DEF(sari_vec, 1, 1, 1, TCG_OPF_VECTOR) +DEF(rotli_vec, 1, 1, 1, TCG_OPF_VECTOR) + +DEF(shls_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(shrs_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(sars_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(rotls_vec, 1, 2, 0, TCG_OPF_VECTOR) + +DEF(shlv_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(shrv_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(sarv_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(rotlv_vec, 1, 2, 0, TCG_OPF_VECTOR) +DEF(rotrv_vec, 1, 2, 0, TCG_OPF_VECTOR) + +DEF(cmp_vec, 1, 2, 1, TCG_OPF_VECTOR) + +DEF(bitsel_vec, 1, 3, 0, TCG_OPF_VECTOR) +DEF(cmpsel_vec, 1, 4, 1, TCG_OPF_VECTOR) DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) -#if TCG_TARGET_MAYBE_vec -#include "i386/tcg-target.opc.h" -#endif - -#ifdef TCG_TARGET_INTERPRETER -/* These opcodes are only for use between the tci generator and interpreter. */ -DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) -#endif +#include "i386/tcg-target-opc.h.inc" #undef DATA64_ARGS -#undef IMPL -#undef IMPL64 -#undef IMPLVEC #undef DEF diff --git a/libtcg/include/tcg/tcg-temp-internal.h b/libtcg/include/tcg/tcg-temp-internal.h index 63edd0825..fb866902f 100644 --- a/libtcg/include/tcg/tcg-temp-internal.h +++ b/libtcg/include/tcg/tcg-temp-internal.h @@ -31,44 +31,20 @@ void tcg_temp_free_internal(TCGTemp *); -static inline void tcg_temp_free_i32(TCGv_i32 arg) { - tcg_temp_free_internal(tcgv_i32_temp(arg)); -} - -static inline void tcg_temp_free_i64(TCGv_i64 arg) { - tcg_temp_free_internal(tcgv_i64_temp(arg)); -} - -static inline void tcg_temp_free_i128(TCGv_i128 arg) { - tcg_temp_free_internal(tcgv_i128_temp(arg)); -} - -static inline void tcg_temp_free_ptr(TCGv_ptr arg) { - tcg_temp_free_internal(tcgv_ptr_temp(arg)); -} +void tcg_temp_free_i32(TCGv_i32 arg); +void tcg_temp_free_i64(TCGv_i64 arg); +void tcg_temp_free_i128(TCGv_i128 arg); +void tcg_temp_free_ptr(TCGv_ptr arg); +void tcg_temp_free_vec(TCGv_vec arg); -static inline void tcg_temp_free_vec(TCGv_vec arg) { - tcg_temp_free_internal(tcgv_vec_temp(arg)); -} - -static inline TCGv_i32 tcg_temp_ebb_new_i32(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB); - return temp_tcgv_i32(t); -} - -static inline TCGv_i64 tcg_temp_ebb_new_i64(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB); - return temp_tcgv_i64(t); -} - -static inline TCGv_i128 tcg_temp_ebb_new_i128(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB); - return temp_tcgv_i128(t); -} +TCGv_i32 tcg_temp_ebb_new_i32(void); +TCGv_i64 tcg_temp_ebb_new_i64(void); +TCGv_ptr tcg_temp_ebb_new_ptr(void); +TCGv_i128 tcg_temp_ebb_new_i128(void); -static inline TCGv_ptr tcg_temp_ebb_new_ptr(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB); - return temp_tcgv_ptr(t); +/* Forget all freed EBB temps, so that new allocations produce new temps. 
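Clearing the free_temps bitmaps keeps temporaries freed in an earlier extended basic block from being recycled; a translator would presumably call tcg_temp_ebb_reset_freed(tcg_ctx) at a point where stale EBB temp indices must no longer be reused, e.g. when starting a new translation.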
*/ +static inline void tcg_temp_ebb_reset_freed(TCGContext *s) { + memset(s->free_temps, 0, sizeof(s->free_temps)); } -#endif /* TCG_TEMP_FREE_H */ +#endif /* TCG_TEMP_INTERNAL_H */ diff --git a/libtcg/include/tcg/tcg.h b/libtcg/include/tcg/tcg.h index f03d05535..560df7cea 100644 --- a/libtcg/include/tcg/tcg.h +++ b/libtcg/include/tcg/tcg.h @@ -49,8 +49,10 @@ extern "C" { #include #include +#include #include #include +#include #include #include "tcg-mo.h" @@ -137,8 +139,6 @@ static inline bool qemu_log_in_addr_range(uint64_t addr) { /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 -#define MAX_CALL_IARGS 7 - #define CPU_TEMP_BUF_NLONGS 128 #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) @@ -164,110 +164,6 @@ typedef uint64_t TCGRegSet; #error unsupported #endif -#if TCG_TARGET_REG_BITS == 32 -/* Turn some undef macros into false macros. */ -#define TCG_TARGET_HAS_extrl_i64_i32 0 -#define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_div2_i64 0 -#define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 0 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_neg_i64 0 -#define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_deposit_i64 0 -#define TCG_TARGET_HAS_extract_i64 0 -#define TCG_TARGET_HAS_sextract_i64 0 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 -/* Turn some undef macros into true macros. */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#endif - -#ifndef TCG_TARGET_deposit_i32_valid -#define TCG_TARGET_deposit_i32_valid(ofs, len) 1 -#endif -#ifndef TCG_TARGET_deposit_i64_valid -#define TCG_TARGET_deposit_i64_valid(ofs, len) 1 -#endif -#ifndef TCG_TARGET_extract_i32_valid -#define TCG_TARGET_extract_i32_valid(ofs, len) 1 -#endif -#ifndef TCG_TARGET_extract_i64_valid -#define TCG_TARGET_extract_i64_valid(ofs, len) 1 -#endif - -/* Only one of DIV or DIV2 should be defined. 
*/ -#if defined(TCG_TARGET_HAS_div_i32) -#define TCG_TARGET_HAS_div2_i32 0 -#elif defined(TCG_TARGET_HAS_div2_i32) -#define TCG_TARGET_HAS_div_i32 0 -#define TCG_TARGET_HAS_rem_i32 0 -#endif -#if defined(TCG_TARGET_HAS_div_i64) -#define TCG_TARGET_HAS_div2_i64 0 -#elif defined(TCG_TARGET_HAS_div2_i64) -#define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_rem_i64 0 -#endif - -#if !defined(TCG_TARGET_HAS_v64) && !defined(TCG_TARGET_HAS_v128) && !defined(TCG_TARGET_HAS_v256) -#define TCG_TARGET_MAYBE_vec 0 -#define TCG_TARGET_HAS_abs_vec 0 -#define TCG_TARGET_HAS_neg_vec 0 -#define TCG_TARGET_HAS_not_vec 0 -#define TCG_TARGET_HAS_andc_vec 0 -#define TCG_TARGET_HAS_orc_vec 0 -#define TCG_TARGET_HAS_nand_vec 0 -#define TCG_TARGET_HAS_nor_vec 0 -#define TCG_TARGET_HAS_eqv_vec 0 -#define TCG_TARGET_HAS_roti_vec 0 -#define TCG_TARGET_HAS_rots_vec 0 -#define TCG_TARGET_HAS_rotv_vec 0 -#define TCG_TARGET_HAS_shi_vec 0 -#define TCG_TARGET_HAS_shs_vec 0 -#define TCG_TARGET_HAS_shv_vec 0 -#define TCG_TARGET_HAS_mul_vec 0 -#define TCG_TARGET_HAS_sat_vec 0 -#define TCG_TARGET_HAS_minmax_vec 0 -#define TCG_TARGET_HAS_bitsel_vec 0 -#define TCG_TARGET_HAS_cmpsel_vec 0 -#else -#define TCG_TARGET_MAYBE_vec 1 -#endif -#ifndef TCG_TARGET_HAS_v64 -#define TCG_TARGET_HAS_v64 0 -#endif -#ifndef TCG_TARGET_HAS_v128 -#define TCG_TARGET_HAS_v128 0 -#endif -#ifndef TCG_TARGET_HAS_v256 -#define TCG_TARGET_HAS_v256 0 -#endif - typedef enum TCGOpcode { #define DEF(name, oargs, iargs, cargs, flags) INDEX_op_##name, #include "tcg/tcg-opc.h" @@ -379,32 +275,6 @@ static inline int tcg_type_size(TCGType t) { return 4 << i; } -/** - * get_alignment_bits - * @memop: MemOp value - * - * Extract the alignment size from the memop. - */ -static inline unsigned get_alignment_bits(MemOp memop) { - unsigned a = memop & MO_AMASK; - - if (a == MO_UNALN) { - /* No alignment required. */ - a = 0; - } else if (a == MO_ALIGN) { - /* A natural alignment requirement. */ - a = memop & MO_SIZE; - } else { - /* A specific alignment requirement. */ - a = a >> MO_ASHIFT; - } -#if defined(CONFIG_SOFTMMU) - /* The requested alignment cannot overlap the TLB flags. */ - tcg_debug_assert((TLB_FLAGS_MASK & ((1 << a) - 1)) == 0); -#endif - return a; -} - typedef tcg_target_ulong TCGArg; /* Define type and accessor macros for TCG variables. @@ -417,6 +287,7 @@ typedef tcg_target_ulong TCGArg; * TCGv_i64 : 64 bit integer type * TCGv_i128 : 128 bit integer type * TCGv_ptr : a host pointer type + * TCGv_vaddr: an integer type wide enough to hold a target pointer type * TCGv_vec : a host vector type; the exact size is not exposed to the CPU front-end code. * TCGv : an integer type the same size as target_ulong @@ -445,6 +316,14 @@ typedef struct TCGv_ptr_d *TCGv_ptr; typedef struct TCGv_vec_d *TCGv_vec; typedef TCGv_ptr TCGv_env; +#if __SIZEOF_POINTER__ == 4 +typedef TCGv_i32 TCGv_vaddr; +#elif __SIZEOF_POINTER__ == 8 +typedef TCGv_i64 TCGv_vaddr; +#else +#error "sizeof pointer is different from {4,8}" +#endif /* __SIZEOF_POINTER__ */ + /* call flags */ /* Helper does not read globals (either directly or through an exception). It implies TCG_CALL_NO_WRITE_GLOBALS. */ @@ -455,8 +334,6 @@ typedef TCGv_ptr TCGv_env; #define TCG_CALL_NO_SIDE_EFFECTS 0x0004 /* Helper is G_NORETURN. */ #define TCG_CALL_NO_RETURN 0x0008 -/* Helper is part of Plugins. 
*/ -#define TCG_CALL_PLUGIN 0x0010 /* convenience version of most used call flags */ #define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS @@ -512,7 +389,7 @@ typedef struct TCGTemp { unsigned int mem_coherent : 1; unsigned int mem_allocated : 1; unsigned int temp_allocated : 1; - unsigned int temp_subindex : 1; + unsigned int temp_subindex : 2; int64_t val; struct TCGTemp *mem_base; @@ -565,8 +442,9 @@ struct TCGOp { #define TCGOP_CALLI(X) (X)->param1 #define TCGOP_CALLO(X) (X)->param2 -#define TCGOP_VECL(X) (X)->param1 -#define TCGOP_VECE(X) (X)->param2 +#define TCGOP_TYPE(X) (X)->param1 +#define TCGOP_FLAGS(X) (X)->param2 +#define TCGOP_VECE(X) (X)->param2 /* Make sure operands fit in the bitfields above. */ QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); @@ -577,8 +455,7 @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) { struct TCGContext { unsigned tcg_struct_size; - - uint8_t *pool_cur, *pool_end; + uintptr_t pool_cur, pool_end; TCGPool *pool_first, *pool_current, *pool_first_large; int nb_labels; int nb_globals; @@ -593,7 +470,6 @@ struct TCGContext { uint8_t page_bits; uint8_t tlb_dyn_max_bits; #endif - uint8_t insn_start_words; TCGBar guest_mo; TCGRegSet reserved_regs; @@ -627,12 +503,8 @@ struct TCGContext { CPUX86State *cpu; /* *_trans */ /* These structures are private to tcg-target.c.inc. */ -#ifdef TCG_TARGET_NEED_LDST_LABELS QSIMPLEQ_HEAD(, TCGLabelQemuLdst) ldst_labels; -#endif -#ifdef TCG_TARGET_NEED_POOL_LABELS struct TCGLabelPoolData *pool_labels; -#endif TCGLabel *exitreq_label; @@ -645,11 +517,23 @@ struct TCGContext { * space for instructions (for variable-instruction-length ISAs). */ struct qemu_plugin_tb *plugin_tb; + const struct DisasContextBase *plugin_db; /* descriptor of the instruction being translated */ struct qemu_plugin_insn *plugin_insn; #endif + /* For host-specific values. */ +#ifdef __riscv + MemOp riscv_cur_vsew; + TCGType riscv_cur_type; +#endif + /* + * During the tcg_reg_alloc_op loop, we are within a sequence of + * carry-using opcodes like addco+addci. + */ + bool carry_live; + GHashTable *const_table[TCG_TYPE_COUNT]; TCGTempSet free_temps[TCG_TYPE_COUNT]; TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ @@ -657,6 +541,12 @@ struct TCGContext { QTAILQ_HEAD(, TCGOp) ops, free_ops; QSIMPLEQ_HEAD(, TCGLabel) labels; + /* + * When clear, new ops are added to the tail of @ops. + * When set, new ops are added in front of @emit_before_op. + */ + TCGOp *emit_before_op; + /* Tells which temporary holds a given register. 
It does not take into account fixed registers */ TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS]; @@ -716,10 +606,16 @@ static inline bool temp_readonly(TCGTemp *ts) { return ts->kind >= TEMP_FIXED; } +#ifdef CONFIG_USER_ONLY +extern bool tcg_use_softmmu; +#else +#define tcg_use_softmmu true +#endif + extern __thread TCGContext *tcg_ctx; extern const void *tcg_code_gen_epilogue; extern uintptr_t tcg_splitwx_diff; -extern TCGv_env cpu_env; +extern TCGv_env tcg_env; bool in_code_gen_buffer(const void *p); @@ -815,19 +711,24 @@ static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t) { return (TCGv_ptr) temp_tcgv_i32(t); } +static inline TCGv_vaddr temp_tcgv_vaddr(TCGTemp *t) { + return (TCGv_vaddr) temp_tcgv_i32(t); +} + static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) { return (TCGv_vec) temp_tcgv_i32(t); } -static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg) { +static inline TCGArg tcg_get_insn_param(TCGOp *op, unsigned arg) { return op->args[arg]; } -static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v) { +static inline void tcg_set_insn_param(TCGOp *op, unsigned arg, TCGArg v) { op->args[arg] = v; } -static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg) { +static inline uint64_t tcg_get_insn_start_param(TCGOp *op, unsigned arg) { + tcg_debug_assert(arg < INSN_START_WORDS); if (TCG_TARGET_REG_BITS == 64) { return tcg_get_insn_param(op, arg); } else { @@ -835,7 +736,8 @@ static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg) { } } -static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v) { +static inline void tcg_set_insn_start_param(TCGOp *op, unsigned arg, uint64_t v) { + tcg_debug_assert(arg < INSN_START_WORDS); if (TCG_TARGET_REG_BITS == 64) { tcg_set_insn_param(op, arg, v); } else { @@ -873,16 +775,57 @@ void tcg_region_reset_all(void); size_t tcg_code_size(void); size_t tcg_code_capacity(void); +/** + * tcg_tb_insert: + * @tb: translation block to insert + * + * Insert @tb into the region trees. + */ void tcg_tb_insert(TranslationBlock *tb); + +/** + * tcg_tb_remove: + * @tb: translation block to remove + * + * Remove @tb from the region trees. + */ void tcg_tb_remove(TranslationBlock *tb); + +/** + * tcg_tb_lookup: + * @tc_ptr: host PC to look up + * + * Look up a translation block inside the region trees by @tc_ptr. This is + * useful for exception handling, but must not be used for the purposes of + * executing the returned translation block. See struct tb_tc for more + * information. + * + * Returns: a translation block previously inserted into the region trees, + * such that @tc_ptr points anywhere inside the code generated for it, or + * NULL. + */ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); + +/** + * tcg_tb_foreach: + * @func: callback + * @user_data: opaque value to pass to @func + * + * Call @func for each translation block inserted into the region trees. + */ void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); + +/** + * tcg_nb_tbs: + * + * Returns: the number of translation blocks inserted into the region trees. + */ size_t tcg_nb_tbs(void); /* user-mode: Called with mmap_lock held. */ static inline void *tcg_malloc(int size) { TCGContext *s = tcg_ctx; - uint8_t *ptr, *ptr_end; + uintptr_t ptr, ptr_end; /* ??? This is a weak placeholder for minimum malloc alignment.
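Rounding every request up to 8 bytes keeps each pool pointer 8-byte aligned, which should cover the members of the structures currently carved out of the pool; a host type with a stricter requirement would need a larger constant here.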
*/ size = ALIGN_UP(size, 8); @@ -893,7 +836,7 @@ static inline void *tcg_malloc(int size) { return tcg_malloc_internal(tcg_ctx, size); } else { s->pool_cur = ptr_end; - return ptr; + return (void *) ptr; } } @@ -908,50 +851,8 @@ void tb_target_set_jmp_target(const TranslationBlock *, int, uintptr_t, uintptr_ void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); -TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, intptr_t, const char *); -TCGTemp *tcg_temp_new_internal(TCGType, TCGTempKind); -TCGv_vec tcg_temp_new_vec(TCGType type); -TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match); - -static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset, const char *name) { - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); - return temp_tcgv_i32(t); -} - -static inline TCGv_i32 tcg_temp_new_i32(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB); - return temp_tcgv_i32(t); -} - -static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset, const char *name) { - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); - return temp_tcgv_i64(t); -} - -static inline TCGv_i64 tcg_temp_new_i64(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB); - return temp_tcgv_i64(t); -} - -static inline TCGv_i128 tcg_temp_new_i128(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB); - return temp_tcgv_i128(t); -} - -static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset, const char *name) { - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_PTR, reg, offset, name); - return temp_tcgv_ptr(t); -} - -static inline TCGv_ptr tcg_temp_new_ptr(void) { - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB); - return temp_tcgv_ptr(t); -} - -void tcg_dump_info(GString *buf); -void tcg_dump_op_count(GString *buf); - -#define TCG_CT_CONST 1 /* any constant of register size */ +#define TCG_CT_CONST 1 /* any constant of register size */ +#define TCG_CT_REG_ZERO 2 /* zero, in TCG_REG_ZERO */ typedef struct TCGArgConstraint { unsigned ct : 16; @@ -978,50 +879,56 @@ enum { /* Instruction has side effects: it cannot be removed if its outputs are not used, and might trigger exceptions. */ TCG_OPF_SIDE_EFFECTS = 0x08, - /* Instruction operands are 64-bits (otherwise 32-bits). */ - TCG_OPF_64BIT = 0x10, + /* Instruction operands may be I32 or I64 */ + TCG_OPF_INT = 0x10, /* Instruction is optional and not implemented by the host, or insn - is generic and should not be implemened by the host. */ + is generic and should not be implemented by the host. */ TCG_OPF_NOT_PRESENT = 0x20, /* Instruction operands are vectors. */ TCG_OPF_VECTOR = 0x40, /* Instruction is a conditional branch. */ - TCG_OPF_COND_BRANCH = 0x80 + TCG_OPF_COND_BRANCH = 0x80, + /* Instruction produces carry out. */ + TCG_OPF_CARRY_OUT = 0x100, + /* Instruction consumes carry in. 
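Ops carrying both flags (addcio, subbio) chain multi-word arithmetic: a wide add would presumably expand as addco on the least-significant parts followed by addcio/addci on higher parts, with TCGContext.carry_live tracking the flag across the sequence.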
*/ + TCG_OPF_CARRY_IN = 0x200, }; typedef struct TCGOpDef { const char *name; uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; - uint8_t flags; - TCGArgConstraint *args_ct; + uint16_t flags; } TCGOpDef; -extern TCGOpDef tcg_op_defs[]; +extern const TCGOpDef tcg_op_defs[]; extern const size_t tcg_op_defs_max; -typedef struct TCGTargetOpDef { - TCGOpcode op; - const char *args_ct_str[TCG_MAX_OP_ARGS]; -} TCGTargetOpDef; - -bool tcg_op_supported(TCGOpcode op); - -typedef struct TCGHelperInfo TCGHelperInfo; - -void tcg_gen_call0(TCGHelperInfo *, TCGTemp *ret); -void tcg_gen_call1(TCGHelperInfo *, TCGTemp *ret, TCGTemp *); -void tcg_gen_call2(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *); -void tcg_gen_call3(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *); -void tcg_gen_call4(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); -void tcg_gen_call5(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); -void tcg_gen_call6(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); -void tcg_gen_call7(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, +/* + * tcg_op_supported: + * Query if @op, for @type and @flags, is supported by the host + * on which we are currently executing. + */ +bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags); +/* + * tcg_op_deposit_valid: + * Query if a deposit into (ofs, len) is supported for @type by + * the host on which we are currently executing. + */ +bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len); + +void tcg_gen_call0(void *func, TCGHelperInfo *, TCGTemp *ret); +void tcg_gen_call1(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *); +void tcg_gen_call2(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *); +void tcg_gen_call3(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *); +void tcg_gen_call4(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); +void tcg_gen_call5(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); +void tcg_gen_call6(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *); +void tcg_gen_call7(void *func, TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, + TCGTemp *, TCGTemp *); TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs); void tcg_op_remove(TCGContext *s, TCGOp *op); -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, unsigned nargs); -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, unsigned nargs); /** * tcg_remove_ops_after: @@ -1035,30 +942,6 @@ void tcg_remove_ops_after(TCGOp *op); void tcg_optimize(TCGContext *s); -/* - * Locate or create a read-only temporary that is a constant. - * This kind of temporary need not be freed, but for convenience - * will be silently ignored by tcg_temp_free_*. 
- */ -TCGTemp *tcg_constant_internal(TCGType type, int64_t val); - -static inline TCGv_i32 tcg_constant_i32(int32_t val) { - return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); -} - -static inline TCGv_i64 tcg_constant_i64(int64_t val) { - return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); -} - -TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val); -TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val); - -#if UINTPTR_MAX == UINT32_MAX -#define tcg_constant_ptr(x) ((TCGv_ptr) tcg_constant_i32((intptr_t) (x))) -#else -#define tcg_constant_ptr(x) ((TCGv_ptr) tcg_constant_i64((intptr_t) (x))) -#endif - TCGLabel *gen_new_label(void); /** @@ -1198,21 +1081,15 @@ extern tcg_prologue_fn *tcg_qemu_tb_exec; void tcg_register_jit(const void *buf, size_t buf_size); -#if TCG_TARGET_MAYBE_vec /* Return zero if the tuple (opc, type, vece) is unsupportable; return > 0 if it is directly supportable; return < 0 if we must call tcg_expand_vec_op. */ int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned); -#else -static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) { - return 0; -} -#endif /* Expand the tuple (opc, type, vece) on the given arguments. */ void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...); -/* Replicate a constant C accoring to the log2 of the element size. */ +/* Replicate a constant C according to the log2 of the element size. */ uint64_t dup_const(unsigned vece, uint64_t c); #define dup_const(VECE, C) \ @@ -1233,18 +1110,11 @@ static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n) { #endif } -void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs); - void tcg_target_force_tb_exit(uintptr_t gen_code, uintptr_t max_addr); bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned); - -#define plugin_gen_disable_mem_helpers() - -enum qemu_plugin_mem_rw { - QEMU_PLUGIN_MEM_R = 1, - QEMU_PLUGIN_MEM_W, - QEMU_PLUGIN_MEM_RW, -}; +void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs); +/* tcg_dump_stats: Append TCG statistics to @buf */ +void tcg_dump_stats(GString *buf); #ifdef __cplusplus } diff --git a/libtcg/include/tcg/utils/host/i386/cpuinfo.h b/libtcg/include/tcg/utils/host/i386/cpuinfo.h index e23cfab26..27e2867f2 100644 --- a/libtcg/include/tcg/utils/host/i386/cpuinfo.h +++ b/libtcg/include/tcg/utils/host/i386/cpuinfo.h @@ -1,6 +1,6 @@ /* * SPDX-License-Identifier: GPL-2.0-or-later - * Host specific cpu indentification for x86. + * Host specific cpu identification for x86. */ #ifndef HOST_CPUINFO_H @@ -9,14 +9,13 @@ /* Digested version of <cpuid.h> */ #define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */ -#define CPUINFO_CMOV (1u << 1) +#define CPUINFO_OSXSAVE (1u << 1) #define CPUINFO_MOVBE (1u << 2) #define CPUINFO_LZCNT (1u << 3) #define CPUINFO_POPCNT (1u << 4) #define CPUINFO_BMI1 (1u << 5) #define CPUINFO_BMI2 (1u << 6) #define CPUINFO_SSE2 (1u << 7) -#define CPUINFO_SSE4 (1u << 8) #define CPUINFO_AVX1 (1u << 9) #define CPUINFO_AVX2 (1u << 10) #define CPUINFO_AVX512F (1u << 11) @@ -27,6 +26,8 @@ #define CPUINFO_ATOMIC_VMOVDQA (1u << 16) #define CPUINFO_ATOMIC_VMOVDQU (1u << 17) #define CPUINFO_AES (1u << 18) +#define CPUINFO_PCLMUL (1u << 19) +#define CPUINFO_GFNI (1u << 20) /* Initialized with a constructor.
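Callers test features with plain bit masks, as the have_bmi2/have_lzcnt style macros in tcg-target.c.inc do with (cpuinfo & CPUINFO_BMI2); CPUINFO_ALWAYS keeps the initialized value nonzero so it can be told apart from a constructor that has not yet run.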
*/ extern unsigned cpuinfo; diff --git a/libtcg/include/tcg/utils/interval-tree.h b/libtcg/include/tcg/utils/interval-tree.h new file mode 100644 index 000000000..fb3a547ed --- /dev/null +++ b/libtcg/include/tcg/utils/interval-tree.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Interval trees. + * + * Derived from include/linux/interval_tree.h and its dependencies. + */ + +#ifndef QEMU_INTERVAL_TREE_H +#define QEMU_INTERVAL_TREE_H + +#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> + +/* + * For now, don't expose Linux Red-Black Trees separately, but retain the + * separate type definitions to keep the implementation sane, and allow + * the possibility of disentangling them later. + */ +typedef struct RBNode { + /* Encodes parent with color in the lsb. */ + uintptr_t rb_parent_color; + struct RBNode *rb_right; + struct RBNode *rb_left; +} RBNode; + +typedef struct RBRoot { + RBNode *rb_node; +} RBRoot; + +typedef struct RBRootLeftCached { + RBRoot rb_root; + RBNode *rb_leftmost; +} RBRootLeftCached; + +typedef struct IntervalTreeNode { + RBNode rb; + + uint64_t start; /* Start of interval */ + uint64_t last; /* Last location _in_ interval */ + uint64_t subtree_last; +} IntervalTreeNode; + +typedef RBRootLeftCached IntervalTreeRoot; + +/** + * interval_tree_is_empty + * @root: root of the tree. + * + * Returns true if the tree contains no nodes. + */ +static inline bool interval_tree_is_empty(const IntervalTreeRoot *root) { + return root->rb_root.rb_node == NULL; } + +/** + * interval_tree_insert + * @node: node to insert, + * @root: root of the tree. + * + * Insert @node into @root, and rebalance. + */ +void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root); + +/** + * interval_tree_remove + * @node: node to remove, + * @root: root of the tree. + * + * Remove @node from @root, and rebalance. + */ +void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root); + +/** + * interval_tree_iter_first: + * @root: root of the tree, + * @start, @last: the inclusive interval [start, last]. + * + * Locate the "first" of a set of nodes within the tree at @root + * that overlap the interval, where "first" is sorted by start. + * Returns NULL if no overlap found. + */ +IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root, uint64_t start, uint64_t last); + +/** + * interval_tree_iter_next: + * @node: previous search result + * @start, @last: the inclusive interval [start, last]. + * + * Locate the "next" of a set of nodes within the tree that overlap the + * interval; @node is the result of a previous call to + * interval_tree_iter_{first,next}. Returns NULL if @node was the last + * node in the set.
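+ * + * Illustrative sketch of the assumed usage (hypothetical caller code, not part of this header): + * + * IntervalTreeNode *n; + * for (n = interval_tree_iter_first(root, start, last); + * n != NULL; + * n = interval_tree_iter_next(n, start, last)) { + * ... visit one overlapping node via n->start / n->last ... + * }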
+ */ +IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node, uint64_t start, uint64_t last); + +#endif /* QEMU_INTERVAL_TREE_H */ diff --git a/libtcg/src/CMakeLists.txt b/libtcg/src/CMakeLists.txt index 33c2f1cb9..b898639d9 100644 --- a/libtcg/src/CMakeLists.txt +++ b/libtcg/src/CMakeLists.txt @@ -22,7 +22,7 @@ set (TCG_SOURCES) list (APPEND TCG_SOURCES utils/cutils.c utils/osdep.c utils/memalign.c utils/cache.c utils/log.c - utils/cpuinfo-i386.c utils/qtree.c utils/host-utils.c + utils/cpuinfo-i386.c utils/qtree.c utils/host-utils.c utils/interval-tree.c region.c tcg.c tcg-s2e.c tcg-op-ldst.c tcg-common.c tcg-op.c optimize.c tcg-op-gvec.c tcg-op-vec.c tcg-rt/tcg-runtime-gvec.c tcg-rt/tcg-runtime.c atomic-helpers.c @@ -35,7 +35,7 @@ endif (WITH_SYMBEX) add_library(tcg ${TCG_SOURCES}) set (COMMON_FLAGS "-Wall -fPIC -Werror -Wno-zero-length-array -Wno-mismatched-tags -Wno-initializer-overrides") -set (COMMON_FLAGS "${COMMON_FLAGS} -DCONFIG_SOFTMMU -DCONFIG_INT128 -DCPU_TEMP_BUF_NLONGS=128 -DHOST_BIG_ENDIAN=0 -DHAVE_GLIB_WITH_SLICE_ALLOCATOR") +set (COMMON_FLAGS "${COMMON_FLAGS} -DCOMPILING_PER_TARGET -DCONFIG_SOFTMMU -DCONFIG_INT128 -DCPU_TEMP_BUF_NLONGS=128 -DHOST_BIG_ENDIAN=0 -DHAVE_GLIB_WITH_SLICE_ALLOCATOR") if (WITH_SYMBEX) set (COMMON_FLAGS "${COMMON_FLAGS} -DCONFIG_SYMBEX -DTCG_ENABLE_MEM_TRACING") diff --git a/libtcg/src/i386/tcg-target-con-set.h b/libtcg/src/i386/tcg-target-con-set.h index 5ea3a292f..458d69c3c 100644 --- a/libtcg/src/i386/tcg-target-con-set.h +++ b/libtcg/src/i386/tcg-target-con-set.h @@ -20,7 +20,7 @@ C_O0_I2(L, L) C_O0_I2(qi, r) C_O0_I2(re, r) C_O0_I2(ri, r) -C_O0_I2(r, re) +C_O0_I2(r, reT) C_O0_I2(s, L) C_O0_I2(x, r) C_O0_I3(L, L, L) @@ -33,8 +33,8 @@ C_O1_I1(r, q) C_O1_I1(r, r) C_O1_I1(x, r) C_O1_I1(x, x) -C_O1_I2(Q, 0, Q) -C_O1_I2(q, r, re) +C_O1_I2(q, 0, qi) +C_O1_I2(q, r, reT) C_O1_I2(r, 0, ci) C_O1_I2(r, 0, r) C_O1_I2(r, 0, re) @@ -42,18 +42,19 @@ C_O1_I2(r, 0, reZ) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, L, L) +C_O1_I2(r, r, r) C_O1_I2(r, r, re) C_O1_I2(r, r, ri) -C_O1_I2(r, r, rI) +C_O1_I2(r, rO, re) C_O1_I2(x, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) C_O1_I3(x, 0, x, x) C_O1_I3(x, x, x, x) -C_O1_I4(r, r, re, r, 0) +C_O1_I4(x, x, x, xO, x) +C_O1_I4(r, r, reT, r, 0) C_O1_I4(r, r, r, ri, ri) C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) C_O2_I2(r, r, L, L) C_O2_I3(a, d, 0, 1, r) -C_N1_O1_I4(r, r, 0, 1, re, re) diff --git a/libtcg/src/i386/tcg-target-con-str.h b/libtcg/src/i386/tcg-target-con-str.h index b1d849b82..df15a1f34 100644 --- a/libtcg/src/i386/tcg-target-con-str.h +++ b/libtcg/src/i386/tcg-target-con-str.h @@ -19,9 +19,8 @@ REGS('D', 1u << TCG_REG_EDI) REGS('r', ALL_GENERAL_REGS) REGS('x', ALL_VECTOR_REGS) REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */ -REGS('Q', ALL_BYTEH_REGS) /* regs with a second byte (e.g. 
%ah) */ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_ld/st */ -REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ +REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st MO_8 data */ /* * Define constraint letters for constants: @@ -29,5 +28,7 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ */ CONST('e', TCG_CT_CONST_S32) CONST('I', TCG_CT_CONST_I32) +CONST('O', TCG_CT_CONST_ZERO) +CONST('T', TCG_CT_CONST_TST) CONST('W', TCG_CT_CONST_WSZ) CONST('Z', TCG_CT_CONST_U32) diff --git a/libtcg/src/i386/tcg-target.c.inc b/libtcg/src/i386/tcg-target.c.inc index 74c09fd76..06e7c875c 100644 --- a/libtcg/src/i386/tcg-target.c.inc +++ b/libtcg/src/i386/tcg-target.c.inc @@ -22,8 +22,25 @@ * THE SOFTWARE. */ -#include "../tcg-ldst.c.inc" -#include "../tcg-pool.c.inc" +/* Used for function call generation. */ +#define TCG_TARGET_STACK_ALIGN 16 +#if defined(_WIN64) +#define TCG_TARGET_CALL_STACK_OFFSET 32 +#else +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#if defined(_WIN64) +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC +#elif TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL +#else +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF +#endif #ifdef CONFIG_DEBUG_TCG static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { @@ -117,10 +134,12 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) { } /* Constants we accept. */ -#define TCG_CT_CONST_S32 0x100 -#define TCG_CT_CONST_U32 0x200 -#define TCG_CT_CONST_I32 0x400 -#define TCG_CT_CONST_WSZ 0x800 +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 +#define TCG_CT_CONST_I32 0x400 +#define TCG_CT_CONST_WSZ 0x800 +#define TCG_CT_CONST_TST 0x1000 +#define TCG_CT_CONST_ZERO 0x2000 /* Registers used with L constraint, which are the first argument registers on x86_64, and two random call clobbered registers on @@ -133,7 +152,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) { #define TCG_REG_L1 TCG_REG_EDX #endif -#define ALL_BYTEH_REGS 0x0000000fu #if TCG_TARGET_REG_BITS == 64 #define ALL_GENERAL_REGS 0x0000ffffu #define ALL_VECTOR_REGS 0xffff0000u @@ -141,20 +159,10 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) { #else #define ALL_GENERAL_REGS 0x000000ffu #define ALL_VECTOR_REGS 0x00ff0000u -#define ALL_BYTEL_REGS ALL_BYTEH_REGS -#endif -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1)) -#else -#define SOFTMMU_RESERVE_REGS 0 +#define ALL_BYTEL_REGS 0x0000000fu #endif +#define SOFTMMU_RESERVE_REGS (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0) -/* For 64-bit, we always know that CMOV is available. 
*/ -#if TCG_TARGET_REG_BITS == 64 -#define have_cmov true -#else -#define have_cmov (cpuinfo & CPUINFO_CMOV) -#endif #define have_bmi2 (cpuinfo & CPUINFO_BMI2) #define have_lzcnt (cpuinfo & CPUINFO_LZCNT) @@ -186,12 +194,12 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intpt } /* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct) { +static bool tcg_target_const_match(int64_t val, int ct, TCGType type, TCGCond cond, int vece) { if (ct & TCG_CT_CONST) { return 1; } if (type == TCG_TYPE_I32) { - if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) { + if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) { return 1; } } else { @@ -204,10 +212,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) { if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t) ~val) { return 1; } + /* + * This will be used in combination with TCG_CT_CONST_S32, + * so "normal" TESTQ is already matched. Also accept: + * TESTQ -> TESTL (uint32_t) + * TESTQ -> BT (is_power_of_2) + */ + if ((ct & TCG_CT_CONST_TST) && is_tst_cond(cond) && (val == (uint32_t) val || is_power_of_2(val))) { + return 1; + } } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return 1; } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } return 0; } @@ -234,201 +254,236 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) { #define P_VEXL 0x80000 /* Set VEX.L = 1 */ #define P_EVEX 0x100000 /* Requires EVEX encoding */ -#define OPC_ARITH_EvIz (0x81) -#define OPC_ARITH_EvIb (0x83) -#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ -#define OPC_ANDN (0xf2 | P_EXT38) -#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) -#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3)) -#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16) -#define OPC_BSF (0xbc | P_EXT) -#define OPC_BSR (0xbd | P_EXT) -#define OPC_BSWAP (0xc8 | P_EXT) -#define OPC_CALL_Jz (0xe8) -#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ -#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) -#define OPC_DEC_r32 (0x48) -#define OPC_IMUL_GvEv (0xaf | P_EXT) -#define OPC_IMUL_GvEvIb (0x6b) -#define OPC_IMUL_GvEvIz (0x69) -#define OPC_INC_r32 (0x40) -#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ -#define OPC_JCC_short (0x70) /* ... 
plus condition code */ -#define OPC_JMP_long (0xe9) -#define OPC_JMP_short (0xeb) -#define OPC_LEA (0x8d) -#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) -#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ -#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ -#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ -#define OPC_MOVB_EvIz (0xc6) -#define OPC_MOVL_EvIz (0xc7) -#define OPC_MOVL_Iv (0xb8) -#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) -#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) -#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) -#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) -#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) -#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) -#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) -#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) -#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3) -#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3) -#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16) -#define OPC_MOVSBL (0xbe | P_EXT) -#define OPC_MOVSWL (0xbf | P_EXT) -#define OPC_MOVSLQ (0x63 | P_REXW) -#define OPC_MOVZBL (0xb6 | P_EXT) -#define OPC_MOVZWL (0xb7 | P_EXT) -#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) -#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) -#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) -#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) -#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) -#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) -#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16) -#define OPC_PADDB (0xfc | P_EXT | P_DATA16) -#define OPC_PADDW (0xfd | P_EXT | P_DATA16) -#define OPC_PADDD (0xfe | P_EXT | P_DATA16) -#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16) -#define OPC_PADDSB (0xec | P_EXT | P_DATA16) -#define OPC_PADDSW (0xed | P_EXT | P_DATA16) -#define OPC_PADDUB (0xdc | P_EXT | P_DATA16) -#define OPC_PADDUW (0xdd | P_EXT | P_DATA16) -#define OPC_PAND (0xdb | P_EXT | P_DATA16) -#define OPC_PANDN (0xdf | P_EXT | P_DATA16) -#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16) -#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16) -#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16) -#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16) -#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16) -#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16) -#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) -#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) -#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) -#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) -#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) -#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) -#define OPC_PMAXSW (0xee | P_EXT | P_DATA16) -#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) -#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PMAXUB (0xde | P_EXT | P_DATA16) -#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16) -#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16) -#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16) -#define OPC_PMINSW (0xea | P_EXT | P_DATA16) -#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16) -#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PMINUB (0xda | P_EXT | P_DATA16) -#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16) -#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16) -#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16) -#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16) -#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16) -#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16) -#define OPC_PMOVZXWD 
(0x33 | P_EXT38 | P_DATA16) -#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16) -#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16) -#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16) -#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_POR (0xeb | P_EXT | P_DATA16) -#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16) -#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16) -#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) -#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) -#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ -#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ -#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ -#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) -#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) -#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) -#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) -#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) -#define OPC_VPSRAQ (0xe2 | P_EXT | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) -#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) -#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) -#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) -#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) -#define OPC_PSUBD (0xfa | P_EXT | P_DATA16) -#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16) -#define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16) -#define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16) -#define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16) -#define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16) -#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16) -#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16) -#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16) -#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16) -#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16) -#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16) -#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16) -#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16) -#define OPC_PXOR (0xef | P_EXT | P_DATA16) -#define OPC_POP_r32 (0x58) -#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3) -#define OPC_PUSH_r32 (0x50) -#define OPC_PUSH_Iv (0x68) -#define OPC_PUSH_Ib (0x6a) -#define OPC_RET (0xc3) -#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... 
plus cc */ -#define OPC_SHIFT_1 (0xd1) -#define OPC_SHIFT_Ib (0xc1) -#define OPC_SHIFT_cl (0xd3) -#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) -#define OPC_SHUFPS (0xc6 | P_EXT) -#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) -#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) -#define OPC_SHRD_Ib (0xac | P_EXT) -#define OPC_TESTL (0x85) -#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) -#define OPC_UD2 (0x0b | P_EXT) -#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) -#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) -#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) -#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) -#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) -#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) -#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) -#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) -#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) -#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) -#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) -#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) -#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX) -#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX) -#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX) -#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX) -#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX) -#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) -#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) -#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) -#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) -#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) -#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) -#define OPC_VZEROUPPER (0x77 | P_EXT) -#define OPC_XCHG_ax_r32 (0x90) -#define OPC_XCHG_EvGv (0x87) +#define OPC_ARITH_EbIb (0x80) +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ANDN (0xf2 | P_EXT38) +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3)) +#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16) +#define OPC_BSF (0xbc | P_EXT) +#define OPC_BSR (0xbd | P_EXT) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... 
plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVB_EvIz (0xc6) +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVB_Ib (0xb0) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) +#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) +#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) +#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) +#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) +#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) +#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) +#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) +#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3) +#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3) +#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) +#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) +#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) +#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) +#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) +#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) +#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16) +#define OPC_PADDB (0xfc | P_EXT | P_DATA16) +#define OPC_PADDW (0xfd | P_EXT | P_DATA16) +#define OPC_PADDD (0xfe | P_EXT | P_DATA16) +#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16) +#define OPC_PADDSB (0xec | P_EXT | P_DATA16) +#define OPC_PADDSW (0xed | P_EXT | P_DATA16) +#define OPC_PADDUB (0xdc | P_EXT | P_DATA16) +#define OPC_PADDUW (0xdd | P_EXT | P_DATA16) +#define OPC_PAND (0xdb | P_EXT | P_DATA16) +#define OPC_PANDN (0xdf | P_EXT | P_DATA16) +#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16) +#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16) +#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16) +#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16) +#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16) +#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16) +#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) +#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) +#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) +#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) +#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) +#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) +#define OPC_PMAXSW (0xee | P_EXT | P_DATA16) +#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) +#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PMAXUB (0xde | P_EXT | P_DATA16) +#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16) +#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16) +#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16) +#define OPC_PMINSW (0xea | P_EXT | P_DATA16) +#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16) +#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PMINUB (0xda | P_EXT | P_DATA16) +#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16) +#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16) +#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16) +#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16) +#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16) +#define OPC_PMOVZXBW (0x30 | P_EXT38 | 
P_DATA16) +#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16) +#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16) +#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16) +#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16) +#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_POR (0xeb | P_EXT | P_DATA16) +#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16) +#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16) +#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) +#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) +#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ +#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) +#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) +#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) +#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) +#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) +#define OPC_VPSRAQ (0xe2 | P_EXT | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) +#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) +#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) +#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) +#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) +#define OPC_PSUBD (0xfa | P_EXT | P_DATA16) +#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16) +#define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16) +#define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16) +#define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16) +#define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16) +#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16) +#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16) +#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16) +#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16) +#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16) +#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16) +#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16) +#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16) +#define OPC_PXOR (0xef | P_EXT | P_DATA16) +#define OPC_POP_r32 (0x58) +#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... 
plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) +#define OPC_SHUFPS (0xc6 | P_EXT) +#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) +#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_SHRD_Ib (0xac | P_EXT) +#define OPC_STC (0xf9) +#define OPC_TESTB (0x84) +#define OPC_TESTL (0x85) +#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) +#define OPC_UD2 (0x0b | P_EXT) +#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) +#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) +#define OPC_VPBLENDMB (0x66 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPBLENDMW (0x66 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPBLENDMD (0x64 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPBLENDMQ (0x64 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPB (0x3f | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPUB (0x3e | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPW (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPUW (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPD (0x1f | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPUD (0x1e | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPQ (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPUQ (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) +#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) +#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) +#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) +#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) +#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) +#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) +#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) +#define OPC_VGF2P8AFFINEQB (0xce | P_EXT3A | P_DATA16 | P_VEXW) +#define OPC_VPMOVM2B (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPMOVM2W (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) +#define OPC_VPMOVM2D (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPMOVM2Q (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) +#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) +#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) +#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) +#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) +#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) +#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) +#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) +#define 
OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPTESTMB (0x26 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPTESTMW (0x26 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPTESTMD (0x27 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPTESTMQ (0x27 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPTESTNMB (0x26 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPTESTNMW (0x26 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) +#define OPC_VPTESTNMD (0x27 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPTESTNMQ (0x27 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) +#define OPC_VZEROUPPER (0x77 | P_EXT) +#define OPC_XCHG_ax_r32 (0x90) +#define OPC_XCHG_EvGv (0x87) #define OPC_GRP3_Eb (0xf6) #define OPC_GRP3_Ev (0xf7) #define OPC_GRP5 (0xff) #define OPC_GRP14 (0x73 | P_EXT | P_DATA16) +#define OPC_GRPBT (0xba | P_EXT) + +#define OPC_GRPBT_BT 4 +#define OPC_GRPBT_BTS 5 +#define OPC_GRPBT_BTR 6 +#define OPC_GRPBT_BTC 7 /* Group 1 opcode extensions for 0x80-0x83. These are also used as modifiers for OPC_ARITH. */ @@ -483,9 +538,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) { #define JCC_JG 0xf static const uint8_t tcg_cond_to_jcc[] = { - [TCG_COND_EQ] = JCC_JE, [TCG_COND_NE] = JCC_JNE, [TCG_COND_LT] = JCC_JL, [TCG_COND_GE] = JCC_JGE, - [TCG_COND_LE] = JCC_JLE, [TCG_COND_GT] = JCC_JG, [TCG_COND_LTU] = JCC_JB, [TCG_COND_GEU] = JCC_JAE, - [TCG_COND_LEU] = JCC_JBE, [TCG_COND_GTU] = JCC_JA, + [TCG_COND_EQ] = JCC_JE, [TCG_COND_NE] = JCC_JNE, [TCG_COND_LT] = JCC_JL, [TCG_COND_GE] = JCC_JGE, + [TCG_COND_LE] = JCC_JLE, [TCG_COND_GT] = JCC_JG, [TCG_COND_LTU] = JCC_JB, [TCG_COND_GEU] = JCC_JAE, + [TCG_COND_LEU] = JCC_JBE, [TCG_COND_GTU] = JCC_JA, [TCG_COND_TSTEQ] = JCC_JE, [TCG_COND_TSTNE] = JCC_JNE, }; #if TCG_TARGET_REG_BITS == 64 @@ -615,7 +670,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, int rm, int in tcg_out8(s, opc); } -static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, int rm, int index) { +static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, int rm, int index, int aaa, bool z) { /* The entire 4-byte evex prefix; with R' and V' set. 
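 * A sketch of what the two added parameters encode, inferred from the
 * deposit32() calls below rather than stated by the patch: aaa is
 * deposited at bits 24-26 of this dword, the EVEX.aaa field selecting
 * an opmask register k0-k7, and z at bit 31, the EVEX.z bit choosing
 * zeroing- over merging-masking; passing aaa = 0, z = false keeps the
 * previous unmasked encoding.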
*/ uint32_t p = 0x08041062; int mm, pp; @@ -651,7 +706,9 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, int rm, int i p = deposit32(p, 16, 2, pp); p = deposit32(p, 19, 4, ~v); p = deposit32(p, 23, 1, (opc & P_VEXW) != 0); + p = deposit32(p, 24, 3, aaa); p = deposit32(p, 29, 2, (opc & P_VEXL) != 0); + p = deposit32(p, 31, 1, z); tcg_out32(s, p); tcg_out8(s, opc); @@ -659,13 +716,28 @@ static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, int rm, int i static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) { if (opc & P_EVEX) { - tcg_out_evex_opc(s, opc, r, v, rm, 0); + tcg_out_evex_opc(s, opc, r, v, rm, 0, 0, false); } else { tcg_out_vex_opc(s, opc, r, v, rm, 0); } tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); } +static void tcg_out_vex_modrm_type(TCGContext *s, int opc, int r, int v, int rm, TCGType type) { + if (type == TCG_TYPE_V256) { + opc |= P_VEXL; + } + tcg_out_vex_modrm(s, opc, r, v, rm); +} + +static void tcg_out_evex_modrm_type(TCGContext *s, int opc, int r, int v, int rm, int aaa, bool z, TCGType type) { + if (type == TCG_TYPE_V256) { + opc |= P_VEXL; + } + tcg_out_evex_opc(s, opc, r, v, rm, 0, aaa, z); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + /* Output an opcode with a full "rm + (index<carry_live) { tgen_arithr(s, ARITH_XOR, ret, ret); return; } @@ -1028,7 +1099,7 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) { } } -static inline void tcg_out_mb(TCGContext *s, TCGArg a0) { +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Given the strength of x86 memory ordering, we only need care for store-load ordering. Experimentally, "lock orl $0,0(%esp)" is faster than "mfence", so don't bother with the sse insn. */ @@ -1175,15 +1246,30 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg) { } static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src) { - /* movzbl */ - tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); + if (TCG_TARGET_REG_BITS == 32 && src >= 4) { + tcg_out_mov(s, TCG_TYPE_I32, dest, src); + if (dest >= 4) { + tcg_out_modrm(s, OPC_ARITH_EvIz, ARITH_AND, dest); + tcg_out32(s, 0xff); + return; + } + src = dest; + } tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); } static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; - /* movsbl */ - tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); + + if (TCG_TARGET_REG_BITS == 32 && src >= 4) { + tcg_out_mov(s, TCG_TYPE_I32, dest, src); + if (dest >= 4) { + tcg_out_shifti(s, SHIFT_SHL, dest, 24); + tcg_out_shifti(s, SHIFT_SAR, dest, 24); + return; + } + src = dest; + } tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); } @@ -1234,40 +1320,68 @@ static void tgen_arithi(TCGContext *s, int c, int r0, tcg_target_long val, int c c &= 7; } - /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce - partial flags update stalls on Pentium4 and are not recommended - by current Intel optimization manuals. */ - if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { - int is_inc = (c == ARITH_ADD) ^ (val < 0); - if (TCG_TARGET_REG_BITS == 64) { - /* The single-byte increment encodings are re-tasked as the - REX prefixes. Use the MODRM encoding. */ - tcg_out_modrm(s, OPC_GRP5 + rexw, (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); - } else { - tcg_out8(s, (is_inc ? 
OPC_INC_r32 : OPC_DEC_r32) + r0); - } - return; - } + switch (c) { + case ARITH_ADD: + case ARITH_SUB: + if (!cf) { + /* + * ??? While INC is 2 bytes shorter than ADDL $1, they also induce + * partial flags update stalls on Pentium4 and are not recommended + * by current Intel optimization manuals. + */ + if (val == 1 || val == -1) { + int is_inc = (c == ARITH_ADD) ^ (val < 0); + if (TCG_TARGET_REG_BITS == 64) { + /* + * The single-byte increment encodings are re-tasked + * as the REX prefixes. Use the MODRM encoding. + */ + tcg_out_modrm(s, OPC_GRP5 + rexw, (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + if (val == 128) { + /* + * Facilitate using an 8-bit immediate. Carry is inverted + * by this transformation, so do it only if cf == 0. + */ + c ^= ARITH_ADD ^ ARITH_SUB; + val = -128; + } + } + break; - if (c == ARITH_AND) { - if (TCG_TARGET_REG_BITS == 64) { - if (val == 0xffffffffu) { - tcg_out_ext32u(s, r0, r0); + case ARITH_AND: + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t) val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); return; } - if (val == (uint32_t) val) { - /* AND with no high bits set can use a 32-bit operation. */ - rexw = 0; + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; } - } - if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { - tcg_out_ext8u(s, r0, r0); - return; - } - if (val == 0xffffu) { - tcg_out_ext16u(s, r0, r0); - return; - } + break; + + case ARITH_OR: + case ARITH_XOR: + if (val >= 0x80 && val <= 0xff && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_modrm(s, OPC_ARITH_EbIb + P_REXB_RM, c, r0); + tcg_out8(s, val); + return; + } + break; } if (val == (int8_t) val) { @@ -1333,210 +1447,357 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) { } } -static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, int const_arg2, int rexw) { - if (const_arg2) { - if (arg2 == 0) { - /* test r, r */ +static void tcg_out_br(TCGContext *s, TCGLabel *l) { + tcg_out_jxx(s, JCC_JMP, l, 0); +} + +static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int rexw) { + int jz, js; + + if (!is_tst_cond(cond)) { + if (!const_arg2) { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } else if (arg2 == 0) { tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); } else { + tcg_debug_assert(!rexw || arg2 == (int32_t) arg2); tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); } - } else { - tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + return tcg_cond_to_jcc[cond]; + } + + jz = tcg_cond_to_jcc[cond]; + js = (cond == TCG_COND_TSTNE ? 
JCC_JS : JCC_JNS); + + if (!const_arg2) { + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2); + return jz; + } + + if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) { + if (arg2 == 0x80) { + tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); + return js; + } + if (arg2 == 0xff) { + tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1); + return jz; + } + tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1); + tcg_out8(s, arg2); + return jz; + } + + if ((arg2 & ~0xff00) == 0 && arg1 < 4) { + if (arg2 == 0x8000) { + tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); + return js; + } + if (arg2 == 0xff00) { + tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4); + return jz; + } + tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4); + tcg_out8(s, arg2 >> 8); + return jz; } + + if (arg2 == 0xffff) { + tcg_out_modrm(s, OPC_TESTL | P_DATA16, arg1, arg1); + return jz; + } + if (arg2 == 0xffffffffu) { + tcg_out_modrm(s, OPC_TESTL, arg1, arg1); + return jz; + } + + if (is_power_of_2(rexw ? arg2 : (uint32_t) arg2)) { + int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE); + int sh = ctz64(arg2); + + rexw = (sh & 32 ? P_REXW : 0); + if ((sh & 31) == 31) { + tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1); + return js; + } else { + tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1); + tcg_out8(s, sh); + return jc; + } + } + + if (rexw) { + if (arg2 == (uint32_t) arg2) { + rexw = 0; + } else { + tcg_debug_assert(arg2 == (int32_t) arg2); + } + } + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, arg1); + tcg_out32(s, arg2); + return jz; } -static void tcg_out_brcond32(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, TCGLabel *label, - int small) { - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, bool small) { + int jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw); + tcg_out_jxx(s, jcc, label, small); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond64(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, TCGLabel *label, - int small) { - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg arg1, TCGReg arg2, TCGLabel *label) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, false, label, false); } -#else -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, const int *const_args, int small) { + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, TCGReg arg1, tcg_target_long arg2, + TCGLabel *label) { + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, true, label, false); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, reT), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGArg bl, bool blconst, TCGArg bh, + bool bhconst, TCGLabel *label_this, bool small) { TCGLabel *label_next = gen_new_label(); - TCGLabel *label_this = arg_label(args[5]); - switch (args[4]) { + switch (cond) { case TCG_COND_EQ: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next, 1); - tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], label_this, small); + case TCG_COND_TSTEQ: + tcg_out_brcond(s, 0, tcg_invert_cond(cond), al, bl, blconst, label_next, true); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; + case TCG_COND_NE: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], label_this, small); - tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], label_this, small); - break; - case TCG_COND_LT: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], label_this, small); - break; - case TCG_COND_LE: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], label_this, small); + case TCG_COND_TSTNE: + tcg_out_brcond(s, 0, cond, al, bl, blconst, label_this, small); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; - case TCG_COND_GT: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], label_this, small); + + default: + tcg_out_brcond(s, 0, tcg_high_cond(cond), ah, bh, bhconst, label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], label_this, small); + tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), al, bl, blconst, label_this, small); break; - case TCG_COND_GE: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], label_this, small); + } + tcg_out_label(s, label_next); +} + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGArg bl, bool blconst, TCGArg bh, + bool bhconst, TCGLabel *l) { + tcg_out_brcond2(s, cond, al, ah, bl, blconst, bh, bhconst, l, false); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, ri, ri), + .out = tgen_brcond2, +}; + +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, TCGArg arg2, + bool const_arg2, bool neg) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int cmp_rexw = rexw; + bool inv = false; + bool cleared; + int jcc; + + switch (cond) { + case TCG_COND_NE: + inv = true; + /* fall through */ + case TCG_COND_EQ: + /* If arg2 is 0, convert to LTU/GEU vs 1. 
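             * For unsigned x, x == 0 holds exactly when x < 1 and
             * x != 0 exactly when x >= 1, so the compare-with-zero case
             * can reuse the shorter SBB-based expansion at do_ltu below
             * instead of a full CMP/SETcc pair.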
*/ + if (const_arg2 && arg2 == 0) { + arg2 = 1; + goto do_ltu; + } break; - case TCG_COND_LTU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], label_this, small); + + case TCG_COND_TSTNE: + inv = true; + /* fall through */ + case TCG_COND_TSTEQ: + /* If arg2 is -1, convert to LTU/GEU vs 1. */ + if (const_arg2 && arg2 == 0xffffffffu) { + arg2 = 1; + cmp_rexw = 0; + goto do_ltu; + } break; + case TCG_COND_LEU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], label_this, small); - break; + inv = true; + /* fall through */ case TCG_COND_GTU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], label_this, small); + /* If arg2 is a register, swap for LTU/GEU. */ + if (!const_arg2) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + goto do_ltu; + } break; + case TCG_COND_GEU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], label_this, small); + inv = true; + /* fall through */ + case TCG_COND_LTU: + do_ltu: + /* + * Relying on the carry bit, use SBB to produce -1 if LTU, 0 if GEU. + * We can then use NEG or INC to produce the desired result. + * This is always smaller than the SETCC expansion. + */ + tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw); + + /* X - X - C = -C = (C ? -1 : 0) */ + tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest); + if (inv && neg) { + /* ~(C ? -1 : 0) = (C ? 0 : -1) */ + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); + } else if (inv) { + /* (C ? -1 : 0) + 1 = (C ? 0 : 1) */ + tgen_arithi(s, ARITH_ADD, dest, 1, 0); + } else if (!neg) { + /* -(C ? -1 : 0) = (C ? 1 : 0) */ + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, dest); + } + return; + + case TCG_COND_GE: + inv = true; + /* fall through */ + case TCG_COND_LT: + /* If arg2 is 0, extract the sign bit. */ + if (const_arg2 && arg2 == 0) { + tcg_out_mov(s, type, dest, arg1); + if (inv) { + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); + } + tcg_out_shifti(s, (neg ? SHIFT_SAR : SHIFT_SHR) + rexw, dest, rexw ? 63 : 31); + return; + } break; + default: - g_assert_not_reached(); + break; + } + + /* + * If dest does not overlap the inputs, clearing it first is preferred. + * The XOR breaks any false dependency for the low-byte write to dest, + * and is also one byte smaller than MOVZBL. 
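     * For example, "xorl %eax,%eax; cmp ...; sete %al" spends two bytes
     * on the XOR, while "cmp ...; sete %al; movzbl %al,%eax" needs a
     * three-byte MOVZBL and leaves the SETcc's partial register write
     * dependent on the previous value of %eax.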
+ */ + cleared = false; + if (dest != arg1 && (const_arg2 || dest != arg2)) { + tgen_arithr(s, ARITH_XOR, dest, dest); + cleared = true; + } + + jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw); + tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest); + + if (!cleared) { + tcg_out_ext8u(s, dest, dest); + } + if (neg) { + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, dest); } - tcg_out_label(s, label_next); } -#endif -static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg arg1, TCGArg arg2, int const_arg2) { - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, TCGReg arg2) { + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg arg1, TCGArg arg2, int const_arg2) { - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, tcg_target_long arg2) { + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); } -#else -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) { - TCGArg new_args[6]; - TCGLabel *label_true, *label_over; - memcpy(new_args, args + 1, 5 * sizeof(TCGArg)); +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; - if (args[0] == args[1] || args[0] == args[2] || (!const_args[3] && args[0] == args[3]) || - (!const_args[4] && args[0] == args[4])) { - /* When the destination overlaps with one of the argument - registers, don't do anything tricky. */ - label_true = gen_new_label(); - label_over = gen_new_label(); +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, TCGReg arg2) { + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} - new_args[5] = label_arg(label_true); - tcg_out_brcond2(s, new_args, const_args + 1, 1); +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, + tcg_target_long arg2) { + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); - tcg_out_jxx(s, JCC_JMP, label_over, 1); - tcg_out_label(s, label_true); +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; - tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over); - } else { - /* When the destination does not overlap one of the arguments, - clear the destination first, jump if cond false, and emit an - increment in the true case. This results in smaller code. */ +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) { + TCGLabel *label_over = gen_new_label(); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + if (ret == al || ret == ah || (!const_bl && ret == bl) || (!const_bh && ret == bh)) { + /* + * When the destination overlaps with one of the argument + * registers, don't do anything tricky. 
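         * Instead take the plain route sketched below: branch to
         * label_true on the full condition, store 0 and jump past,
         * then store 1 at label_true.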
+ */ + TCGLabel *label_true = gen_new_label(); - label_over = gen_new_label(); - new_args[4] = tcg_invert_cond(new_args[4]); - new_args[5] = label_arg(label_over); - tcg_out_brcond2(s, new_args, const_args + 1, 1); + tcg_out_brcond2(s, cond, al, ah, bl, const_bl, bh, const_bh, label_true, true); - tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over); - } -} -#endif + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true); -static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, TCGReg dest, TCGReg v1) { - if (have_cmov) { - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1); + tcg_out_movi(s, TCG_TYPE_I32, ret, 1); } else { - TCGLabel *over = gen_new_label(); - tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); - tcg_out_mov(s, TCG_TYPE_I32, dest, v1); - tcg_out_label(s, over); - } -} - -static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { - tcg_out_cmp(s, c1, c2, const_c2, 0); - tcg_out_cmov(s, cond, 0, dest, v1); -} + /* + * When the destination does not overlap one of the arguments, + * clear the destination first, jump if cond false, and emit an + * increment in the true case. This results in smaller code. + */ + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { - tcg_out_cmp(s, c1, c2, const_c2, P_REXW); - tcg_out_cmov(s, cond, P_REXW, dest, v1); -} -#endif + tcg_out_brcond2(s, tcg_invert_cond(cond), al, ah, bl, const_bl, bh, const_bh, label_over, true); -static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { - if (have_bmi1) { - tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); - } - } else { - tcg_debug_assert(dest != arg2); - tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); - tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); + tgen_arithi(s, ARITH_ADD, ret, 1, 0); } + tcg_out_label(s, label_over); } -static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { - if (have_lzcnt) { - tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); - } - } else { - tcg_debug_assert(!const_a2); - tcg_debug_assert(dest != arg1); - tcg_debug_assert(dest != arg2); +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, ri, ri), + .out = tgen_setcond2, +}; - /* Recall that the output of BSR is the index not the count. */ - tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); - tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); +static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, TCGReg dest, TCGReg v1) { + tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); +} - /* Since we have destroyed the flags from BSR, we have to re-test. 
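     * (BSR sets ZF from its source, but the XOR above that turns the
     * bit index into a count clobbers the flags, so the operand must be
     * compared with zero again before the CMOV picks arg2 for a zero
     * input.)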
*/ - tcg_out_cmp(s, arg1, 0, 1, rexw); - tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); - } +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw); + tcg_out_cmov(s, jcc, rexw, dest, vt); } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, reT, r, 0), + .out = tgen_movcond, +}; + static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; @@ -1593,21 +1854,6 @@ static void tcg_out_nopn(TCGContext *s, int n) { tcg_out8(s, 0x90); } -/* Test register R vs immediate bits I, setting Z flag for EQ/NE. */ -static void __attribute__((unused)) tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i) { - /* - * This is used for testing alignment, so we can usually use testb. - * For i686, we have to use testl for %esi/%edi. - */ - if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) { - tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r); - tcg_out8(s, i); - } else { - tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, r); - tcg_out32(s, i); - } -} - typedef struct { TCGReg base; int index; @@ -1717,7 +1963,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { return true; } -#ifndef CONFIG_SOFTMMU +#ifdef CONFIG_USER_ONLY static HostAddress x86_guest_base = {.index = -1}; #if defined(__x86_64__) && defined(__linux__) @@ -1730,6 +1976,7 @@ static inline int setup_guest_base_seg(void) { } return 0; } +#define setup_guest_base_seg setup_guest_base_seg #elif defined(__x86_64__) && (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) #include static inline int setup_guest_base_seg(void) { @@ -1738,12 +1985,18 @@ static inline int setup_guest_base_seg(void) { } return 0; } +#define setup_guest_base_seg setup_guest_base_seg +#endif #else -static inline int setup_guest_base_seg(void) { - return 0; -} -#endif /* setup_guest_base_seg */ -#endif /* !SOFTMMU */ +#define x86_guest_base \ + (*(HostAddress *) ({ \ + qemu_build_not_reached(); \ + NULL; \ + })) +#endif /* CONFIG_USER_ONLY */ +#ifndef setup_guest_base_seg +#define setup_guest_base_seg() 0 +#endif #define MIN_TLB_MASK_TABLE_OFS INT_MIN @@ -1753,112 +2006,97 @@ static inline int setup_guest_base_seg(void) { * In both cases, return a TCGLabelQemuLdst structure if the slow path * is required and fill in @h with the host address for the fast path. */ -static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, - bool is_ld) { +static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, TCGReg addr, MemOpIdx oi, bool is_ld) { TCGLabelQemuLdst *ldst = NULL; MemOp opc = get_memop(oi); MemOp s_bits = opc & MO_SIZE; unsigned a_mask; -#ifdef CONFIG_SOFTMMU - h->index = TCG_REG_L0; - h->ofs = 0; - h->seg = 0; -#else - *h = x86_guest_base; -#endif - h->base = addrlo; + if (tcg_use_softmmu) { + h->index = TCG_REG_L0; + h->ofs = 0; + h->seg = 0; + } else { + *h = x86_guest_base; + } + h->base = addr; h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); a_mask = (1 << h->aa.align) - 1; -#ifdef CONFIG_SOFTMMU - int cmp_ofs = is_ld ? 
offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write); - TCGType ttype = TCG_TYPE_I32; - TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - unsigned mem_index = get_mmuidx(oi); - unsigned s_mask = (1 << s_bits) - 1; - int fast_ofs = tlb_mask_table_ofs(s, mem_index); - int tlb_mask; - - ldst = new_ldst_label(s); - ldst->is_ld = is_ld; - ldst->oi = oi; - ldst->addrlo_reg = addrlo; - ldst->addrhi_reg = addrhi; + if (tcg_use_softmmu) { + int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write); + TCGType ttype = TCG_TYPE_I32; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + unsigned mem_index = get_mmuidx(oi); + unsigned s_mask = (1 << s_bits) - 1; + int fast_ofs = tlb_mask_table_ofs(s, mem_index); + int tlb_mask; - if (TCG_TARGET_REG_BITS == 64) { - ttype = s->addr_type; - trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); - if (TCG_TYPE_PTR == TCG_TYPE_I64) { - hrexw = P_REXW; - if (s->page_bits + s->tlb_dyn_max_bits > 32) { + ldst = new_ldst_label(s); + ldst->is_ld = is_ld; + ldst->oi = oi; + ldst->addr_reg = addr; + + if (TCG_TARGET_REG_BITS == 64) { + ttype = s->addr_type; + trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + hrexw = P_REXW; tlbtype = TCG_TYPE_I64; tlbrexw = P_REXW; } } - } - tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, s->page_bits - CPU_TLB_ENTRY_BITS); + tcg_out_mov(s, tlbtype, TCG_REG_L0, addr); + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, fast_ofs + offsetof(CPUTLBDescFast, mask)); + tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, fast_ofs + offsetof(CPUTLBDescFast, mask)); - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, fast_ofs + offsetof(CPUTLBDescFast, table)); + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, + fast_ofs + offsetof(CPUTLBDescFast, table)); - /* - * If the required alignment is at least as large as the access, simply - * copy the address and mask. For lesser alignments, check that we don't - * cross pages for the complete access. - */ - if (a_mask >= s_mask) { - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); - } else { - tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, addrlo, s_mask - a_mask); - } - tlb_mask = s->page_mask | a_mask; - tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); - - /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, TCG_REG_L1, TCG_REG_L0, cmp_ofs); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - ldst->label_ptr[0] = s->code_ptr; - s->code_ptr += 4; + /* + * If the required alignment is at least as large as the access, + * simply copy the address and mask. For lesser alignments, + * check that we don't cross pages for the complete access. 
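         * Concretely, for a 4-byte access with byte alignment the LEA
         * adds s_mask - a_mask = 3, so an access whose last byte spills
         * into the next page changes the page-number bits of
         * TCG_REG_L1, the CMP against the TLB entry misses, and the
         * slow path is taken.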
+ */ + if (a_mask >= s_mask) { + tcg_out_mov(s, ttype, TCG_REG_L1, addr); + } else { + tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, addr, s_mask - a_mask); + } + tlb_mask = TARGET_PAGE_MASK | a_mask; + tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) { - /* cmp 4(TCG_REG_L0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4); + /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, TCG_REG_L1, TCG_REG_L0, cmp_ofs); /* jne slow_path */ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - ldst->label_ptr[1] = s->code_ptr; + ldst->label_ptr[0] = s->code_ptr; s->code_ptr += 4; - } /* TLB Hit. */ #if defined(CONFIG_SYMBEX) && defined(CONFIG_SYMBEX_MP) - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, offsetof(CPUTLBEntry, se_addend)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, offsetof(CPUTLBEntry, se_addend)); #else - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, offsetof(CPUTLBEntry, addend)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, offsetof(CPUTLBEntry, addend)); #endif -#else - if (a_mask) { - ldst = new_ldst_label(s); + } else if (a_mask) { + int jcc; + ldst = new_ldst_label(s); ldst->is_ld = is_ld; ldst->oi = oi; - ldst->addrlo_reg = addrlo; - ldst->addrhi_reg = addrhi; + ldst->addr_reg = addr; - tcg_out_testi(s, addrlo, a_mask); /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, addr, a_mask, true, false); + tcg_out_opc(s, OPC_JCC_long + jcc, 0, 0, 0); ldst->label_ptr[0] = s->code_ptr; s->code_ptr += 4; } -#endif return ldst; } @@ -1977,9 +2215,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } else { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); + int jcc; - tcg_out_testi(s, h.base, 15); - tcg_out_jxx(s, JCC_JNE, l1, true); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); + tcg_out_jxx(s, jcc, l1, true); tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, TCG_TMP_VEC, 0, h.base, h.index, 0, h.ofs); tcg_out_jxx(s, JCC_JMP, l2, true); @@ -1996,28 +2235,52 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, - TCGType data_type) { +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; - ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); + ldst = prepare_host_addr(s, &h, addr, oi, true); + tcg_out_qemu_ld_direct(s, data, -1, h, type, get_memop(oi)); if (ldst) { - ldst->type = data_type; - ldst->datalo_reg = datalo; - ldst->datahi_reg = datahi; + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, MemOp memop) { - bool use_movbe = false; - int movop = OPC_MOVL_EvGv; +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, L), + .out = tgen_qemu_ld, +}; - /* - * Do big-endian stores with movbe or softmmu. 
+static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, TCGReg datahi, TCGReg addr, MemOpIdx oi) { + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + tcg_out_qemu_ld_direct(s, datalo, datahi, h, type, get_memop(oi)); + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = datalo; + ldst->datahi_reg = datahi; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(r, r, L), + .out = tgen_qemu_ld2, +}; + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, MemOp memop) { + bool use_movbe = false; + int movop = OPC_MOVL_EvGv; + + /* + * Do big-endian stores with movbe or system-mode. * User-only without movbe will have its swapping done generically. */ if (memop & MO_BSWAP) { @@ -2028,7 +2291,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, switch (memop & MO_SIZE) { case MO_8: - /* This is handled with constraints on INDEX_op_qemu_st8_i32. */ + /* This is handled with constraints in cset_qemu_st(). */ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, datalo, h.base, h.index, 0, h.ofs); break; @@ -2085,9 +2348,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } else { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); + int jcc; - tcg_out_testi(s, h.base, 15); - tcg_out_jxx(s, JCC_JNE, l1, true); + jcc = tcg_out_cmp(s, TCG_COND_TSTNE, h.base, 15, true, false); + tcg_out_jxx(s, jcc, l1, true); tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, TCG_TMP_VEC, 0, h.base, h.index, 0, h.ofs); tcg_out_jxx(s, JCC_JMP, l2, true); @@ -2103,22 +2367,51 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, MemOpIdx oi, - TCGType data_type) { +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, TCGReg addr, MemOpIdx oi) { + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, false); + tcg_out_qemu_st_direct(s, data, -1, h, get_memop(oi)); + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static TCGConstraintSetIndex cset_qemu_st(TCGType type, unsigned flags) { + return flags == MO_8 ? C_O0_I2(s, L) : C_O0_I2(L, L); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = TCG_TARGET_REG_BITS == 32 ? C_Dynamic : C_O0_I2(L, L), + .base.dynamic_constraint = TCG_TARGET_REG_BITS == 32 ? cset_qemu_st : NULL, + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, TCGReg datahi, TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; - ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); + ldst = prepare_host_addr(s, &h, addr, oi, false); tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(L, L, L), + .out = tgen_qemu_st2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. 
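     * The goto_ptr epilogue path already materialises a zero return
     * value, so an exit value of 0 can branch straight there instead
     * of loading the constant first (the a0 == 0 case below).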
*/ if (a0 == 0) { @@ -2144,6 +2437,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) { set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) { + /* Jump to the given host address (could be epilogue) */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { /* patch the branch destination */ uintptr_t addr = tb->jmp_target_addr[n]; @@ -2151,447 +2449,974 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_r /* no need to flush icache explicitly */ } -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; - int c, const_a2, vexop, rexw = 0; +static void tgen_add(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a0 == a1) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); + } else if (a0 == a2) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, 0); + } +} + +static void tgen_addi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a0 == a1) { + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, false); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, -1, 0, a2); + } +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, re), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + +static void tgen_addco(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); +} + +static void tgen_addco_imm(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, true); +} + +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADC + rexw, a0, a2); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADC + rexw, a0, a2, true); +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, +}; + +static void tgen_addci_rrr(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + /* Because "0O" is not a valid constraint, we must match ourselves. */ + if (a0 == a2) { + tgen_addcio(s, type, a0, a0, a1); + } else { + tcg_out_mov(s, type, a0, a1); + tgen_addcio(s, type, a0, a0, a2); + } +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + tcg_out_mov(s, type, a0, a1); + tgen_addcio_imm(s, type, a0, a0, a2); +} + +static void tgen_addci_rir(TCGContext *s, TCGType type, TCGReg a0, tcg_target_long a1, TCGReg a2) { + tgen_addci_rri(s, type, a0, a2, a1); +} + +static void tgen_addci_rii(TCGContext *s, TCGType type, TCGReg a0, tcg_target_long a1, tcg_target_long a2) { + if (a2 == 0) { + /* Implement 0 + 0 + C with -(x - x - c). 
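+ SBB x,x materializes -C as zero or minus one, and NEG then turns that into the carry value itself.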
*/ + tgen_arithr(s, ARITH_SBB, a0, a0); + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, a0); + } else { + tcg_out_movi(s, type, a0, a2); + tgen_addcio_imm(s, type, a0, a0, a1); + } +} + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_O1_I2(r, rO, re), + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, + .out_rir = tgen_addci_rir, + .out_rii = tgen_addci_rii, +}; + +static void tcg_out_set_carry(TCGContext *s) { + tcg_out8(s, OPC_STC); +} + +static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_AND + rexw, a0, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_AND + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, 0, reZ), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + +static void tgen_andc(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); +} + +static TCGConstraintSetIndex cset_andc(TCGType type, unsigned flags) { + return have_bmi1 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_andc, + .out_rrr = tgen_andc, +}; + +static void tgen_clz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_lzcnt) { + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + /* Recall that the output of BSR is the index not the count. */ + tcg_out_modrm(s, OPC_BSR + rexw, a0, a1); + tgen_arithi(s, ARITH_XOR + rexw, a0, rexw ? 63 : 31, 0); + + /* Since we have destroyed the flags from BSR, we have to re-test. */ + jcc = tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, rexw); + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_clzi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) { + return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) { + return have_popcnt ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_bmi1) { + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + tcg_out_modrm(s, OPC_BSF + rexw, a0, a1); + jcc = JCC_JE; + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) { + return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divs2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a4) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, a4); +} + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divs2, +}; + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a4) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, a4); +} + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divu2, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; #if TCG_TARGET_REG_BITS == 64 -#define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - rexw = P_REXW; /* FALLTHRU */ \ - case glue(glue(INDEX_op_, x), _i32) -#else -#define OP_32_64(x) case glue(glue(INDEX_op_, x), _i32) -#endif +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) { + tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); +} - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - const_a2 = const_args[2]; +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_extrh_i64_i32, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + +static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); +} + +static void tgen_muli(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a2 == (int8_t) a2) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); + tcg_out8(s, a2); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); + tcg_out32(s, a2); + } +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + +static void tgen_muls2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, a3); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_muls2, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) { + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, a3); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_mulu2, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_OR + rexw, a0, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_OR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROL, a0); +} + +static void tgen_rotli(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROL + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROR, a0); +} + +static void tgen_rotri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static TCGConstraintSetIndex cset_shift(TCGType type, unsigned flags) { + return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); +} + +static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SARX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SAR, a0); + } +} + +static void tgen_sari(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHLX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHL, a0); + } +} + +static void tgen_shli(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + + /* For small constant 3-operand shift, use LEA. */ + if (a0 != a1 && a2 >= 1 && a2 <= 3) { + if (a2 == 1) { + /* shl $1,a1,a0 -> lea (a1,a1),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); + } else { + /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); + } + return; + } + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHL + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHRX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHR, a0); + } +} + +static void tgen_shri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SUB + rexw, a0, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo_rri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SUB + rexw, a0, a2, 1); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_sub, + .out_rri = tgen_subbo_rri, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SBB + rexw, a0, a2); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SBB + rexw, a0, a2, 1); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, +}; + +#define outop_subbi outop_subbio + +static void tcg_out_set_borrow(TCGContext *s) { + tcg_out8(s, OPC_STC); +} + +static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_XOR + rexw, a0, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_XOR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned flags) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (flags & TCG_BSWAP_OS) { + /* Output must be sign-extended. 
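+ Byte-swap the full register, then arithmetic-shift the swapped halfword down from the top so the sign bit is replicated.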
*/ + if (rexw) { + tcg_out_bswap64(s, a0); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); + } else { + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SAR, a0, 16); + } + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SHR, a0, 16); + } else { + tcg_out_rolw_8(s, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned flags) { + tcg_out_bswap32(s, a0); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap32, +}; - switch (opc) { - case INDEX_op_goto_ptr: - /* jmp to the given host address (could be epilogue) */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); - break; - case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); - break; - OP_32_64(ld8u) - : /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); - break; - OP_32_64(ld8s) : tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2); - break; - OP_32_64(ld16u) - : /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2); - break; - OP_32_64(ld16s) : tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2); - break; #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32u_i64: +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { + tcg_out_bswap64(s, a0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap64, +}; #endif - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2); - break; - OP_32_64(st8) : if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); - tcg_out8(s, a0); - } - else { - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2); - } - break; - OP_32_64(st16) : if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2); - tcg_out16(s, a0); - } - else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2); - } - break; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_not, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned ofs, unsigned len) { + if (ofs == 0 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); + } else if (ofs == 0 && len == 16) { + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); + } else { + g_assert_not_reached(); + } +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2, unsigned ofs, + unsigned len) { + if (ofs == 0 && len == 8) { + tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), 0, a0, 0); + tcg_out8(s, a2); + } else if (ofs == 0 && len == 16) { + tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), 0, a0, 0); + tcg_out16(s, a2); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out8(s, OPC_MOVB_Ib + a0 + 4); + tcg_out8(s, a2); + } else { + g_assert_not_reached(); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(q, 0, qi), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8u(s, a0, a1); + return; + case 16: + tcg_out_ext16u(s, a0, a1); + return; + case 32: + tcg_out_ext32u(s, a0, a1); + return; + } + } else if (TCG_TARGET_REG_BITS == 64 && ofs + len == 32) { + /* This is a 32-bit zero-extending right shift. */ + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, ofs); + return; + } else if (ofs == 8 && len == 8) { + /* + * On the off-chance that we can use the high-byte registers. + * Otherwise we emit the same ext16 + shift pattern that we + * would have gotten from the normal tcg-op.c expansion. + */ + if (a1 < 4 && (TCG_TARGET_REG_BITS == 32 || a0 < 8)) { + tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); + } else { + tcg_out_ext16u(s, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, 8); + } + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs == 8 && len == 8) { + if (type == TCG_TYPE_I32 && a1 < 4 && a0 < 8) { + tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); + } else { + tcg_out_ext16s(s, type, a0, a1); + tgen_sari(s, type, a0, a0, 8); + } + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned shr) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + /* Note that SHRD outputs to the r/m operand. 
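+ Here that is a0, which is tied to input a1; a2 supplies the bits shifted in from above.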
*/ + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); + tcg_out8(s, shr); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_extract2, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVZBL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVZWL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st32_i64: +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVL_GvEv, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVSLQ, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; #endif - case INDEX_op_st_i32: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2); - tcg_out32(s, a0); - } else { - tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2); - } - break; - OP_32_64(add) - : /* For 3-operand addition, use LEA. */ - if (a0 != a1) { - TCGArg c3 = 0; - if (const_a2) { - c3 = a2, a2 = -1; - } else if (a0 == a2) { - /* Watch out for dest = src + dest, since we've removed - the matching constraint on the add. */ - tgen_arithr(s, ARITH_ADD + rexw, a0, a1); - break; - } +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, data, base, offset); +} - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); - break; - } - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub) : c = ARITH_SUB; - goto gen_arith; - OP_32_64(and) : c = ARITH_AND; - goto gen_arith; - OP_32_64(or) : c = ARITH_OR; - goto gen_arith; - OP_32_64(xor) : c = ARITH_XOR; - goto gen_arith; - gen_arith: - if (const_a2) { - tgen_arithi(s, c + rexw, a0, a2, 0); - } else { - tgen_arithr(s, c + rexw, a0, a2); - } - break; +static void tgen_st8_i(TCGContext *s, TCGType type, tcg_target_long data, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, base, offset); + tcg_out8(s, data); +} - OP_32_64(andc) : if (const_a2) { - tcg_out_mov(s, rexw ? 
TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0); - } - else { - tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); - } - break; +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(qi, r), + .out_r = tgen_st8_r, + .out_i = tgen_st8_i, +}; - OP_32_64(mul) : if (const_a2) { - int32_t val; - val = a2; - if (val == (int8_t) val) { - tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); - tcg_out8(s, val); - } else { - tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); - tcg_out32(s, val); - } - } - else { - tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); - } - break; +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, data, base, offset); +} - OP_32_64(div2) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); - break; - OP_32_64(divu2) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); - break; +static void tgen_st16_i(TCGContext *s, TCGType type, tcg_target_long data, TCGReg base, ptrdiff_t offset) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, base, offset); + tcg_out16(s, data); +} - OP_32_64(shl) - : /* For small constant 3-operand shift, use LEA. */ - if (const_a2 && a0 != a1 && (a2 - 1) < 3) { - if (a2 - 1 == 0) { - /* shl $1,a1,a0 -> lea (a1,a1),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); - } else { - /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); - } - break; - } - c = SHIFT_SHL; - vexop = OPC_SHLX; - goto gen_shift_maybe_vex; - OP_32_64(shr) : c = SHIFT_SHR; - vexop = OPC_SHRX; - goto gen_shift_maybe_vex; - OP_32_64(sar) : c = SHIFT_SAR; - vexop = OPC_SARX; - goto gen_shift_maybe_vex; - OP_32_64(rotl) : c = SHIFT_ROL; - goto gen_shift; - OP_32_64(rotr) : c = SHIFT_ROR; - goto gen_shift; - gen_shift_maybe_vex: - if (have_bmi2) { - if (!const_a2) { - tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1); - break; - } - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - } - /* FALLTHRU */ - gen_shift: - if (const_a2) { - tcg_out_shifti(s, c + rexw, a0, a2); - } else { - tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0); - } - break; +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(ri, r), + .out_r = tgen_st16_r, + .out_i = tgen_st16_i, +}; + +static void tgen_st_i(TCGContext *s, TCGType type, tcg_target_long data, TCGReg base, ptrdiff_t offset) { + bool ok = tcg_out_sti(s, type, data, base, offset); + tcg_debug_assert(ok); +} + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(re, r), + .out_r = tcg_out_st, + .out_i = tgen_st_i, +}; + +static int const umin_insn[4] = {OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ}; + +static int const umax_insn[4] = {OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ}; + +static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece, TCGReg v0, TCGReg v1, TCGReg v2, + TCGCond cond) { + static int const cmpeq_insn[4] = {OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ}; + static int const cmpgt_insn[4] = {OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ}; + + enum { + NEED_INV = 1, + NEED_SWAP = 2, + NEED_UMIN = 4, + NEED_UMAX = 8, + INVALID = 16, + }; + static const uint8_t cond_fixup[16] = { + [0 ... 
15] = INVALID, + [TCG_COND_EQ] = 0, + [TCG_COND_GT] = 0, + [TCG_COND_NE] = NEED_INV, + [TCG_COND_LE] = NEED_INV, + [TCG_COND_LT] = NEED_SWAP, + [TCG_COND_GE] = NEED_SWAP | NEED_INV, + [TCG_COND_LEU] = NEED_UMIN, + [TCG_COND_GTU] = NEED_UMIN | NEED_INV, + [TCG_COND_GEU] = NEED_UMAX, + [TCG_COND_LTU] = NEED_UMAX | NEED_INV, + }; + int fixup = cond_fixup[cond]; + + assert(!(fixup & INVALID)); + + if (fixup & NEED_INV) { + cond = tcg_invert_cond(cond); + } - OP_32_64(ctz) : tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; - OP_32_64(clz) : tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; - OP_32_64(ctpop) : tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); - break; + if (fixup & NEED_SWAP) { + TCGReg swap = v1; + v1 = v2; + v2 = swap; + cond = tcg_swap_cond(cond); + } - case INDEX_op_brcond_i32: - tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]); - break; + if (fixup & (NEED_UMIN | NEED_UMAX)) { + int op = (fixup & NEED_UMIN ? umin_insn[vece] : umax_insn[vece]); - OP_32_64(bswap16) : if (a2 & TCG_BSWAP_OS) { - /* Output must be sign-extended. */ - if (rexw) { - tcg_out_bswap64(s, a0); - tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); - } else { - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SAR, a0, 16); - } - } - else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* Output must be zero-extended, but input isn't. */ - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SHR, a0, 16); - } - else { - tcg_out_rolw_8(s, a0); - } - break; - OP_32_64(bswap32) : tcg_out_bswap32(s, a0); - if (rexw && (a2 & TCG_BSWAP_OS)) { - tcg_out_ext32s(s, a0, a0); - } - break; + /* avx2 does not have 64-bit min/max; adjusted during expand. 
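+ Unsigned MO_64 comparisons are biased to signed in expand_vec_cond before they reach this path.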
*/ + assert(vece <= MO_32); - OP_32_64(neg) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); - break; - OP_32_64(not) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); - break; + tcg_out_vex_modrm_type(s, op, TCG_TMP_VEC, v1, v2, type); + v2 = TCG_TMP_VEC; + cond = TCG_COND_EQ; + } - case INDEX_op_qemu_ld_a64_i32: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); - break; - } - /* fall through */ - case INDEX_op_qemu_ld_a32_i32: - tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_a32_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_ld_a64_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); - } + switch (cond) { + case TCG_COND_EQ: + tcg_out_vex_modrm_type(s, cmpeq_insn[vece], v0, v1, v2, type); break; - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); + case TCG_COND_GT: + tcg_out_vex_modrm_type(s, cmpgt_insn[vece], v0, v1, v2, type); break; + default: + g_assert_not_reached(); + } + return fixup & NEED_INV; +} - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_st8_a64_i32: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); - break; - } - /* fall through */ - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st8_a32_i32: - tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_a32_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_st_a64_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); - break; +static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece, TCGReg v1, TCGReg v2, TCGCond cond) { + static const int cmpm_insn[2][4] = {{OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ}, + {OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ}}; + static const int testm_insn[4] = {OPC_VPTESTMB, OPC_VPTESTMW, OPC_VPTESTMD, OPC_VPTESTMQ}; + static const int testnm_insn[4] = {OPC_VPTESTNMB, OPC_VPTESTNMW, OPC_VPTESTNMD, OPC_VPTESTNMQ}; - OP_32_64(mulu2) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); - break; - OP_32_64(muls2) : tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); - break; - OP_32_64(add2) : if (const_args[4]) { - tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); - } - else { - tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1); - } else { - tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]); - } - break; - OP_32_64(sub2) : if (const_args[4]) { - tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1); - } - else { - tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1); - } else { - tgen_arithr(s, ARITH_SBB 
+ rexw, a1, args[5]);
- }
- break;
+ static const int cond_ext[16] = {
+ [TCG_COND_EQ] = 0, [TCG_COND_NE] = 4, [TCG_COND_LT] = 1, [TCG_COND_LTU] = 1,
+ [TCG_COND_LE] = 2, [TCG_COND_LEU] = 2, [TCG_COND_NEVER] = 3, [TCG_COND_GE] = 5,
+ [TCG_COND_GEU] = 5, [TCG_COND_GT] = 6, [TCG_COND_GTU] = 6, [TCG_COND_ALWAYS] = 7,
+ };
-#if TCG_TARGET_REG_BITS == 32
- case INDEX_op_brcond2_i32:
- tcg_out_brcond2(s, args, const_args, 0);
- break;
- case INDEX_op_setcond2_i32:
- tcg_out_setcond2(s, args, const_args);
- break;
-#else /* TCG_TARGET_REG_BITS == 64 */
- case INDEX_op_ld32s_i64:
- tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
+ switch (cond) {
+ case TCG_COND_TSTNE:
+ tcg_out_vex_modrm_type(s, testm_insn[vece], /* k1 */ 1, v1, v2, type);
 break;
- case INDEX_op_ld_i64:
- tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
+ case TCG_COND_TSTEQ:
+ tcg_out_vex_modrm_type(s, testnm_insn[vece], /* k1 */ 1, v1, v2, type);
 break;
- case INDEX_op_st_i64:
- if (const_args[0]) {
- tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
- tcg_out32(s, a0);
- } else {
- tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
- }
+ default:
+ tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+ /* k1 */ 1, v1, v2, type);
+ tcg_out8(s, cond_ext[cond]);
 break;
+ }
+}
- case INDEX_op_brcond_i64:
- tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
- break;
- case INDEX_op_setcond_i64:
- tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
- break;
- case INDEX_op_movcond_i64:
- tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
- break;
+static void tcg_out_k1_to_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dest) {
+ static const int movm_insn[] = {OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q};
+ tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
+}
- case INDEX_op_bswap64_i64:
- tcg_out_bswap64(s, a0);
- break;
- case INDEX_op_extrh_i64_i32:
- tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
- break;
-#endif
+static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond) {
+ /*
+ * With avx512, we have a complete set of comparisons into mask.
+ * Unless there's a single insn expansion for the comparison,
+ * expand via a mask in k1.
+ */
+ if ((vece <= MO_16 ? have_avx512bw : have_avx512dq) && cond != TCG_COND_EQ && cond != TCG_COND_LT &&
+ cond != TCG_COND_GT) {
+ tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
+ tcg_out_k1_to_vec(s, type, vece, v0);
+ return;
+ }
- OP_32_64(deposit) : if (args[3] == 0 && args[4] == 8) {
- /* load bits 0..7 */
- tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
- }
- else if (args[3] == 8 && args[4] == 8) {
- /* load bits 8..15 */
- tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
- }
- else if (args[3] == 0 && args[4] == 16) {
- /* load bits 0..15 */
- tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
- }
- else {
- g_assert_not_reached();
- }
- break;
+ if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
+ tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
+ tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
+ }
+}
- case INDEX_op_extract_i64:
- if (a2 + args[3] == 32) {
- /* This is a 32-bit zero-extending right shift. */
- tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
- tcg_out_shifti(s, SHIFT_SHR, a0, a2);
- break;
- }
- /* FALLTHRU */
- case INDEX_op_extract_i32:
- /* On the off-chance that we can use the high-byte registers. 
- Otherwise we emit the same ext16 + shift pattern that we - would have gotten from the normal tcg-op.c expansion. */ - tcg_debug_assert(a2 == 8 && args[3] == 8); - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); - } else { - tcg_out_ext16u(s, a0, a1); - tcg_out_shifti(s, SHIFT_SHR, a0, 8); - } - break; +static void tcg_out_cmpsel_vec_k1(TCGContext *s, TCGType type, unsigned vece, TCGReg v0, TCGReg c1, TCGReg c2, + TCGReg v3, TCGReg v4, TCGCond cond) { + static const int vpblendm_insn[] = {OPC_VPBLENDMB, OPC_VPBLENDMW, OPC_VPBLENDMD, OPC_VPBLENDMQ}; + bool z = false; - case INDEX_op_sextract_i32: - /* We don't implement sextract_i64, as we cannot sign-extend to - 64-bits without using the REX prefix that explicitly excludes - access to the high-byte registers. */ - tcg_debug_assert(a2 == 8 && args[3] == 8); - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); - } else { - tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); - tcg_out_shifti(s, SHIFT_SAR, a0, 8); - } - break; + /* Swap to place constant in V4 to take advantage of zero-masking. */ + if (!v3) { + z = true; + v3 = v4; + cond = tcg_invert_cond(cond); + } - OP_32_64(extract2) - : /* Note that SHRD outputs to the r/m operand. */ - tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); - tcg_out8(s, args[3]); - break; - - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); + tcg_out_cmp_vec_k1(s, type, vece, c1, c2, cond); + tcg_out_evex_modrm_type(s, vpblendm_insn[vece], v0, v4, v3, + /* k1 */ 1, z, type); +} + +static void tcg_out_cmpsel_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg v0, TCGReg c1, TCGReg c2, TCGReg v3, + TCGReg v4, TCGCond cond) { + bool inv; + + if (vece <= MO_16 ? have_avx512bw : have_avx512vl) { + tcg_out_cmpsel_vec_k1(s, type, vece, v0, c1, c2, v3, v4, cond); + return; } -#undef OP_32_64 + inv = tcg_out_cmp_vec_noinv(s, type, vece, TCG_TMP_VEC, c1, c2, cond); + + /* + * Since XMM0 is 16, the only way we get 0 into V3 + * is via the constant zero constraint. 
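+ * In that case a single PAND or PANDN against the comparison mask selects between v4 and zero.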
+ */ + if (!v3) { + if (inv) { + tcg_out_vex_modrm_type(s, OPC_PAND, v0, TCG_TMP_VEC, v4, type); + } else { + tcg_out_vex_modrm_type(s, OPC_PANDN, v0, TCG_TMP_VEC, v4, type); + } + } else { + if (inv) { + TCGReg swap = v3; + v3 = v4; + v4 = swap; + } + tcg_out_vex_modrm_type(s, OPC_VPBLENDVB, v0, v4, v3, type); + tcg_out8(s, (TCG_TMP_VEC - TCG_REG_XMM0) << 4); + } } static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, @@ -2604,16 +3429,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned static int const ussub_insn[4] = {OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2}; static int const mul_insn[4] = {OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ}; static int const shift_imm_insn[4] = {OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib}; - static int const cmpeq_insn[4] = {OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ}; - static int const cmpgt_insn[4] = {OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ}; static int const punpckl_insn[4] = {OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ}; static int const punpckh_insn[4] = {OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ}; static int const packss_insn[4] = {OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2}; static int const packus_insn[4] = {OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2}; static int const smin_insn[4] = {OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ}; static int const smax_insn[4] = {OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ}; - static int const umin_insn[4] = {OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ}; - static int const umax_insn[4] = {OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ}; static int const rotlv_insn[4] = {OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ}; static int const rotrv_insn[4] = {OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ}; static int const shlv_insn[4] = {OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ}; @@ -2741,29 +3562,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned goto gen_simd; gen_simd: tcg_debug_assert(insn != OPC_UD2); - if (type == TCG_TYPE_V256) { - insn |= P_VEXL; - } - tcg_out_vex_modrm(s, insn, a0, a1, a2); + tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type); break; case INDEX_op_cmp_vec: - sub = args[3]; - if (sub == TCG_COND_EQ) { - insn = cmpeq_insn[vece]; - } else if (sub == TCG_COND_GT) { - insn = cmpgt_insn[vece]; - } else { - g_assert_not_reached(); - } - goto gen_simd; + tcg_out_cmp_vec(s, type, vece, a0, a1, a2, args[3]); + break; + + case INDEX_op_cmpsel_vec: + tcg_out_cmpsel_vec(s, type, vece, a0, a1, a2, args[3], args[4], args[5]); + break; case INDEX_op_andc_vec: insn = OPC_PANDN; - if (type == TCG_TYPE_V256) { - insn |= P_VEXL; - } - tcg_out_vex_modrm(s, insn, a0, a2, a1); + tcg_out_vex_modrm_type(s, insn, a0, a2, a1, type); break; case INDEX_op_shli_vec: @@ -2791,10 +3603,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned goto gen_shift; gen_shift: tcg_debug_assert(vece != MO_8); - if (type == TCG_TYPE_V256) { - insn |= P_VEXL; - } - tcg_out_vex_modrm(s, insn, sub, a0, a1); + tcg_out_vex_modrm_type(s, insn, sub, a0, a1, type); tcg_out8(s, a2); break; @@ -2830,6 +3639,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned insn = vpshldi_insn[vece]; sub = args[3]; goto gen_simd_imm8; + case INDEX_op_x86_vgf2p8affineqb_vec: + insn = OPC_VGF2P8AFFINEQB; + sub = args[3]; + goto gen_simd_imm8; case INDEX_op_not_vec: insn = OPC_VPTERNLOGQ; @@ -2871,22 +3684,10 @@ static void 
tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned gen_simd_imm8: tcg_debug_assert(insn != OPC_UD2); - if (type == TCG_TYPE_V256) { - insn |= P_VEXL; - } - tcg_out_vex_modrm(s, insn, a0, a1, a2); + tcg_out_vex_modrm_type(s, insn, a0, a1, a2, type); tcg_out8(s, sub); break; - case INDEX_op_x86_vpblendvb_vec: - insn = OPC_VPBLENDVB; - if (type == TCG_TYPE_V256) { - insn |= P_VEXL; - } - tcg_out_vex_modrm(s, insn, a0, a1, a2); - tcg_out8(s, args[3] << 4); - break; - case INDEX_op_x86_psrldq_vec: tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1); tcg_out8(s, a2); @@ -2899,193 +3700,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned } } -static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) { +static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - return C_O0_I2(qi, r); - - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - return C_O0_I2(ri, r); - - case INDEX_op_st_i64: - return C_O0_I2(re, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - return C_O1_I2(r, r, re); - - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - return C_O1_I2(r, 0, re); - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - return C_O1_I2(r, 0, reZ); - - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - return C_O1_I2(r, r, rI); - - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - return have_bmi2 ? 
C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); - - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, 0, ci); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, re); - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, 0); - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - return C_O1_I1(r, q); - - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: - return C_O1_I1(r, r); - - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: - return C_O1_I2(r, 0, r); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(Q, 0, Q); - - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - return C_O1_I2(q, r, re); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, re, r, 0); - - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: - return C_O2_I3(a, d, 0, 1, r); - - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: - return C_O2_I2(a, d, a, r); - - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, re, re); - - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - - case INDEX_op_qemu_ld_a32_i32: - return C_O1_I1(r, L); - case INDEX_op_qemu_ld_a64_i32: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L); - - case INDEX_op_qemu_st_a32_i32: - return C_O0_I2(L, L); - case INDEX_op_qemu_st_a64_i32: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); - case INDEX_op_qemu_st8_a32_i32: - return C_O0_I2(s, L); - case INDEX_op_qemu_st8_a64_i32: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L); - - case INDEX_op_qemu_ld_a32_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); - case INDEX_op_qemu_ld_a64_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L); - - case INDEX_op_qemu_st_a32_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); - case INDEX_op_qemu_st_a64_i64: - return TCG_TARGET_REG_BITS == 64 ? 
C_O0_I2(L, L) : C_O0_I4(L, L, L, L); - - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - return C_O2_I1(r, r, L); - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - return C_O0_I3(L, L, L); - - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, ri, ri); - - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, ri, ri); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: return C_O1_I1(x, r); @@ -3129,6 +3745,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) { case INDEX_op_x86_punpckl_vec: case INDEX_op_x86_punpckh_vec: case INDEX_op_x86_vpshldi_vec: + case INDEX_op_x86_vgf2p8affineqb_vec: #if TCG_TARGET_REG_BITS == 32 case INDEX_op_dup2_vec: #endif @@ -3149,11 +3766,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) { return C_O1_I3(x, 0, x, x); case INDEX_op_bitsel_vec: - case INDEX_op_x86_vpblendvb_vec: return C_O1_I3(x, x, x, x); + case INDEX_op_cmpsel_vec: + return C_O1_I4(x, x, x, xO, x); default: - g_assert_not_reached(); + return C_NotImplemented; } } @@ -3275,52 +3893,64 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) { } } -static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { - TCGv_vec t1, t2; - - tcg_debug_assert(vece == MO_8); +static void gen_vgf2p8affineqb0(TCGType type, TCGv_vec v0, TCGv_vec v1, uint64_t matrix) { + vec_gen_4(INDEX_op_x86_vgf2p8affineqb_vec, type, MO_8, tcgv_vec_arg(v0), tcgv_vec_arg(v1), + tcgv_vec_arg(tcg_constant_vec(type, MO_64, matrix)), 0); +} - t1 = tcg_temp_new_vec(type); - t2 = tcg_temp_new_vec(type); +static void expand_vec_shi(TCGType type, unsigned vece, bool right, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { + static const uint64_t gf2_shi[2][8] = { + /* left shift */ + {0, 0x0001020408102040ull, 0x0000010204081020ull, 0x0000000102040810ull, 0x0000000001020408ull, + 0x0000000000010204ull, 0x0000000000000102ull, 0x0000000000000001ull}, + /* right shift */ + {0, 0x0204081020408000ull, 0x0408102040800000ull, 0x0810204080000000ull, 0x1020408000000000ull, + 0x2040800000000000ull, 0x4080000000000000ull, 0x8000000000000000ull}}; + uint8_t mask; - /* - * Unpack to W, shift, and repack. Tricky bits: - * (1) Use punpck*bw x,x to produce DDCCBBAA, - * i.e. duplicate in other half of the 16-bit lane. - * (2) For right-shift, add 8 so that the high half of the lane - * becomes zero. For left-shift, and left-rotate, we must - * shift up and down again. - * (3) Step 2 leaves high half zero such that PACKUSWB - * (pack with unsigned saturation) does not modify - * the quantity. 
- */ - vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); - vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); + tcg_debug_assert(vece == MO_8); - if (opc != INDEX_op_rotli_vec) { - imm += 8; + if (cpuinfo & CPUINFO_GFNI) { + gen_vgf2p8affineqb0(type, v0, v1, gf2_shi[right][imm]); + return; } - if (opc == INDEX_op_shri_vec) { - tcg_gen_shri_vec(MO_16, t1, t1, imm); - tcg_gen_shri_vec(MO_16, t2, t2, imm); + + if (right) { + mask = 0xff >> imm; + tcg_gen_shri_vec(MO_16, v0, v1, imm); } else { - tcg_gen_shli_vec(MO_16, t1, t1, imm); - tcg_gen_shli_vec(MO_16, t2, t2, imm); - tcg_gen_shri_vec(MO_16, t1, t1, 8); - tcg_gen_shri_vec(MO_16, t2, t2, 8); + mask = 0xff << imm; + tcg_gen_shli_vec(MO_16, v0, v1, imm); } - - vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8, tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2)); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t2); + tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask)); } static void expand_vec_sari(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { + static const uint64_t gf2_sar[8] = { + 0, + 0x0204081020408080ull, + 0x0408102040808080ull, + 0x0810204080808080ull, + 0x1020408080808080ull, + 0x2040808080808080ull, + 0x4080808080808080ull, + 0x8080808080808080ull, + }; TCGv_vec t1, t2; + if (imm >= (8 << vece) - 1) { + tcg_gen_cmp_vec(TCG_COND_LT, vece, v0, v1, tcg_constant_vec(type, MO_64, 0)); + return; + } + switch (vece) { case MO_8: - /* Unpack to W, shift, and repack, as in expand_vec_shi. */ + if (cpuinfo & CPUINFO_GFNI) { + gen_vgf2p8affineqb0(type, v0, v1, gf2_sar[imm]); + break; + } + + /* Unpack to 16-bit, shift, and repack. */ t1 = tcg_temp_new_vec(type); t2 = tcg_temp_new_vec(type); vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); @@ -3350,7 +3980,7 @@ static void expand_vec_sari(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v /* Otherwise we will need to use a compare vs 0 to produce * the sign-extend, shift and merge. 
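 * A negative lane compares true, giving an all-ones mask that supplies the sign bits merged in above the shifted value.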
*/ - tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1, tcg_constant_vec(type, MO_64, 0), v1); + tcg_gen_cmp_vec(TCG_COND_LT, MO_64, t1, v1, tcg_constant_vec(type, MO_64, 0)); tcg_gen_shri_vec(MO_64, v0, v1, imm); tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm); tcg_gen_or_vec(MO_64, v0, v0, t1); @@ -3364,16 +3994,28 @@ static void expand_vec_sari(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v } static void expand_vec_rotli(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { + static const uint64_t gf2_rol[8] = { + 0, + 0x8001020408102040ull, + 0x4080010204081020ull, + 0x2040800102040810ull, + 0x1020408001020408ull, + 0x0810204080010204ull, + 0x0408102040800102ull, + 0x0204081020408001ull, + }; TCGv_vec t; if (vece == MO_8) { - expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm); - return; - } - - if (have_avx512vbmi2) { - vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); - return; + if (cpuinfo & CPUINFO_GFNI) { + gen_vgf2p8affineqb0(type, v0, v1, gf2_rol[imm]); + return; + } + } else { + if (have_avx512vbmi2) { + vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); + return; + } } t = tcg_temp_new_vec(type); @@ -3492,142 +4134,59 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1 } } -static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2, TCGCond cond) { - enum { - NEED_INV = 1, - NEED_SWAP = 2, - NEED_BIAS = 4, - NEED_UMIN = 8, - NEED_UMAX = 16, - }; - TCGv_vec t1, t2, t3; - uint8_t fixup; - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_GT: - fixup = 0; - break; - case TCG_COND_NE: - case TCG_COND_LE: - fixup = NEED_INV; - break; - case TCG_COND_LT: - fixup = NEED_SWAP; - break; - case TCG_COND_GE: - fixup = NEED_SWAP | NEED_INV; - break; - case TCG_COND_LEU: - if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { - fixup = NEED_UMIN; - } else { - fixup = NEED_BIAS | NEED_INV; - } - break; - case TCG_COND_GTU: - if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { - fixup = NEED_UMIN | NEED_INV; - } else { - fixup = NEED_BIAS; - } - break; - case TCG_COND_GEU: - if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { - fixup = NEED_UMAX; - } else { - fixup = NEED_BIAS | NEED_SWAP | NEED_INV; - } - break; - case TCG_COND_LTU: - if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { - fixup = NEED_UMAX | NEED_INV; - } else { - fixup = NEED_BIAS | NEED_SWAP; - } - break; - default: - g_assert_not_reached(); - } - - if (fixup & NEED_INV) { - cond = tcg_invert_cond(cond); - } - if (fixup & NEED_SWAP) { - t1 = v1, v1 = v2, v2 = t1; - cond = tcg_swap_cond(cond); - } +static TCGCond expand_vec_cond(TCGType type, unsigned vece, TCGArg *a1, TCGArg *a2, TCGCond cond) { + /* + * Without AVX512, there are no 64-bit unsigned comparisons. + * We must bias the inputs so that they become signed. + * All other swapping and inversion are handled during code generation. 
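+ * Subtracting 1 << 63 flips the sign bit, so unsigned ordering on the inputs becomes signed ordering on the biased values.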
+ */ + if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) { + TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1)); + TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2)); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1)); - t1 = t2 = NULL; - if (fixup & (NEED_UMIN | NEED_UMAX)) { - t1 = tcg_temp_new_vec(type); - if (fixup & NEED_UMIN) { - tcg_gen_umin_vec(vece, t1, v1, v2); - } else { - tcg_gen_umax_vec(vece, t1, v1, v2); - } - v2 = t1; - cond = TCG_COND_EQ; - } else if (fixup & NEED_BIAS) { - t1 = tcg_temp_new_vec(type); - t2 = tcg_temp_new_vec(type); - t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1)); tcg_gen_sub_vec(vece, t1, v1, t3); tcg_gen_sub_vec(vece, t2, v2, t3); - v1 = t1; - v2 = t2; + *a1 = tcgv_vec_arg(t1); + *a2 = tcgv_vec_arg(t2); cond = tcg_signed_cond(cond); } - - tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT); - /* Expand directly; do not recurse. */ - vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); - - if (t1) { - tcg_temp_free_vec(t1); - if (t2) { - tcg_temp_free_vec(t2); - } - } - return fixup & NEED_INV; + return cond; } -static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2, TCGCond cond) { - if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) { - tcg_gen_not_vec(vece, v0, v0); - } +static void expand_vec_cmp(TCGType type, unsigned vece, TCGArg a0, TCGArg a1, TCGArg a2, TCGCond cond) { + cond = expand_vec_cond(type, vece, &a1, &a2, cond); + /* Expand directly; do not recurse. */ + vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond); } -static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec c1, TCGv_vec c2, TCGv_vec v3, - TCGv_vec v4, TCGCond cond) { - TCGv_vec t = tcg_temp_new_vec(type); - - if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) { - /* Invert the sense of the compare by swapping arguments. */ - TCGv_vec x; - x = v3, v3 = v4, v4 = x; - } - vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v4), tcgv_vec_arg(v3), - tcgv_vec_arg(t)); - tcg_temp_free_vec(t); +static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGArg a0, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, + TCGCond cond) { + cond = expand_vec_cond(type, vece, &a1, &a2, cond); + /* Expand directly; do not recurse. */ + vec_gen_6(INDEX_op_cmpsel_vec, type, vece, a0, a1, a2, a3, a4, cond); } void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, TCGArg a0, ...) { va_list va; - TCGArg a2; - TCGv_vec v0, v1, v2, v3, v4; + TCGArg a1, a2, a3, a4, a5; + TCGv_vec v0, v1, v2; va_start(va, a0); - v0 = temp_tcgv_vec(arg_temp(a0)); - v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + a1 = va_arg(va, TCGArg); a2 = va_arg(va, TCGArg); + v0 = temp_tcgv_vec(arg_temp(a0)); + v1 = temp_tcgv_vec(arg_temp(a1)); switch (opc) { case INDEX_op_shli_vec: + expand_vec_shi(type, vece, false, v0, v1, a2); + break; case INDEX_op_shri_vec: - expand_vec_shi(type, vece, opc, v0, v1, a2); + expand_vec_shi(type, vece, true, v0, v1, a2); break; - case INDEX_op_sari_vec: expand_vec_sari(type, vece, v0, v1, a2); break; @@ -3655,15 +4214,15 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, TCGArg a0, .. 
break; case INDEX_op_cmp_vec: - v2 = temp_tcgv_vec(arg_temp(a2)); - expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); + a3 = va_arg(va, TCGArg); + expand_vec_cmp(type, vece, a0, a1, a2, a3); break; case INDEX_op_cmpsel_vec: - v2 = temp_tcgv_vec(arg_temp(a2)); - v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); - v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); - expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg)); + a3 = va_arg(va, TCGArg); + a4 = va_arg(va, TCGArg); + a5 = va_arg(va, TCGArg); + expand_vec_cmpsel(type, vece, a0, a1, a2, a3, a4, a5); break; default: @@ -3713,33 +4272,33 @@ static void tcg_target_qemu_prologue(TCGContext *s) { tcg_out_push(s, tcg_target_callee_save_regs[i]); } -#if TCG_TARGET_REG_BITS == 32 - tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + stack_addend); -#else -#if !defined(CONFIG_SOFTMMU) - if (guest_base) { + if (!tcg_use_softmmu && guest_base) { int seg = setup_guest_base_seg(); if (seg != 0) { x86_guest_base.seg = seg; } else if (guest_base == (int32_t) guest_base) { x86_guest_base.ofs = guest_base; } else { + assert(TCG_TARGET_REG_BITS == 64); /* Choose R12 because, as a base, it requires a SIB byte. */ x86_guest_base.index = TCG_REG_R12; tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base); tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index); } } -#endif - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); -#endif + + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + stack_addend); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); + } /* * Return path for goto_ptr. 
Set return value to 0, a-la exit_tb, @@ -3762,6 +4321,10 @@ static void tcg_target_qemu_prologue(TCGContext *s) { tcg_out_opc(s, OPC_RET, 0, 0, 0); } +static void tcg_out_tb_start(TCGContext *s) { + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { memset(p, 0x90, count); } diff --git a/libtcg/src/optimize.c b/libtcg/src/optimize.c index 0af1b0e76..315e05e7b 100644 --- a/libtcg/src/optimize.c +++ b/libtcg/src/optimize.c @@ -26,26 +26,30 @@ #include #include "tcg/utils/int128.h" +#include "tcg/utils/interval-tree.h" #include "tcg/utils/osdep.h" // clang-format off #include "tcg/tcg.h" #include "tcg/tcg-internal.h" #include "tcg/tcg-op-common.h" +#include "tcg/tcg-has.h" // clang-format on -#define CASE_OP_32_64(x) glue(glue(case INDEX_op_, x), _i32) : glue(glue(case INDEX_op_, x), _i64) - -#define CASE_OP_32_64_VEC(x) \ - glue(glue(case INDEX_op_, x), _i32) : glue(glue(case INDEX_op_, x), _i64) : glue(glue(case INDEX_op_, x), _vec) +typedef struct MemCopyInfo { + IntervalTreeNode itree; + QSIMPLEQ_ENTRY(MemCopyInfo) next; + TCGTemp *ts; + TCGType type; +} MemCopyInfo; typedef struct TempOptInfo { - bool is_const; TCGTemp *prev_copy; TCGTemp *next_copy; - uint64_t val; + QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ - uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ + uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ + uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ } TempOptInfo; typedef struct OptContext { @@ -53,86 +57,62 @@ typedef struct OptContext { TCGOp *prev_mb; TCGTempSet temps_used; + IntervalTreeRoot mem_copy; + QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; + /* In flight values from optimization. */ - uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ - uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ - uint64_t s_mask; /* mask of clrsb(value) bits */ TCGType type; + int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ } OptContext; -/* Calculate the smask for a specific value. */ -static uint64_t smask_from_value(uint64_t value) { - int rep = clrsb64(value); - return ~(~0ull >> rep); +static inline TempOptInfo *ts_info(TCGTemp *ts) { + return ts->state_ptr; } -/* - * Calculate the smask for a given set of known-zeros. - * If there are lots of zeros on the left, we can consider the remainder - * an unsigned field, and thus the corresponding signed field is one bit - * larger. - */ -static uint64_t smask_from_zmask(uint64_t zmask) { - /* - * Only the 0 bits are significant for zmask, thus the msb itself - * must be zero, else we have no sign information. - */ - int rep = clz64(zmask); - if (rep == 0) { - return 0; - } - rep -= 1; - return ~(~0ull >> rep); +static inline TempOptInfo *arg_info(TCGArg arg) { + return ts_info(arg_temp(arg)); } -/* - * Recreate a properly left-aligned smask after manipulation. - * Some bit-shuffling, particularly shifts and rotates, may - * retain sign bits on the left, but may scatter disconnected - * sign bits on the right. Retain only what remains to the left. - */ -static uint64_t smask_from_smask(int64_t smask) { - /* Only the 1 bits are significant for smask */ - return smask_from_zmask(~smask); +static inline bool ti_is_const(TempOptInfo *ti) { + /* If all bits that are not known zeros are known ones, it's constant. 
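Concretely, z_mask is an upper bound and o_mask a lower bound on the set of one-bits; a small consistency model (illustrative sketch, masks_admit is a hypothetical name):

    #include <stdbool.h>
    #include <stdint.h>

    // A value v is compatible with the masks iff it sets no bit outside
    // z_mask and every bit inside o_mask.  When z_mask == o_mask exactly
    // one value remains, which is why ti_is_const tests that equality.
    static bool masks_admit(uint64_t v, uint64_t z_mask, uint64_t o_mask) {
        return (v & ~z_mask) == 0 && (v & o_mask) == o_mask;
    }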
*/ + return ti->z_mask == ti->o_mask; } -static inline TempOptInfo *ts_info(TCGTemp *ts) { - return ts->state_ptr; +static inline uint64_t ti_const_val(TempOptInfo *ti) { + /* If constant, both z_mask and o_mask contain the value. */ + return ti->z_mask; } -static inline TempOptInfo *arg_info(TCGArg arg) { - return ts_info(arg_temp(arg)); +static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) { + return ti_is_const(ti) && ti_const_val(ti) == val; } static inline bool ts_is_const(TCGTemp *ts) { - return ts_info(ts)->is_const; + return ti_is_const(ts_info(ts)); +} + +static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) { + return ti_is_const_val(ts_info(ts), val); } static inline bool arg_is_const(TCGArg arg) { return ts_is_const(arg_temp(arg)); } -static inline bool ts_is_copy(TCGTemp *ts) { - return ts_info(ts)->next_copy != ts; +static inline uint64_t arg_const_val(TCGArg arg) { + return ti_const_val(arg_info(arg)); } -/* Reset TEMP's state, possibly removing the temp for the list of copies. */ -static void reset_ts(TCGTemp *ts) { - TempOptInfo *ti = ts_info(ts); - TempOptInfo *pi = ts_info(ti->prev_copy); - TempOptInfo *ni = ts_info(ti->next_copy); +static inline bool arg_is_const_val(TCGArg arg, uint64_t val) { + return ts_is_const_val(arg_temp(arg), val); +} - ni->prev_copy = ti->prev_copy; - pi->next_copy = ti->next_copy; - ti->next_copy = ts; - ti->prev_copy = ts; - ti->is_const = false; - ti->z_mask = -1; - ti->s_mask = 0; +static inline bool ts_is_copy(TCGTemp *ts) { + return ts_info(ts)->next_copy != ts; } -static void reset_temp(TCGArg arg) { - reset_ts(arg_temp(arg)); +static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) { + return a->kind < b->kind ? b : a; } /* Initialize and activate a temporary. */ @@ -153,41 +133,134 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) { ti->next_copy = ts; ti->prev_copy = ts; + QSIMPLEQ_INIT(&ti->mem_copy); if (ts->kind == TEMP_CONST) { - ti->is_const = true; - ti->val = ts->val; ti->z_mask = ts->val; - ti->s_mask = smask_from_value(ts->val); + ti->o_mask = ts->val; + ti->s_mask = INT64_MIN >> clrsb64(ts->val); } else { - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; } } -static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts) { - TCGTemp *i, *g, *l; +static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) { + IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); + return r ? container_of(r, MemCopyInfo, itree) : NULL; +} + +static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) { + IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); + return r ? container_of(r, MemCopyInfo, itree) : NULL; +} + +static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) { + TCGTemp *ts = mc->ts; + TempOptInfo *ti = ts_info(ts); + + interval_tree_remove(&mc->itree, &ctx->mem_copy); + QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); + QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); +} + +static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) { + while (true) { + MemCopyInfo *mc = mem_copy_first(ctx, s, l); + if (!mc) { + break; + } + remove_mem_copy(ctx, mc); + } +} + +static void remove_mem_copy_all(OptContext *ctx) { + remove_mem_copy_in(ctx, 0, -1); + tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); +} + +static TCGTemp *find_better_copy(TCGTemp *ts) { + TCGTemp *i, *ret; /* If this is already readonly, we can't do better. 
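The notion of "better" here is just the larger TCGTempKind; a stand-in sketch (the real enum lives in the tcg internals, and its ordering is assumed here):

    // Longer-lived temps make better canonical copies: constants never
    // die, globals outlive TB-local temps, and so on.  cmp_better_copy
    // keeps whichever operand has the larger kind.
    enum kind { K_EBB, K_TB, K_GLOBAL, K_FIXED, K_CONST };  // assumed order
    static enum kind better(enum kind a, enum kind b) { return a < b ? b : a; }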
 */
     if (temp_readonly(ts)) {
         return ts;
     }
 
-    g = l = NULL;
+    ret = ts;
     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
-        if (temp_readonly(i)) {
-            return i;
-        } else if (i->kind > ts->kind) {
-            if (i->kind == TEMP_GLOBAL) {
-                g = i;
-            } else if (i->kind == TEMP_TB) {
-                l = i;
+        ret = cmp_better_copy(ret, i);
+    }
+    return ret;
+}
+
+static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) {
+    TempOptInfo *si = ts_info(src_ts);
+    TempOptInfo *di = ts_info(dst_ts);
+    MemCopyInfo *mc;
+
+    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
+        tcg_debug_assert(mc->ts == src_ts);
+        mc->ts = dst_ts;
+    }
+    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
+}
+
+/* Reset TEMP's state, possibly removing the temp from the list of copies. */
+static void reset_ts(OptContext *ctx, TCGTemp *ts) {
+    TempOptInfo *ti = ts_info(ts);
+    TCGTemp *pts = ti->prev_copy;
+    TCGTemp *nts = ti->next_copy;
+    TempOptInfo *pi = ts_info(pts);
+    TempOptInfo *ni = ts_info(nts);
+
+    ni->prev_copy = ti->prev_copy;
+    pi->next_copy = ti->next_copy;
+    ti->next_copy = ts;
+    ti->prev_copy = ts;
+    ti->z_mask = -1;
+    ti->o_mask = 0;
+    ti->s_mask = 0;
+
+    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
+        if (ts == nts) {
+            /* Last temp copy being removed, the mem copies die. */
+            MemCopyInfo *mc;
+            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
+                interval_tree_remove(&mc->itree, &ctx->mem_copy);
             }
+            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
+        } else {
+            move_mem_copies(find_better_copy(nts), ts);
+        }
     }
+}
+
+static void reset_temp(OptContext *ctx, TCGArg arg) {
+    reset_ts(ctx, arg_temp(arg));
+}
+
+static void record_mem_copy(OptContext *ctx, TCGType type, TCGTemp *ts, intptr_t start, intptr_t last) {
+    MemCopyInfo *mc;
+    TempOptInfo *ti;
+
+    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
+    if (mc) {
+        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
+    } else {
+        mc = tcg_malloc(sizeof(*mc));
+    }
+
+    memset(mc, 0, sizeof(*mc));
+    mc->itree.start = start;
+    mc->itree.last = last;
+    mc->type = type;
+    interval_tree_insert(&mc->itree, &ctx->mem_copy);
 
-    /* If we didn't find a better representation, return the same temp. */
-    return g ? g : l ?
l : ts; + ts = find_better_copy(ts); + ti = ts_info(ts); + mc->ts = ts; + QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); } static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) { @@ -214,6 +287,45 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2) { return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); } +static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) { + MemCopyInfo *mc; + + for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { + if (mc->itree.start == s && mc->type == type) { + return find_better_copy(mc->ts); + } + } + return NULL; +} + +static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) { + TCGType type = ctx->type; + TCGTemp *ts; + + if (type == TCG_TYPE_I32) { + val = (int32_t) val; + } + + ts = tcg_constant_internal(type, val); + init_ts_info(ctx, ts); + + return temp_arg(ts); +} + +static TCGArg arg_new_temp(OptContext *ctx) { + TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); + init_ts_info(ctx, ts); + return temp_arg(ts); +} + +static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, TCGOpcode opc, unsigned narg) { + return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); +} + +static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, TCGOpcode opc, unsigned narg) { + return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); TCGTemp *src_ts = arg_temp(src); @@ -226,21 +338,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) return true; } - reset_ts(dst_ts); + reset_ts(ctx, dst_ts); di = ts_info(dst_ts); si = ts_info(src_ts); switch (ctx->type) { case TCG_TYPE_I32: - new_op = INDEX_op_mov_i32; - break; case TCG_TYPE_I64: - new_op = INDEX_op_mov_i64; + new_op = INDEX_op_mov; break; case TCG_TYPE_V64: case TCG_TYPE_V128: case TCG_TYPE_V256: - /* TCGOP_VECL and TCGOP_VECE remain unchanged. */ + /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */ new_op = INDEX_op_mov_vec; break; default: @@ -251,6 +361,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) op->args[1] = src; di->z_mask = si->z_mask; + di->o_mask = si->o_mask; di->s_mask = si->s_mask; if (src_ts->type == dst_ts->type) { @@ -260,161 +371,182 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) di->prev_copy = src_ts; ni->prev_copy = dst_ts; si->next_copy = dst_ts; - di->is_const = si->is_const; - di->val = si->val; + + if (!QSIMPLEQ_EMPTY(&si->mem_copy) && cmp_better_copy(src_ts, dst_ts) == dst_ts) { + move_mem_copies(dst_ts, src_ts); + } + } else if (dst_ts->type == TCG_TYPE_I32) { + di->z_mask = (int32_t) di->z_mask; + di->o_mask = (int32_t) di->o_mask; + di->s_mask |= INT32_MIN; + } else { + di->z_mask |= MAKE_64BIT_MASK(32, 32); + di->o_mask = (uint32_t) di->o_mask; + di->s_mask = INT64_MIN; } return true; } static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, TCGArg dst, uint64_t val) { - TCGTemp *tv; - - if (ctx->type == TCG_TYPE_I32) { - val = (int32_t) val; - } - /* Convert movi to mov with constant temp. 
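Note that arg_new_constant above canonicalizes TCG_TYPE_I32 values by sign-extension before interning, so e.g. 0xffffffff and -1 share one TEMP_CONST; a one-line model (illustrative, canon_i32 is a hypothetical name):

    #include <stdint.h>

    // 32-bit constants are stored sign-extended to 64 bits, giving each
    // constant a single canonical key.
    static int64_t canon_i32(uint64_t val) { return (int32_t) val; }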
*/ - tv = tcg_constant_internal(ctx->type, val); - init_ts_info(ctx, tv); - return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); + return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } -static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) { +static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { uint64_t l64, h64; switch (op) { - CASE_OP_32_64(add) : return x + y; + case INDEX_op_add: + return x + y; - CASE_OP_32_64(sub) : return x - y; + case INDEX_op_sub: + return x - y; - CASE_OP_32_64(mul) : return x * y; + case INDEX_op_mul: + return x * y; - CASE_OP_32_64_VEC(and) : return x & y; + case INDEX_op_and: + case INDEX_op_and_vec: + return x & y; - CASE_OP_32_64_VEC(or) : return x | y; + case INDEX_op_or: + case INDEX_op_or_vec: + return x | y; - CASE_OP_32_64_VEC(xor) : return x ^ y; + case INDEX_op_xor: + case INDEX_op_xor_vec: + return x ^ y; - case INDEX_op_shl_i32: - return (uint32_t) x << (y & 31); - - case INDEX_op_shl_i64: + case INDEX_op_shl: + if (type == TCG_TYPE_I32) { + return (uint32_t) x << (y & 31); + } return (uint64_t) x << (y & 63); - case INDEX_op_shr_i32: - return (uint32_t) x >> (y & 31); - - case INDEX_op_shr_i64: + case INDEX_op_shr: + if (type == TCG_TYPE_I32) { + return (uint32_t) x >> (y & 31); + } return (uint64_t) x >> (y & 63); - case INDEX_op_sar_i32: - return (int32_t) x >> (y & 31); - - case INDEX_op_sar_i64: + case INDEX_op_sar: + if (type == TCG_TYPE_I32) { + return (int32_t) x >> (y & 31); + } return (int64_t) x >> (y & 63); - case INDEX_op_rotr_i32: - return ror32(x, y & 31); - - case INDEX_op_rotr_i64: + case INDEX_op_rotr: + if (type == TCG_TYPE_I32) { + return ror32(x, y & 31); + } return ror64(x, y & 63); - case INDEX_op_rotl_i32: - return rol32(x, y & 31); - - case INDEX_op_rotl_i64: + case INDEX_op_rotl: + if (type == TCG_TYPE_I32) { + return rol32(x, y & 31); + } return rol64(x, y & 63); - CASE_OP_32_64_VEC(not) : return ~x; + case INDEX_op_not: + case INDEX_op_not_vec: + return ~x; - CASE_OP_32_64(neg) : return -x; + case INDEX_op_neg: + return -x; - CASE_OP_32_64_VEC(andc) : return x & ~y; + case INDEX_op_andc: + case INDEX_op_andc_vec: + return x & ~y; - CASE_OP_32_64_VEC(orc) : return x | ~y; + case INDEX_op_orc: + case INDEX_op_orc_vec: + return x | ~y; - CASE_OP_32_64_VEC(eqv) : return ~(x ^ y); + case INDEX_op_eqv: + case INDEX_op_eqv_vec: + return ~(x ^ y); - CASE_OP_32_64_VEC(nand) : return ~(x & y); + case INDEX_op_nand: + case INDEX_op_nand_vec: + return ~(x & y); - CASE_OP_32_64_VEC(nor) : return ~(x | y); + case INDEX_op_nor: + case INDEX_op_nor_vec: + return ~(x | y); - case INDEX_op_clz_i32: - return (uint32_t) x ? clz32(x) : y; - - case INDEX_op_clz_i64: + case INDEX_op_clz: + if (type == TCG_TYPE_I32) { + return (uint32_t) x ? clz32(x) : y; + } return x ? clz64(x) : y; - case INDEX_op_ctz_i32: - return (uint32_t) x ? ctz32(x) : y; - - case INDEX_op_ctz_i64: + case INDEX_op_ctz: + if (type == TCG_TYPE_I32) { + return (uint32_t) x ? ctz32(x) : y; + } return x ? ctz64(x) : y; - case INDEX_op_ctpop_i32: - return ctpop32(x); - - case INDEX_op_ctpop_i64: - return ctpop64(x); - - CASE_OP_32_64(ext8s) : return (int8_t) x; + case INDEX_op_ctpop: + return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); - CASE_OP_32_64(ext16s) : return (int16_t) x; - - CASE_OP_32_64(ext8u) : return (uint8_t) x; - - CASE_OP_32_64(ext16u) : return (uint16_t) x; - - CASE_OP_32_64(bswap16) : x = bswap16(x); + case INDEX_op_bswap16: + x = bswap16(x); return y & TCG_BSWAP_OS ? 
(int16_t) x : x; - CASE_OP_32_64(bswap32) : x = bswap32(x); + case INDEX_op_bswap32: + x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t) x : x; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: return bswap64(x); case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: return (int32_t) x; case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_ext32u_i64: return (uint32_t) x; case INDEX_op_extrh_i64_i32: return (uint64_t) x >> 32; - case INDEX_op_muluh_i32: - return ((uint64_t) (uint32_t) x * (uint32_t) y) >> 32; - case INDEX_op_mulsh_i32: - return ((int64_t) (int32_t) x * (int32_t) y) >> 32; - - case INDEX_op_muluh_i64: + case INDEX_op_muluh: + if (type == TCG_TYPE_I32) { + return ((uint64_t) (uint32_t) x * (uint32_t) y) >> 32; + } mulu64(&l64, &h64, x, y); return h64; - case INDEX_op_mulsh_i64: + + case INDEX_op_mulsh: + if (type == TCG_TYPE_I32) { + return ((int64_t) (int32_t) x * (int32_t) y) >> 32; + } muls64(&l64, &h64, x, y); return h64; - case INDEX_op_div_i32: + case INDEX_op_divs: /* Avoid crashing on divide by zero, otherwise undefined. */ - return (int32_t) x / ((int32_t) y ?: 1); - case INDEX_op_divu_i32: - return (uint32_t) x / ((uint32_t) y ?: 1); - case INDEX_op_div_i64: + if (type == TCG_TYPE_I32) { + return (int32_t) x / ((int32_t) y ?: 1); + } return (int64_t) x / ((int64_t) y ?: 1); - case INDEX_op_divu_i64: + + case INDEX_op_divu: + if (type == TCG_TYPE_I32) { + return (uint32_t) x / ((uint32_t) y ?: 1); + } return (uint64_t) x / ((uint64_t) y ?: 1); - case INDEX_op_rem_i32: - return (int32_t) x % ((int32_t) y ?: 1); - case INDEX_op_remu_i32: - return (uint32_t) x % ((uint32_t) y ?: 1); - case INDEX_op_rem_i64: + case INDEX_op_rems: + if (type == TCG_TYPE_I32) { + return (int32_t) x % ((int32_t) y ?: 1); + } return (int64_t) x % ((int64_t) y ?: 1); - case INDEX_op_remu_i64: + + case INDEX_op_remu: + if (type == TCG_TYPE_I32) { + return (uint32_t) x % ((uint32_t) y ?: 1); + } return (uint64_t) x % ((uint64_t) y ?: 1); default: @@ -423,7 +555,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) { } static uint64_t do_constant_folding(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { - uint64_t res = do_constant_folding_2(op, x, y); + uint64_t res = do_constant_folding_2(op, type, x, y); if (type == TCG_TYPE_I32) { res = (int32_t) res; } @@ -452,9 +584,15 @@ static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) { return x <= y; case TCG_COND_GTU: return x > y; - default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + return (x & y) == 0; + case TCG_COND_TSTNE: + return (x & y) != 0; + case TCG_COND_ALWAYS: + case TCG_COND_NEVER: + break; } + g_assert_not_reached(); } static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) { @@ -479,12 +617,18 @@ static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) { return x <= y; case TCG_COND_GTU: return x > y; - default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + return (x & y) == 0; + case TCG_COND_TSTNE: + return (x & y) != 0; + case TCG_COND_ALWAYS: + case TCG_COND_NEVER: + break; } + g_assert_not_reached(); } -static bool do_constant_folding_cond_eq(TCGCond c) { +static int do_constant_folding_cond_eq(TCGCond c) { switch (c) { case TCG_COND_GT: case TCG_COND_LTU: @@ -498,9 +642,14 @@ static bool do_constant_folding_cond_eq(TCGCond c) { case TCG_COND_LEU: case TCG_COND_EQ: return 1; - default: - g_assert_not_reached(); + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + return -1; + case TCG_COND_ALWAYS: + case 
TCG_COND_NEVER: + break; } + g_assert_not_reached(); } /* @@ -509,8 +658,8 @@ static bool do_constant_folding_cond_eq(TCGCond c) { */ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) { if (arg_is_const(x) && arg_is_const(y)) { - uint64_t xv = arg_info(x)->val; - uint64_t yv = arg_info(y)->val; + uint64_t xv = arg_const_val(x); + uint64_t yv = arg_const_val(y); switch (type) { case TCG_TYPE_I32: @@ -523,11 +672,13 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) } } else if (args_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (arg_is_const(y) && arg_info(y)->val == 0) { + } else if (arg_is_const_val(y, 0)) { switch (c) { case TCG_COND_LTU: + case TCG_COND_TSTNE: return 0; case TCG_COND_GEU: + case TCG_COND_TSTEQ: return 1; default: return -1; @@ -536,42 +687,6 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) return -1; } -/* - * Return -1 if the condition can't be simplified, - * and the result of the condition (0 or 1) if it can. - */ -static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) { - TCGArg al = p1[0], ah = p1[1]; - TCGArg bl = p2[0], bh = p2[1]; - - if (arg_is_const(bl) && arg_is_const(bh)) { - tcg_target_ulong blv = arg_info(bl)->val; - tcg_target_ulong bhv = arg_info(bh)->val; - uint64_t b = deposit64(blv, 32, 32, bhv); - - if (arg_is_const(al) && arg_is_const(ah)) { - tcg_target_ulong alv = arg_info(al)->val; - tcg_target_ulong ahv = arg_info(ah)->val; - uint64_t a = deposit64(alv, 32, 32, ahv); - return do_constant_folding_cond_64(a, b, c); - } - if (b == 0) { - switch (c) { - case TCG_COND_LTU: - return 0; - case TCG_COND_GEU: - return 1; - default: - break; - } - } - } - if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { - return do_constant_folding_cond_eq(c); - } - return -1; -} - /** * swap_commutative: * @dest: TCGArg of the destination argument, or NO_DEST. @@ -585,11 +700,16 @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) { #define NO_DEST temp_arg(NULL) +static int pref_commutative(TempOptInfo *ti) { + /* Slight preference for non-zero constants second. */ + return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; +} + static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += arg_is_const(a1); - sum -= arg_is_const(a2); + sum += pref_commutative(arg_info(a1)); + sum -= pref_commutative(arg_info(a2)); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -603,10 +723,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += arg_is_const(p1[0]); - sum += arg_is_const(p1[1]); - sum -= arg_is_const(p2[0]); - sum -= arg_is_const(p2[1]); + sum += pref_commutative(arg_info(p1[0])); + sum += pref_commutative(arg_info(p1[1])); + sum -= pref_commutative(arg_info(p2[0])); + sum -= pref_commutative(arg_info(p2[1])); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -616,6 +736,168 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { return false; } +/* + * Return -1 if the condition can't be simplified, + * and the result of the condition (0 or 1) if it can. 
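The TST conditions threaded through these helpers compare under a mask rather than by magnitude; scalar semantics (sketch):

    #include <stdint.h>

    // TSTNE x,y is (x & y) != 0 and TSTEQ is its negation.  With y == 0
    // the AND is always zero, so TSTEQ folds to 1 and TSTNE to 0, which
    // is the arg_is_const_val(y, 0) early-out above.
    static int tstne(uint64_t x, uint64_t y) { return (x & y) != 0; }
    static int tsteq(uint64_t x, uint64_t y) { return (x & y) == 0; }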
+ */ +static bool fold_and(OptContext *ctx, TCGOp *op); +static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, TCGArg *p1, TCGArg *p2, TCGArg *pcond) { + TCGCond cond; + TempOptInfo *i1; + bool swap; + int r; + + swap = swap_commutative(dest, p1, p2); + cond = *pcond; + if (swap) { + *pcond = cond = tcg_swap_cond(cond); + } + + r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); + if (r >= 0) { + return r; + } + if (!is_tst_cond(cond)) { + return -1; + } + + i1 = arg_info(*p1); + + /* + * TSTNE x,x -> NE x,0 + * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x + */ + if (args_are_copies(*p1, *p2) || (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_eqne_cond(cond); + return -1; + } + + /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ + if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_ltge_cond(cond); + return -1; + } + + /* Expand to AND with a temporary if no backend support. */ + if (!TCG_TARGET_HAS_tst) { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); + TCGArg tmp = arg_new_temp(ctx); + + op2->args[0] = tmp; + op2->args[1] = *p1; + op2->args[2] = *p2; + fold_and(ctx, op2); + + *p1 = tmp; + *p2 = arg_new_constant(ctx, 0); + *pcond = tcg_tst_eqne_cond(cond); + } + return -1; +} + +static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) { + TCGArg al, ah, bl, bh; + TCGCond c; + bool swap; + int r; + + swap = swap_commutative2(args, args + 2); + c = args[4]; + if (swap) { + args[4] = c = tcg_swap_cond(c); + } + + al = args[0]; + ah = args[1]; + bl = args[2]; + bh = args[3]; + + if (arg_is_const(bl) && arg_is_const(bh)) { + tcg_target_ulong blv = arg_const_val(bl); + tcg_target_ulong bhv = arg_const_val(bh); + uint64_t b = deposit64(blv, 32, 32, bhv); + + if (arg_is_const(al) && arg_is_const(ah)) { + tcg_target_ulong alv = arg_const_val(al); + tcg_target_ulong ahv = arg_const_val(ah); + uint64_t a = deposit64(alv, 32, 32, ahv); + + r = do_constant_folding_cond_64(a, b, c); + if (r >= 0) { + return r; + } + } + + if (b == 0) { + switch (c) { + case TCG_COND_LTU: + case TCG_COND_TSTNE: + return 0; + case TCG_COND_GEU: + case TCG_COND_TSTEQ: + return 1; + default: + break; + } + } + + /* TSTNE x,-1 -> NE x,0 */ + if (b == -1 && is_tst_cond(c)) { + args[3] = args[2] = arg_new_constant(ctx, 0); + args[4] = tcg_tst_eqne_cond(c); + return -1; + } + + /* TSTNE x,sign -> LT x,0 */ + if (b == INT64_MIN && is_tst_cond(c)) { + /* bl must be 0, so copy that to bh */ + args[3] = bl; + args[4] = tcg_tst_ltge_cond(c); + return -1; + } + } + + if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { + r = do_constant_folding_cond_eq(c); + if (r >= 0) { + return r; + } + + /* TSTNE x,x -> NE x,0 */ + if (is_tst_cond(c)) { + args[3] = args[2] = arg_new_constant(ctx, 0); + args[4] = tcg_tst_eqne_cond(c); + return -1; + } + } + + /* Expand to AND with a temporary if no backend support. 
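The sign-bit special case above has a short scalar justification (sketch, hypothetical helper name):

    #include <stdint.h>

    // When the test mask contains only sign-bit copies, the AND can keep
    // at most the sign bit, so TSTNE x,sign collapses to signed x < 0.
    static int tstne_sign(int64_t x) {
        return (x & INT64_MIN) != 0;  // identical to (x < 0)
    }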
+ */
+    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
+        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
+        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
+        TCGArg t1 = arg_new_temp(ctx);
+        TCGArg t2 = arg_new_temp(ctx);
+
+        op1->args[0] = t1;
+        op1->args[1] = al;
+        op1->args[2] = bl;
+        fold_and(ctx, op1);
+
+        op2->args[0] = t2;
+        op2->args[1] = ah;
+        op2->args[2] = bh;
+        fold_and(ctx, op2);
+
+        args[0] = t1;
+        args[1] = t2;
+        args[3] = args[2] = arg_new_constant(ctx, 0);
+        args[4] = tcg_tst_eqne_cond(c);
+    }
+    return -1;
+}
+
 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) {
     for (int i = 0; i < nb_args; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
@@ -624,43 +906,36 @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) {
 }
 
 static void copy_propagate(OptContext *ctx, TCGOp *op, int nb_oargs, int nb_iargs) {
-    TCGContext *s = ctx->tcg;
-
     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
         if (ts_is_copy(ts)) {
-            op->args[i] = temp_arg(find_better_copy(s, ts));
+            op->args[i] = temp_arg(find_better_copy(ts));
         }
     }
 }
 
-static void finish_folding(OptContext *ctx, TCGOp *op) {
+static void finish_bb(OptContext *ctx) {
+    /* We only optimize memory barriers across basic blocks. */
+    ctx->prev_mb = NULL;
+}
+
+static void finish_ebb(OptContext *ctx) {
+    finish_bb(ctx);
+    /* We only optimize across extended basic blocks. */
+    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+    remove_mem_copy_all(ctx);
+}
+
+static bool finish_folding(OptContext *ctx, TCGOp *op) {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     int i, nb_oargs;
 
-    /*
-     * For an opcode that ends a BB, reset all temp data.
-     * We do no cross-BB optimization.
-     */
-    if (def->flags & TCG_OPF_BB_END) {
-        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
-        ctx->prev_mb = NULL;
-        return;
-    }
-
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
-        reset_ts(ts);
-        /*
-         * Save the corresponding known-zero/sign bits mask for the
-         * first output argument (only one supported so far).
-         */
-        if (i == 0) {
-            ts_info(ts)->z_mask = ctx->z_mask;
-            ts_info(ts)->s_mask = ctx->s_mask;
-        }
+        reset_ts(ctx, ts);
     }
+    return true;
 }
 
 /*
@@ -676,9 +951,8 @@ static void finish_folding(OptContext *ctx, TCGOp *op) {
 
 static bool fold_const1(OptContext *ctx, TCGOp *op) {
     if (arg_is_const(op->args[1])) {
-        uint64_t t;
+        uint64_t t = arg_const_val(op->args[1]);
 
-        t = arg_info(op->args[1])->val;
         t = do_constant_folding(op->opc, ctx->type, t, 0);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
@@ -687,8 +961,8 @@ static bool fold_const2(OptContext *ctx, TCGOp *op) {
     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
-        uint64_t t1 = arg_info(op->args[1])->val;
-        uint64_t t2 = arg_info(op->args[2])->val;
+        uint64_t t1 = arg_const_val(op->args[1]);
+        uint64_t t2 = arg_const_val(op->args[2]);
 
         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
@@ -706,35 +980,91 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) {
     return fold_const2(ctx, op);
 }
 
-static bool fold_masks(OptContext *ctx, TCGOp *op) {
-    uint64_t a_mask = ctx->a_mask;
-    uint64_t z_mask = ctx->z_mask;
-    uint64_t s_mask = ctx->s_mask;
-
-    /*
-     * 32-bit ops generate 32-bit results, which for the purpose of
+/*
+ * Record "zero" and "sign" masks for the single output of @op.
+ * See TempOptInfo definition of z_mask and s_mask.
+ * If z_mask allows, fold the output to constant zero.
+ * The passed s_mask may be augmented by z_mask.
+ */
+static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, uint64_t z_mask, uint64_t o_mask, int64_t s_mask,
+                                uint64_t a_mask) {
+    const TCGOpDef *def = &tcg_op_defs[op->opc];
+    TCGTemp *ts;
+    TempOptInfo *ti;
+    int rep;
+
+    /* Only single-output opcodes are supported here. */
+    tcg_debug_assert(def->nb_oargs == 1);
+
+    /*
+     * 32-bit ops generate 32-bit results, which for the purpose of
      * simplifying tcg are sign-extended. Certainly that's how we
      * represent our constants elsewhere. Note that the bits will
     * be reset properly for a 64-bit value when encountering the
      * type changing opcodes.
      */
     if (ctx->type == TCG_TYPE_I32) {
-        a_mask = (int32_t) a_mask;
         z_mask = (int32_t) z_mask;
-        s_mask |= MAKE_64BIT_MASK(32, 32);
-        ctx->z_mask = z_mask;
-        ctx->s_mask = s_mask;
+        o_mask = (int32_t) o_mask;
+        s_mask |= INT32_MIN;
+        a_mask = (uint32_t) a_mask;
     }
 
-    if (z_mask == 0) {
-        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
+    /* Bits that are known 1 and bits that are known 0 must not overlap. */
+    tcg_debug_assert((o_mask & ~z_mask) == 0);
+
+    /* If all bits that are not known zero are known one, this is a constant. */
+    if (z_mask == o_mask) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
     }
+
+    /* If no bits are affected, the operation devolves to a copy. */
     if (a_mask == 0) {
         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
     }
+
+    ts = arg_temp(op->args[0]);
+    reset_ts(ctx, ts);
+
+    ti = ts_info(ts);
+    ti->z_mask = z_mask;
+    ti->o_mask = o_mask;
+
+    /* Canonicalize s_mask and incorporate data from z_mask. */
+    rep = clz64(~s_mask);
+    rep = MAX(rep, clz64(z_mask));
+    rep = MAX(rep, clz64(~o_mask));
+    rep = MAX(rep - 1, 0);
+    ti->s_mask = INT64_MIN >> rep;
+
     return false;
 }
 
+static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask, uint64_t o_mask, int64_t s_mask,
+                            uint64_t a_mask) {
+    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
+    return true;
+}
+
+static bool fold_masks_zos(OptContext *ctx, TCGOp *op, uint64_t z_mask, uint64_t o_mask, uint64_t s_mask) {
+    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
+}
+
+static bool fold_masks_zo(OptContext *ctx, TCGOp *op, uint64_t z_mask, uint64_t o_mask) {
+    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
+}
+
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op, uint64_t z_mask, uint64_t s_mask) {
+    return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
+}
+
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) {
+    return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
+}
+
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) {
+    return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
+}
+
 /*
  * Convert @op to NOT, if NOT is supported by the host.
 * Return true f the conversion is successful, which will still
@@ -747,12 +1077,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) {
     switch (ctx->type) {
     case TCG_TYPE_I32:
-        not_op = INDEX_op_not_i32;
-        have_not = TCG_TARGET_HAS_not_i32;
-        break;
     case TCG_TYPE_I64:
-        not_op = INDEX_op_not_i64;
-        have_not = TCG_TARGET_HAS_not_i64;
+        not_op = INDEX_op_not;
+        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
         break;
     case TCG_TYPE_V64:
     case TCG_TYPE_V128:
@@ -773,7 +1100,7 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) {
 
 /* If the binary operation has first argument @i, fold to @i.
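For intuition about the s_mask canonicalization above, the constant case from init_ts_info can be modeled directly (sketch; clrsb counts the redundant sign bits below the msb):

    #include <stdint.h>

    // s_mask flags the bits guaranteed to equal the msb.  For v = -8
    // there are 60 redundant sign bits, so the top 61 bits are flagged
    // and only the low 3 bits carry independent information.
    static uint64_t smask_of_const(int64_t v) {
        int rep = __builtin_clrsbll(v);        // matches clrsb64(ts->val)
        return (uint64_t) (INT64_MIN >> rep);  // arithmetic shift replicates
    }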
*/ static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { + if (arg_is_const_val(op->args[1], i)) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } return false; @@ -781,7 +1108,7 @@ static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { /* If the binary operation has first argument @i, fold to NOT. */ static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) { + if (arg_is_const_val(op->args[1], i)) { return fold_to_not(ctx, op, 2); } return false; @@ -789,7 +1116,7 @@ static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i) { /* If the binary operation has second argument @i, fold to @i. */ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } return false; @@ -797,7 +1124,7 @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) { /* If the binary operation has second argument @i, fold to identity. */ static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } return false; @@ -805,7 +1132,7 @@ static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) { /* If the binary operation has second argument @i, fold to NOT. */ static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) { - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) { + if (arg_is_const_val(op->args[2], i)) { return fold_to_not(ctx, op, 1); } return false; @@ -836,11 +1163,17 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) { * 3) those that produce information about the result value. */ +static bool fold_addco(OptContext *ctx, TCGOp *op); +static bool fold_or(OptContext *ctx, TCGOp *op); +static bool fold_orc(OptContext *ctx, TCGOp *op); +static bool fold_subbo(OptContext *ctx, TCGOp *op); +static bool fold_xor(OptContext *ctx, TCGOp *op); + static bool fold_add(OptContext *ctx, TCGOp *op) { if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } /* We cannot as yet do_constant_folding with vectors. 
*/ @@ -848,128 +1181,315 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) { if (fold_commutative(ctx, op) || fold_xi_to_x(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } -static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) { - if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && arg_is_const(op->args[4]) && - arg_is_const(op->args[5])) { - uint64_t al = arg_info(op->args[2])->val; - uint64_t ah = arg_info(op->args[3])->val; - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - TCGArg rl, rh; - TCGOp *op2; - - if (ctx->type == TCG_TYPE_I32) { - uint64_t a = deposit64(al, 32, 32, ah); - uint64_t b = deposit64(bl, 32, 32, bh); +static void squash_prev_carryout(OptContext *ctx, TCGOp *op) { + TempOptInfo *t2; - if (add) { - a += b; + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_addco: + op->opc = INDEX_op_add; + fold_add(ctx, op); + break; + case INDEX_op_addcio: + op->opc = INDEX_op_addci; + break; + case INDEX_op_addc1o: + op->opc = INDEX_op_add; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); } else { - a -= b; + TCGArg ret = op->args[0]; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, 1); } + break; + default: + g_assert_not_reached(); + } +} + +static bool fold_addci(OptContext *ctx, TCGOp *op) { + fold_commutative(ctx, op); + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + op->opc = INDEX_op_add; - al = sextract64(a, 0, 32); - ah = sextract64(a, 32, 32); + if (ctx->carry_state > 0) { + TempOptInfo *t2 = arg_info(op->args[2]); + + /* + * Propagate the known carry-in into a constant, if possible. + * Otherwise emit a second add +1. + */ + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); } else { - Int128 a = int128_make128(al, ah); - Int128 b = int128_make128(bl, bh); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); - if (add) { - a = int128_add(a, b); - } else { - a = int128_sub(a, b); - } + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_add(ctx, op2); - al = int128_getlo(a); - ah = int128_gethi(a); + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, 1); } + } - rl = op->args[0]; - rh = op->args[1]; + ctx->carry_state = -1; + return fold_add(ctx, op); +} - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); +static bool fold_addcio(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t sum, max; - tcg_opt_gen_movi(ctx, op, rl, al); - tcg_opt_gen_movi(ctx, op2, rh, ah); - return true; + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. 
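That claim follows from z_mask being an upper bound; a compact check (sketch, hypothetical helper name):

    #include <stdbool.h>
    #include <stdint.h>

    // Any admissible value satisfies v <= z_mask, since clearing
    // known-zero bits can only shrink it.  Unsigned addition is monotone,
    // so if z1 + z2 does not wrap, no admissible v1 + v2 wraps either.
    static bool sum_may_carry(uint64_t z1, uint64_t z2) {
        return z1 + z2 < z1;  // wrapped, so a carry-out is possible
    }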
+ */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { + carry_out = 0; } - return false; + + if (ctx->carry_state < 0) { + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + if (ctx->carry_state == 0) { + goto do_addco; + } + + /* Propagate the known carry-in into a constant, if possible. */ + max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + if (ti_is_const(t2)) { + uint64_t v = ti_const_val(t2) & max; + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + /* max + known carry in produces known carry out. */ + carry_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1) & max; + if (v < max) { + op->args[1] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + carry_out = 1; + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_addc1o; + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + +do_addco: + op->opc = INDEX_op_addco; + return fold_addco(ctx, op); } -static bool fold_add2(OptContext *ctx, TCGOp *op) { - /* Note that the high and low parts may be independently swapped. */ - swap_commutative(op->args[0], &op->args[2], &op->args[4]); - swap_commutative(op->args[1], &op->args[3], &op->args[5]); +static bool fold_addco(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t ign; + + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); - return fold_addsub2(ctx, op, true); + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + + if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + /* Given sign-extension of z_mask for I32, we need not truncate. */ + carry_out = uadd64_overflow(v1, v2, &ign); + } else if (v2 == 0) { + carry_out = 0; + } + } else { + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { + carry_out = 0; + } + } + ctx->carry_state = carry_out; + return finish_folding(ctx, op); } static bool fold_and(OptContext *ctx, TCGOp *op) { - uint64_t z1, z2; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || fold_xi_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, -1) || - fold_xx_to_x(ctx, op)) { + if (fold_const2_commutative(ctx, op)) { return true; } - z1 = arg_info(op->args[1])->z_mask; - z2 = arg_info(op->args[2])->z_mask; - ctx->z_mask = z1 & z2; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = t1->z_mask & t2->z_mask; + o_mask = t1->o_mask & t2->o_mask; /* * Sign repetitions are perforce all identical, whether they are 1 or 0. * Bitwise operations preserve the relative quantity of the repetitions. */ - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; + s_mask = t1->s_mask & t2->s_mask; - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer affected bits from it. - */ - if (arg_is_const(op->args[2])) { - ctx->a_mask = z1 & ~z2; - } + /* Affected bits are those not known zero, masked by those known one. */ + a_mask = t1->z_mask & ~t2->o_mask; - return fold_masks(ctx, op); + if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { + if (op->opc == INDEX_op_and && ti_is_const(t2)) { + /* + * Canonicalize on extract, if valid. 
This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + uint64_t val = ti_const_val(t2); + if (!(val & (val + 1))) { + unsigned len = ctz64(~val); + if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { + op->opc = INDEX_op_extract; + op->args[2] = 0; + op->args[3] = len; + } + } + } else { + fold_xx_to_x(ctx, op); + } + } + return true; } static bool fold_andc(OptContext *ctx, TCGOp *op) { - uint64_t z1; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0) || fold_ix_to_not(ctx, op, -1)) { + if (fold_const2(ctx, op)) { return true; } - z1 = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer anything from it. - */ - if (arg_is_const(op->args[2])) { - uint64_t z2 = ~arg_info(op->args[2])->z_mask; - ctx->a_mask = z1 & ~z2; - z1 &= z2; + if (ti_is_const(t2)) { + /* Fold andc r,x,i to and r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_and; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_and_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_and(ctx, op); + } + if (fold_xx_to_i(ctx, op, 0) || fold_ix_to_not(ctx, op, -1)) { + return true; } - ctx->z_mask = z1; - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + z_mask = t1->z_mask & ~t2->o_mask; + o_mask = t1->o_mask & ~t2->z_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known zero, masked by those known zero. */ + a_mask = t1->z_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } -static bool fold_brcond(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[2]; - int i; +static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) { + /* If true and false values are the same, eliminate the cmp. 
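The mask test in the AND-to-extract canonicalization above is the usual contiguity trick (sketch, hypothetical helper name):

    #include <stdint.h>

    // Masks of the form 2^len - 1 are exactly those where adding 1
    // carries through every low one bit, i.e. val & (val + 1) == 0;
    // len then falls out as the number of trailing ones.
    static int low_mask_len(uint64_t val) {
        if (val & (val + 1)) {
            return -1;  // not a contiguous low mask
        }
        return val == ~0ull ? 64 : __builtin_ctzll(~val);
    }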
*/ + if (args_are_copies(op->args[2], op->args[3])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); + } + + if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { + uint64_t tv = arg_const_val(op->args[2]); + uint64_t fv = arg_const_val(op->args[3]); - if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) { - op->args[2] = cond = tcg_swap_cond(cond); + if (tv == -1 && fv == 0) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); + } + if (tv == 0 && fv == -1) { + if (TCG_TARGET_HAS_not_vec) { + op->opc = INDEX_op_not_vec; + return fold_not(ctx, op); + } else { + op->opc = INDEX_op_xor_vec; + op->args[2] = arg_new_constant(ctx, -1); + return fold_xor(ctx, op); + } + } + } + if (arg_is_const(op->args[2])) { + uint64_t tv = arg_const_val(op->args[2]); + if (tv == -1) { + op->opc = INDEX_op_or_vec; + op->args[2] = op->args[3]; + return fold_or(ctx, op); + } + if (tv == 0 && TCG_TARGET_HAS_andc_vec) { + op->opc = INDEX_op_andc_vec; + op->args[2] = op->args[1]; + op->args[1] = op->args[3]; + return fold_andc(ctx, op); + } + } + if (arg_is_const(op->args[3])) { + uint64_t fv = arg_const_val(op->args[3]); + if (fv == 0) { + op->opc = INDEX_op_and_vec; + return fold_and(ctx, op); + } + if (fv == -1 && TCG_TARGET_HAS_orc_vec) { + TCGArg ta = op->args[2]; + op->opc = INDEX_op_orc_vec; + op->args[2] = op->args[1]; + op->args[1] = ta; + return fold_orc(ctx, op); + } } + return finish_folding(ctx, op); +} - i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond); +static bool fold_brcond(OptContext *ctx, TCGOp *op) { + int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0], &op->args[1], &op->args[2]); if (i == 0) { tcg_op_remove(ctx->tcg, op); return true; @@ -977,20 +1497,21 @@ static bool fold_brcond(OptContext *ctx, TCGOp *op) { if (i > 0) { op->opc = INDEX_op_br; op->args[0] = op->args[3]; + finish_ebb(ctx); + } else { + finish_bb(ctx); } - return false; + return true; } static bool fold_brcond2(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[4]; - TCGArg label = op->args[5]; + TCGCond cond; + TCGArg label; int i, inv = 0; - if (swap_commutative2(&op->args[0], &op->args[2])) { - op->args[4] = cond = tcg_swap_cond(cond); - } - - i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond); + i = do_constant_folding_cond2(ctx, op, &op->args[0]); + cond = op->args[4]; + label = op->args[5]; if (i >= 0) { goto do_brcond_const; } @@ -1002,8 +1523,7 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) { * Simplify LT/GE comparisons vs zero to a single compare * vs the high word of the input. 
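The double-word simplification works because only the sign bit decides LT/GE against zero (sketch):

    #include <stdint.h>

    // For a 64-bit value split across (lo, hi) 32-bit words, signed
    // "x < 0" is the sign of the high word; the low word is irrelevant.
    static int lt0_via_high_word(uint32_t lo, uint32_t hi) {
        (void) lo;
        return (int32_t) hi < 0;
    }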
*/ - if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 && arg_is_const(op->args[3]) && - arg_info(op->args[3])->val == 0) { + if (arg_is_const_val(op->args[2], 0) && arg_is_const_val(op->args[3], 0)) { goto do_brcond_high; } break; @@ -1029,24 +1549,37 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) { case 0: goto do_brcond_const; case 1: - op->opc = INDEX_op_brcond_i32; - op->args[1] = op->args[2]; - op->args[2] = cond; - op->args[3] = label; - break; + goto do_brcond_low; + } + break; + + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if (arg_is_const_val(op->args[2], 0)) { + goto do_brcond_high; + } + if (arg_is_const_val(op->args[3], 0)) { + goto do_brcond_low; } break; default: break; + do_brcond_low: + op->opc = INDEX_op_brcond; + op->args[1] = op->args[2]; + op->args[2] = cond; + op->args[3] = label; + return fold_brcond(ctx, op); + do_brcond_high: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; op->args[3] = label; - break; + return fold_brcond(ctx, op); do_brcond_const: if (i == 0) { @@ -1055,63 +1588,59 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) { } op->opc = INDEX_op_br; op->args[0] = label; - break; + finish_ebb(ctx); + return true; } - return false; + + finish_bb(ctx); + return true; } static bool fold_bswap(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, sign; - - if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1 = arg_info(op->args[1]); + int flags = op->args[2]; - t = do_constant_folding(op->opc, ctx->type, t, op->args[2]); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], do_constant_folding(op->opc, ctx->type, ti_const_val(t1), flags)); } - z_mask = arg_info(op->args[1])->z_mask; + z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = 0; switch (op->opc) { - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: z_mask = bswap16(z_mask); - sign = INT16_MIN; + o_mask = bswap16(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int16_t) z_mask; + o_mask = (int16_t) o_mask; + s_mask = INT16_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(16, 48); + } break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: z_mask = bswap32(z_mask); - sign = INT32_MIN; + o_mask = bswap32(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int32_t) z_mask; + o_mask = (int32_t) o_mask; + s_mask = INT32_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(32, 32); + } break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: z_mask = bswap64(z_mask); - sign = INT64_MIN; + o_mask = bswap64(o_mask); break; default: g_assert_not_reached(); } - s_mask = smask_from_zmask(z_mask); - switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { - case TCG_BSWAP_OZ: - break; - case TCG_BSWAP_OS: - /* If the sign bit may be 1, force all the bits above to 1. */ - if (z_mask & sign) { - z_mask |= sign; - s_mask = sign << 1; - } - break; - default: - /* The high bits are undefined: force all bits above the sign to 1. 
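The rewritten fold_bswap above pushes both masks through the byte permutation and then applies the flag semantics; a scalar model of the 16-bit case (sketch, hypothetical helper name):

    #include <stdint.h>

    // TCG_BSWAP_OS sign-extends the swapped halfword and TCG_BSWAP_OZ
    // zero-extends it; with neither flag the high bits are unspecified
    // (this model simply leaves them zero).
    static uint64_t bswap16_flags(uint64_t x, int os) {
        uint16_t v = __builtin_bswap16((uint16_t) x);
        return os ? (uint64_t) (int64_t) (int16_t) v : v;
    }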
*/ - z_mask |= sign << 1; - s_mask = 0; - break; - } - ctx->z_mask = z_mask; - ctx->s_mask = s_mask; - - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_call(OptContext *ctx, TCGOp *op) { @@ -1130,14 +1659,19 @@ static bool fold_call(OptContext *ctx, TCGOp *op) { for (i = 0; i < nb_globals; i++) { if (test_bit(i, ctx->temps_used.l)) { - reset_ts(&ctx->tcg->temps[i]); + reset_ts(ctx, &ctx->tcg->temps[i]); } } } + /* If the function has side effects, reset mem data. */ + if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { + remove_mem_copy_all(ctx); + } + /* Reset temp data for outputs. */ for (i = 0; i < nb_oargs; i++) { - reset_temp(op->args[i]); + reset_temp(ctx, op->args[i]); } /* Stop optimizing MB across calls. */ @@ -1145,11 +1679,41 @@ static bool fold_call(OptContext *ctx, TCGOp *op) { return true; } +static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) { + /* Canonicalize the comparison to put immediate second. */ + if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { + op->args[3] = tcg_swap_cond(op->args[3]); + } + return finish_folding(ctx, op); +} + +static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) { + /* If true and false values are the same, eliminate the cmp. */ + if (args_are_copies(op->args[3], op->args[4])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); + } + + /* Canonicalize the comparison to put immediate second. */ + if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { + op->args[5] = tcg_swap_cond(op->args[5]); + } + /* + * Canonicalize the "false" input reg to match the destination, + * so that the tcg backend can implement "move if true". + */ + if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { + op->args[5] = tcg_invert_cond(op->args[5]); + } + return finish_folding(ctx, op); +} + static bool fold_count_zeros(OptContext *ctx, TCGOp *op) { - uint64_t z_mask; + uint64_t z_mask, s_mask; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); - if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + if (ti_is_const(t1)) { + uint64_t t = ti_const_val(t1); if (t != 0) { t = do_constant_folding(op->opc, ctx->type, t, 0); @@ -1168,62 +1732,96 @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op) { default: g_assert_not_reached(); } - ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask; - ctx->s_mask = smask_from_zmask(ctx->z_mask); - return false; + s_mask = ~z_mask; + z_mask |= t2->z_mask; + s_mask &= t2->s_mask; + + return fold_masks_zs(ctx, op, z_mask, s_mask); } static bool fold_ctpop(OptContext *ctx, TCGOp *op) { + uint64_t z_mask; + if (fold_const1(ctx, op)) { return true; } switch (ctx->type) { case TCG_TYPE_I32: - ctx->z_mask = 32 | 31; + z_mask = 32 | 31; break; case TCG_TYPE_I64: - ctx->z_mask = 64 | 63; + z_mask = 64 | 63; break; default: g_assert_not_reached(); } - ctx->s_mask = smask_from_zmask(ctx->z_mask); - return false; + return fold_masks_z(ctx, op, z_mask); } static bool fold_deposit(OptContext *ctx, TCGOp *op) { - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t1 = arg_info(op->args[1])->val; - uint64_t t2 = arg_info(op->args[2])->val; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int ofs = op->args[3]; + int len = op->args[4]; + int width = 8 * tcg_type_size(ctx->type); + uint64_t z_mask, o_mask, s_mask; - t1 = deposit64(t1, op->args[3], op->args[4], t2); - return tcg_opt_gen_movi(ctx, op, op->args[0], t1); + if 
(ti_is_const(t1) && ti_is_const(t2)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], deposit64(ti_const_val(t1), ofs, len, ti_const_val(t2))); } - ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, op->args[3], op->args[4], arg_info(op->args[2])->z_mask); - return false; + /* Inserting a value into zero at offset 0. */ + if (ti_is_const_val(t1, 0) && ofs == 0) { + uint64_t mask = MAKE_64BIT_MASK(0, len); + + op->opc = INDEX_op_and; + op->args[1] = op->args[2]; + op->args[2] = arg_new_constant(ctx, mask); + return fold_and(ctx, op); + } + + /* Inserting zero into a value. */ + if (ti_is_const_val(t2, 0)) { + uint64_t mask = deposit64(-1, ofs, len, 0); + + op->opc = INDEX_op_and; + op->args[2] = arg_new_constant(ctx, mask); + return fold_and(ctx, op); + } + + /* The s_mask from the top portion of the deposit is still valid. */ + if (ofs + len == width) { + s_mask = t2->s_mask << ofs; + } else { + s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); + } + + z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); + o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_divide(OptContext *ctx, TCGOp *op) { if (fold_const2(ctx, op) || fold_xi_to_x(ctx, op, 1)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_dup(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t t = arg_const_val(op->args[1]); t = dup_const(TCGOP_VECE(op), t); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } - return false; + return finish_folding(ctx, op); } static bool fold_dup2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, arg_info(op->args[2])->val); + uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, arg_const_val(op->args[2])); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1231,140 +1829,138 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) { op->opc = INDEX_op_dup_vec; TCGOP_VECE(op) = MO_32; } - return false; + return finish_folding(ctx, op); } static bool fold_eqv(OptContext *ctx, TCGOp *op) { - if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, -1) || fold_xi_to_not(ctx, op, 0)) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + + if (fold_const2_commutative(ctx, op)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return false; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold eqv r,x,i to xor r,x,~i. 
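Both deposit special cases above collapse into a single and with a constant mask. The identities are easy to check in isolation (deposit64 is re-implemented here just for the example):

    #include <assert.h>
    #include <stdint.h>

    /* Insert the low 'len' bits of 'val' into 'base' at bit offset 'ofs'. */
    static uint64_t deposit64(uint64_t base, int ofs, int len, uint64_t val) {
        uint64_t mask = (len < 64 ? (1ull << len) - 1 : ~0ull) << ofs;
        return (base & ~mask) | ((val << ofs) & mask);
    }

    int main(void) {
        uint64_t x = 0x1234567890abcdefull;

        /* Inserting into zero at offset 0 is an and with a low mask. */
        assert(deposit64(0, 0, 16, x) == (x & 0xffffull));

        /* Inserting zero is an and with the complementary mask. */
        assert(deposit64(x, 8, 16, 0) == (x & deposit64(-1ull, 8, 16, 0)));
        return 0;
    }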
*/ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_xor; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_xor_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_xor(ctx, op); + } + + t1 = arg_info(op->args[1]); + + z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; + uint64_t z_mask, o_mask, a_mask; + TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; - if (arg_is_const(op->args[1])) { - uint64_t t; - - t = arg_info(op->args[1])->val; - t = extract64(t, pos, len); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], extract64(ti_const_val(t1), pos, len)); } - z_mask_old = arg_info(op->args[1])->z_mask; - z_mask = extract64(z_mask_old, pos, len); - if (pos == 0) { - ctx->a_mask = z_mask_old ^ z_mask; - } - ctx->z_mask = z_mask; - ctx->s_mask = smask_from_zmask(z_mask); + z_mask = extract64(t1->z_mask, pos, len); + o_mask = extract64(t1->o_mask, pos, len); + a_mask = pos ? -1 : t1->z_mask ^ z_mask; - return fold_masks(ctx, op); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); } static bool fold_extract2(OptContext *ctx, TCGOp *op) { - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t v1 = arg_info(op->args[1])->val; - uint64_t v2 = arg_info(op->args[2])->val; - int shr = op->args[3]; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + uint64_t z1 = t1->z_mask; + uint64_t z2 = t2->z_mask; + uint64_t o1 = t1->o_mask; + uint64_t o2 = t2->o_mask; + int shr = op->args[3]; - if (op->opc == INDEX_op_extract2_i64) { - v1 >>= shr; - v2 <<= 64 - shr; - } else { - v1 = (uint32_t) v1 >> shr; - v2 = (uint64_t) ((int32_t) v2 << (32 - shr)); - } - return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); + if (ctx->type == TCG_TYPE_I32) { + z1 = (uint32_t) z1 >> shr; + o1 = (uint32_t) o1 >> shr; + z2 = (uint64_t) ((int32_t) z2 << (32 - shr)); + o2 = (uint64_t) ((int32_t) o2 << (32 - shr)); + } else { + z1 >>= shr; + o1 >>= shr; + z2 <<= 64 - shr; + o2 <<= 64 - shr; } - return false; + + return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); } static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask_old, s_mask, z_mask, sign; - bool type_change = false; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask = arg_info(op->args[1])->z_mask; - s_mask = arg_info(op->args[1])->s_mask; - s_mask_old = s_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = t1->s_mask; switch (op->opc) { - CASE_OP_32_64(ext8s) : sign = INT8_MIN; - z_mask = (uint8_t) z_mask; - break; - CASE_OP_32_64(ext16s) : sign = INT16_MIN; - z_mask = (uint16_t) z_mask; - break; case INDEX_op_ext_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32s_i64: - sign = INT32_MIN; - z_mask = (uint32_t) z_mask; + s_mask |= INT32_MIN; + z_mask = (int32_t) z_mask; + o_mask = (int32_t) o_mask; break; default: g_assert_not_reached(); } - - if (z_mask & sign) { - z_mask |= sign; - } - s_mask |= sign << 1; - - ctx->z_mask = z_mask; - ctx->s_mask = 
s_mask; - if (!type_change) { - ctx->a_mask = s_mask & ~s_mask_old; - } - - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; - bool type_change = false; + uint64_t z_mask, o_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask_old = z_mask = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; switch (op->opc) { - CASE_OP_32_64(ext8u) : z_mask = (uint8_t) z_mask; - break; - CASE_OP_32_64(ext16u) : z_mask = (uint16_t) z_mask; - break; case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32u_i64: z_mask = (uint32_t) z_mask; + o_mask = (uint32_t) o_mask; break; case INDEX_op_extrh_i64_i32: - type_change = true; z_mask >>= 32; + o_mask >>= 32; break; default: g_assert_not_reached(); } - - ctx->z_mask = z_mask; - ctx->s_mask = smask_from_zmask(z_mask); - if (!type_change) { - ctx->a_mask = z_mask_old ^ z_mask; - } - return fold_masks(ctx, op); + return fold_masks_zo(ctx, op, z_mask, o_mask); } static bool fold_mb(OptContext *ctx, TCGOp *op) { @@ -1395,95 +1991,99 @@ static bool fold_mov(OptContext *ctx, TCGOp *op) { } static bool fold_movcond(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[5]; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *tt, *ft; int i; - if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { - op->args[5] = cond = tcg_swap_cond(cond); + /* If true and false values are the same, eliminate the cmp. */ + if (args_are_copies(op->args[3], op->args[4])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); } + /* * Canonicalize the "false" input reg to match the destination reg so * that the tcg backend can implement a "move if true" operation. 
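For the widening cases above, the optimistic masks narrow in lockstep with the value: zero-extension cannot create one bits above bit 31. A minimal sketch, with an illustrative helper name:

    #include <stdint.h>

    /*
     * After extu_i32_i64 the result is (uint32_t) x, so any bit that may
     * be 1 lies in the low 32 bits of z_mask, and a bit is still known
     * to be 1 only if it was known 1 in the low 32 bits (o_mask).
     */
    static void extu_i32_i64_masks(uint64_t *z_mask, uint64_t *o_mask) {
        *z_mask = (uint32_t) *z_mask;
        *o_mask = (uint32_t) *o_mask;
    }

    int main(void) {
        uint64_t z = -1ull, o = 0xc0;
        extu_i32_i64_masks(&z, &o);
        return z == 0xffffffffull && o == 0xc0 ? 0 : 1;
    }

For extrh_i64_i32 the same reasoning applies with a right shift by 32 in place of the truncation.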
*/ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { - op->args[5] = cond = tcg_invert_cond(cond); + op->args[5] = tcg_invert_cond(op->args[5]); } - i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); + i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], &op->args[2], &op->args[5]); if (i >= 0) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); } - ctx->z_mask = arg_info(op->args[3])->z_mask | arg_info(op->args[4])->z_mask; - ctx->s_mask = arg_info(op->args[3])->s_mask & arg_info(op->args[4])->s_mask; + tt = arg_info(op->args[3]); + ft = arg_info(op->args[4]); + z_mask = tt->z_mask | ft->z_mask; + o_mask = tt->o_mask & ft->o_mask; + s_mask = tt->s_mask & ft->s_mask; - if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { - uint64_t tv = arg_info(op->args[3])->val; - uint64_t fv = arg_info(op->args[4])->val; - TCGOpcode opc; - - switch (ctx->type) { - case TCG_TYPE_I32: - opc = INDEX_op_setcond_i32; - break; - case TCG_TYPE_I64: - opc = INDEX_op_setcond_i64; - break; - default: - g_assert_not_reached(); - } + if (ti_is_const(tt) && ti_is_const(ft)) { + uint64_t tv = ti_const_val(tt); + uint64_t fv = ti_const_val(ft); + TCGCond cond = op->args[5]; if (tv == 1 && fv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = cond; } else if (fv == 1 && tv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; + op->args[3] = tcg_invert_cond(cond); + } else if (tv == -1 && fv == 0) { + op->opc = INDEX_op_negsetcond; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = INDEX_op_negsetcond; op->args[3] = tcg_invert_cond(cond); } } - return false; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_mul(OptContext *ctx, TCGOp *op) { if (fold_const2(ctx, op) || fold_xi_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 1)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) { if (fold_const2_commutative(ctx, op) || fold_xi_to_i(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_multiply2(OptContext *ctx, TCGOp *op) { swap_commutative(op->args[0], &op->args[2], &op->args[3]); if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t a = arg_info(op->args[2])->val; - uint64_t b = arg_info(op->args[3])->val; + uint64_t a = arg_const_val(op->args[2]); + uint64_t b = arg_const_val(op->args[3]); uint64_t h, l; TCGArg rl, rh; TCGOp *op2; switch (op->opc) { - case INDEX_op_mulu2_i32: - l = (uint64_t) (uint32_t) a * (uint32_t) b; - h = (int32_t) (l >> 32); - l = (int32_t) l; - break; - case INDEX_op_muls2_i32: - l = (int64_t) (int32_t) a * (int32_t) b; - h = l >> 32; - l = (int32_t) l; + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t) (uint32_t) a * (uint32_t) b; + h = (int32_t) (l >> 32); + l = (int32_t) l; + } else { + mulu64(&l, &h, a, b); + } break; - case INDEX_op_mulu2_i64: - mulu64(&l, &h, a, b); - break; - case INDEX_op_muls2_i64: - muls64(&l, &h, a, b); + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t) (int32_t) a * (int32_t) b; + h = l >> 32; + l = (int32_t) l; + } else { + muls64(&l, &h, a, b); + } break; default: g_assert_not_reached(); @@ -1493,142 +2093,366 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) { rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. 
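With the unified mulu2/muls2 opcodes, the 32-bit variant is folded through one 64-bit host product that is then split into halves. A standalone check of the arithmetic (plain C; it ignores the sign-extension the patch applies when storing the halves):

    #include <assert.h>
    #include <stdint.h>

    /* 32-bit mulu2: one 64-bit product split into low/high halves. */
    static void mulu2_i32(uint32_t a, uint32_t b, uint32_t *lo, uint32_t *hi) {
        uint64_t p = (uint64_t) a * b;
        *lo = (uint32_t) p;
        *hi = (uint32_t) (p >> 32);
    }

    int main(void) {
        uint32_t lo, hi;
        mulu2_i32(0xffffffffu, 0xffffffffu, &lo, &hi);
        assert(lo == 1 && hi == 0xfffffffeu); /* (2^32 - 1)^2 = 2^64 - 2^33 + 1 */
        return 0;
    }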
*/ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); return true; } - return false; + return finish_folding(ctx, op); } static bool fold_nand(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, -1)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return false; -} + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); -static bool fold_neg(OptContext *ctx, TCGOp *op) { - uint64_t z_mask; + z_mask = ~(t1->o_mask & t2->o_mask); + o_mask = ~(t1->z_mask & t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; - if (fold_const1(ctx, op)) { - return true; - } + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); +} +static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) { /* Set to 1 all bits to the left of the rightmost. */ - z_mask = arg_info(op->args[1])->z_mask; - ctx->z_mask = -(z_mask & -z_mask); + uint64_t z_mask = arg_info(op->args[1])->z_mask; + z_mask = -(z_mask & -z_mask); - /* - * Because of fold_sub_to_neg, we want to always return true, - * via finish_folding. - */ - finish_folding(ctx, op); - return true; + return fold_masks_z(ctx, op, z_mask); +} + +static bool fold_neg(OptContext *ctx, TCGOp *op) { + return fold_const1(ctx, op) || fold_neg_no_const(ctx, op); } static bool fold_nor(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, 0)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return false; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask | t2->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_not(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1; + if (fold_const1(ctx, op)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask; - - /* Because of fold_to_not, we want to always return true, via finish. */ - finish_folding(ctx, op); - return true; + t1 = arg_info(op->args[1]); + return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); } static bool fold_or(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, 0) || fold_xx_to_x(ctx, op)) { return true; } - ctx->z_mask = arg_info(op->args[1])->z_mask | arg_info(op->args[2])->z_mask; - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = t1->z_mask | t2->z_mask; + o_mask = t1->o_mask | t2->o_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known one, masked by those known zero. 
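fold_neg_no_const relies on the classic x & -x trick: -(z_mask & -z_mask) turns on the lowest bit that may be 1 and every bit above it, which is a sound may-be-one mask for a negated value. A brute-force check:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint64_t z_mask = 0xf0f0;                 /* bits that may be 1 */
        uint64_t neg_mask = -(z_mask & -z_mask);  /* lowest such bit and up */

        for (uint64_t x = 0; x <= 0xf0f0; x++) {
            if ((x & ~z_mask) == 0) {
                assert(((0 - x) & ~neg_mask) == 0); /* -x stays inside neg_mask */
            }
        }
        return 0;
    }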
*/ + a_mask = ~t1->o_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_orc(OptContext *ctx, TCGOp *op) { - if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, -1) || fold_xi_to_x(ctx, op, -1) || fold_ix_to_not(ctx, op, 0)) { + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; + + if (fold_const2(ctx, op)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return false; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold orc r,x,i to or r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_or; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_or_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_or(ctx, op); + } + if (fold_xx_to_i(ctx, op, -1) || fold_ix_to_not(ctx, op, 0)) { + return true; + } + t1 = arg_info(op->args[1]); + + z_mask = t1->z_mask | ~t2->o_mask; + o_mask = t1->o_mask | ~t2->z_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known one, masked by those known one. */ + a_mask = ~t1->o_mask & t2->o_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } -static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) { +static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) { const TCGOpDef *def = &tcg_op_defs[op->opc]; MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; MemOp mop = get_memop(oi); int width = 8 * memop_size(mop); + uint64_t z_mask = -1, s_mask = 0; if (width < 64) { - ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); - if (!(mop & MO_SIGN)) { - ctx->z_mask = MAKE_64BIT_MASK(0, width); - ctx->s_mask <<= 1; + if (mop & MO_SIGN) { + s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); + } else { + z_mask = MAKE_64BIT_MASK(0, width); } } /* Opcodes that touch guest memory stop the mb optimization. */ ctx->prev_mb = NULL; - return false; + + return fold_masks_zs(ctx, op, z_mask, s_mask); +} + +static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op) { + /* Opcodes that touch guest memory stop the mb optimization. */ + ctx->prev_mb = NULL; + return finish_folding(ctx, op); } static bool fold_qemu_st(OptContext *ctx, TCGOp *op) { /* Opcodes that touch guest memory stop the mb optimization. */ ctx->prev_mb = NULL; - return false; + return true; } static bool fold_remainder(OptContext *ctx, TCGOp *op) { if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } -static bool fold_setcond(OptContext *ctx, TCGOp *op) { +/* Return 1 if finished, -1 if simplified, 0 if unchanged. */ +static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) { + uint64_t a_zmask, b_val; + TCGCond cond; + + if (!arg_is_const(op->args[2])) { + return false; + } + + a_zmask = arg_info(op->args[1])->z_mask; + b_val = arg_const_val(op->args[2]); + cond = op->args[3]; + + if (ctx->type == TCG_TYPE_I32) { + a_zmask = (uint32_t) a_zmask; + b_val = (uint32_t) b_val; + } + + /* + * A with only low bits set vs B with high bits set means that A < B. + */ + if (a_zmask < b_val) { + bool inv = false; + + switch (cond) { + case TCG_COND_NE: + case TCG_COND_LEU: + case TCG_COND_LTU: + inv = true; + /* fall through */ + case TCG_COND_GTU: + case TCG_COND_GEU: + case TCG_COND_EQ: + return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? 
-inv : inv); + default: + break; + } + } + + /* + * A with only lsb set is already boolean. + */ + if (a_zmask <= 1) { + bool convert = false; + bool inv = false; + + switch (cond) { + case TCG_COND_EQ: + inv = true; + /* fall through */ + case TCG_COND_NE: + convert = (b_val == 0); + break; + case TCG_COND_LTU: + case TCG_COND_TSTEQ: + inv = true; + /* fall through */ + case TCG_COND_GEU: + case TCG_COND_TSTNE: + convert = (b_val == 1); + break; + default: + break; + } + if (convert) { + if (!inv && !neg) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); + } + + if (!inv) { + op->opc = INDEX_op_neg; + } else if (neg) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -1); + } else { + op->opc = INDEX_op_xor; + op->args[2] = arg_new_constant(ctx, 1); + } + return -1; + } + } + return 0; +} + +static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { TCGCond cond = op->args[3]; - int i; + TCGArg ret, src1, src2; + TCGOp *op2; + uint64_t val; + int sh; + bool inv; + + if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) { + return; + } + + src2 = op->args[2]; + val = arg_const_val(src2); + if (!is_power_of_2(val)) { + return; + } + sh = ctz64(val); + + ret = op->args[0]; + src1 = op->args[1]; + inv = cond == TCG_COND_TSTEQ; + + if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_sextract; + op->args[1] = src1; + op->args[2] = sh; + op->args[3] = 1; + return; + } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_extract; + op->args[1] = src1; + op->args[2] = sh; + op->args[3] = 1; + } else { + if (sh) { + op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3); + op2->args[0] = ret; + op2->args[1] = src1; + op2->args[2] = arg_new_constant(ctx, sh); + src1 = ret; + } + op->opc = INDEX_op_and; + op->args[1] = src1; + op->args[2] = arg_new_constant(ctx, 1); + } - if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { - op->args[3] = cond = tcg_swap_cond(cond); + if (neg && inv) { + op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); + op2->args[0] = ret; + op2->args[1] = ret; + op2->args[2] = arg_new_constant(ctx, -1); + } else if (inv) { + op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); + op2->args[0] = ret; + op2->args[1] = ret; + op2->args[2] = arg_new_constant(ctx, 1); + } else if (neg) { + op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); + op2->args[0] = ret; + op2->args[1] = ret; } +} - i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); +static bool fold_setcond(OptContext *ctx, TCGOp *op) { + int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], &op->args[2], &op->args[3]); if (i >= 0) { return tcg_opt_gen_movi(ctx, op, op->args[0], i); } - ctx->z_mask = 1; - ctx->s_mask = smask_from_zmask(1); - return false; + i = fold_setcond_zmask(ctx, op, false); + if (i > 0) { + return true; + } + if (i == 0) { + fold_setcond_tst_pow2(ctx, op, false); + } + + return fold_masks_z(ctx, op, 1); } -static bool fold_setcond2(OptContext *ctx, TCGOp *op) { - TCGCond cond = op->args[5]; - int i, inv = 0; +static bool fold_negsetcond(OptContext *ctx, TCGOp *op) { + int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1], &op->args[2], &op->args[3]); + if (i >= 0) { + return tcg_opt_gen_movi(ctx, op, op->args[0], -i); + } - if (swap_commutative2(&op->args[1], &op->args[3])) { - op->args[5] = cond = tcg_swap_cond(cond); + i = fold_setcond_zmask(ctx, op, true); + if (i > 0) { + return true; + } + if (i == 0) { + fold_setcond_tst_pow2(ctx, 
op, true); } - i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); + /* Value is {0,-1} so all bits are repetitions of the sign. */ + return fold_masks_s(ctx, op, -1); +} + +static bool fold_setcond2(OptContext *ctx, TCGOp *op) { + TCGCond cond; + int i, inv = 0; + + i = do_constant_folding_cond2(ctx, op, &op->args[1]); + cond = op->args[5]; if (i >= 0) { goto do_setcond_const; } @@ -1640,8 +2464,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) { * Simplify LT/GE comparisons vs zero to a single compare * vs the high word of the input. */ - if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && arg_is_const(op->args[4]) && - arg_info(op->args[4])->val == 0) { + if (arg_is_const_val(op->args[3], 0) && arg_is_const_val(op->args[4], 0)) { goto do_setcond_high; } break; @@ -1667,123 +2490,123 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) { case 0: goto do_setcond_const; case 1: - op->args[2] = op->args[3]; - op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; - break; + goto do_setcond_low; + } + break; + + case TCG_COND_TSTEQ: + case TCG_COND_TSTNE: + if (arg_is_const_val(op->args[3], 0)) { + goto do_setcond_high; + } + if (arg_is_const_val(op->args[4], 0)) { + goto do_setcond_low; } break; default: break; + do_setcond_low: + op->args[2] = op->args[3]; + op->args[3] = cond; + op->opc = INDEX_op_setcond; + return fold_setcond(ctx, op); + do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; - break; + op->opc = INDEX_op_setcond; + return fold_setcond(ctx, op); } - ctx->z_mask = 1; - ctx->s_mask = smask_from_zmask(1); - return false; + return fold_masks_z(ctx, op, 1); do_setcond_const: return tcg_opt_gen_movi(ctx, op, op->args[0], i); } static bool fold_sextract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, s_mask_old; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; - if (arg_is_const(op->args[1])) { - uint64_t t; - - t = arg_info(op->args[1])->val; - t = sextract64(t, pos, len); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], sextract64(ti_const_val(t1), pos, len)); } - z_mask = arg_info(op->args[1])->z_mask; - z_mask = sextract64(z_mask, pos, len); - ctx->z_mask = z_mask; + s_mask = t1->s_mask >> pos; + s_mask |= -1ull << (len - 1); + a_mask = pos ? 
-1 : s_mask & ~t1->s_mask; - s_mask_old = arg_info(op->args[1])->s_mask; - s_mask = sextract64(s_mask_old, pos, len); - s_mask |= MAKE_64BIT_MASK(len, 64 - len); - ctx->s_mask = s_mask; + z_mask = sextract64(t1->z_mask, pos, len); + o_mask = sextract64(t1->o_mask, pos, len); - if (pos == 0) { - ctx->a_mask = s_mask & ~s_mask_old; - } - - return fold_masks(ctx, op); + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_shift(OptContext *ctx, TCGOp *op) { - uint64_t s_mask, z_mask, sign; + uint64_t s_mask, z_mask, o_mask; + TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || fold_ix_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0)) { return true; } - s_mask = arg_info(op->args[1])->s_mask; - z_mask = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + s_mask = t1->s_mask; + z_mask = t1->z_mask; + o_mask = t1->o_mask; - if (arg_is_const(op->args[2])) { - int sh = arg_info(op->args[2])->val; - - ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + if (ti_is_const(t2)) { + int sh = ti_const_val(t2); + z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); - ctx->s_mask = smask_from_smask(s_mask); - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } switch (op->opc) { - CASE_OP_32_64(sar) - : /* - * Arithmetic right shift will not reduce the number of - * input sign repetitions. - */ - ctx->s_mask = s_mask; - break; - CASE_OP_32_64(shr) - : /* - * If the sign bit is known zero, then logical right shift - * will not reduced the number of input sign repetitions. - */ - sign = (s_mask & -s_mask) >> 1; - if (!(z_mask & sign)) { - ctx->s_mask = s_mask; - } - break; + case INDEX_op_sar: + /* + * Arithmetic right shift will not reduce the number of + * input sign repetitions. + */ + return fold_masks_s(ctx, op, s_mask); + case INDEX_op_shr: + /* + * If the sign bit is known zero, then logical right shift + * will not reduce the number of input sign repetitions. 
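The guard that follows this comment, ~z_mask & -s_mask, works because s_mask is stored sign-extended, so its negation isolates the lowest repeated-sign bit; if that bit is known zero, the sign bit is known zero as well. For example:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        /* s_mask stored sign-extended: bits 15..63 repeat the sign. */
        uint64_t s_mask = 0xffffffffffff8000ull;
        assert(-s_mask == 0x8000);            /* lowest repeated bit */

        /* If that bit is known zero, the sign bit itself is zero. */
        uint64_t z_mask = 0x7fff;
        assert((~z_mask & -s_mask) != 0);
        return 0;
    }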
+ */ + if (~z_mask & -s_mask) { + return fold_masks_s(ctx, op, s_mask); + } + break; default: break; } - return false; + return finish_folding(ctx, op); } static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) { TCGOpcode neg_op; bool have_neg; - if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { + if (!arg_is_const_val(op->args[1], 0)) { return false; } switch (ctx->type) { case TCG_TYPE_I32: - neg_op = INDEX_op_neg_i32; - have_neg = TCG_TARGET_HAS_neg_i32; - break; case TCG_TYPE_I64: - neg_op = INDEX_op_neg_i64; - have_neg = TCG_TARGET_HAS_neg_i64; + neg_op = INDEX_op_neg; + have_neg = true; break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -1797,7 +2620,7 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) { if (have_neg) { op->opc = neg_op; op->args[1] = op->args[2]; - return fold_neg(ctx, op); + return fold_neg_no_const(ctx, op); } return false; } @@ -1807,52 +2630,288 @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op) { if (fold_xx_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0) || fold_sub_to_neg(ctx, op)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_sub(OptContext *ctx, TCGOp *op) { - return fold_const2(ctx, op) || fold_sub_vec(ctx, op); + if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0) || fold_sub_to_neg(ctx, op)) { + return true; + } + + /* Fold sub r,x,i to add r,x,-i */ + if (arg_is_const(op->args[2])) { + uint64_t val = arg_const_val(op->args[2]); + + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -val); + } + return finish_folding(ctx, op); +} + +static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) { + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_subbo: + op->opc = INDEX_op_sub; + fold_sub(ctx, op); + break; + case INDEX_op_subbio: + op->opc = INDEX_op_subbi; + break; + case INDEX_op_subb1o: + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op->opc = INDEX_op_sub; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, -1); + } + break; + default: + g_assert_not_reached(); + } } -static bool fold_sub2(OptContext *ctx, TCGOp *op) { - return fold_addsub2(ctx, op, false); +static bool fold_subbi(OptContext *ctx, TCGOp *op) { + TempOptInfo *t2; + int borrow_in = ctx->carry_state; + + if (borrow_in < 0) { + return finish_folding(ctx, op); + } + ctx->carry_state = -1; + + squash_prev_borrowout(ctx, op); + if (borrow_in == 0) { + op->opc = INDEX_op_sub; + return fold_sub(ctx, op); + } + + /* + * Propagate the known carry-in into any constant, then negate to + * transform from sub to add. If there is no constant, emit a + * separate add -1. 
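The rewrite sketched in this comment rests on the two's-complement identity x - y - 1 == x + ~y == x + (-(y + 1)), which is what allows a known borrow-in of 1 to be folded into the constant operand:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint64_t x = 1234, y = 99;
        assert(~y == -(y + 1));       /* two's complement */
        assert(x - y - 1 == x + ~y);  /* borrow-in folded into an add */
        return 0;
    }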
+ */ + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_sub(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, -1); + } + op->opc = INDEX_op_add; + return fold_add(ctx, op); +} + +static bool fold_subbio(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1, *t2; + int borrow_out = -1; + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_borrowout(ctx, op); + if (ctx->carry_state == 0) { + goto do_subbo; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* Propagate the known borrow-in into a constant, if possible. */ + if (ti_is_const(t2)) { + uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + uint64_t v = ti_const_val(t2) & max; + + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_subbo; + } + /* subtracting max + 1 produces known borrow out. */ + borrow_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1); + if (v != 0) { + op->args[2] = arg_new_constant(ctx, v - 1); + goto do_subbo; + } + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_subb1o; + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); + +do_subbo: + op->opc = INDEX_op_subbo; + return fold_subbo(ctx, op); +} + +static bool fold_subbo(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int borrow_out = -1; + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + if (v2 == 0) { + borrow_out = 0; + } else if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + borrow_out = v1 < v2; + } + } + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); } static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) { + uint64_t z_mask = -1, s_mask = 0; + /* We can't do any folding with a load, but we can record bits. 
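The masks recorded for loads use the same sign-extended convention: for an 8-bit signed load the cases below set s_mask = INT8_MIN, marking bits 7..63 as copies of the sign. A standalone illustration:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint64_t s_mask = (uint64_t) INT8_MIN; /* bits 7..63 repeat the sign */

        int64_t v = (int8_t) 0x80;             /* most negative 8-bit load */
        assert(((uint64_t) v & s_mask) == s_mask);

        v = (int8_t) 0x7f;                     /* positive: covered bits are 0 */
        assert(((uint64_t) v & s_mask) == 0);
        return 0;
    }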
*/ switch (op->opc) { - CASE_OP_32_64(ld8s) : ctx->s_mask = MAKE_64BIT_MASK(8, 56); - break; - CASE_OP_32_64(ld8u) : ctx->z_mask = MAKE_64BIT_MASK(0, 8); - ctx->s_mask = MAKE_64BIT_MASK(9, 55); - break; - CASE_OP_32_64(ld16s) : ctx->s_mask = MAKE_64BIT_MASK(16, 48); - break; - CASE_OP_32_64(ld16u) : ctx->z_mask = MAKE_64BIT_MASK(0, 16); - ctx->s_mask = MAKE_64BIT_MASK(17, 47); - break; - case INDEX_op_ld32s_i64: - ctx->s_mask = MAKE_64BIT_MASK(32, 32); + case INDEX_op_ld8s: + s_mask = INT8_MIN; + break; + case INDEX_op_ld8u: + z_mask = MAKE_64BIT_MASK(0, 8); + break; + case INDEX_op_ld16s: + s_mask = INT16_MIN; + break; + case INDEX_op_ld16u: + z_mask = MAKE_64BIT_MASK(0, 16); + break; + case INDEX_op_ld32s: + s_mask = INT32_MIN; + break; + case INDEX_op_ld32u: + z_mask = MAKE_64BIT_MASK(0, 32); + break; + default: + g_assert_not_reached(); + } + return fold_masks_zs(ctx, op, z_mask, s_mask); +} + +static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) { + TCGTemp *dst, *src; + intptr_t ofs; + TCGType type; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + return finish_folding(ctx, op); + } + + type = ctx->type; + ofs = op->args[2]; + dst = arg_temp(op->args[0]); + src = find_mem_copy_for(ctx, type, ofs); + if (src && src->base_type == type) { + return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); + } + + reset_ts(ctx, dst); + record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); + return true; +} + +static bool fold_tcg_st(OptContext *ctx, TCGOp *op) { + intptr_t ofs = op->args[2]; + intptr_t lm1; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + remove_mem_copy_all(ctx); + return true; + } + + switch (op->opc) { + case INDEX_op_st8: + lm1 = 0; + break; + case INDEX_op_st16: + lm1 = 1; break; - case INDEX_op_ld32u_i64: - ctx->z_mask = MAKE_64BIT_MASK(0, 32); - ctx->s_mask = MAKE_64BIT_MASK(33, 31); + case INDEX_op_st32: + lm1 = 3; + break; + case INDEX_op_st: + case INDEX_op_st_vec: + lm1 = tcg_type_size(ctx->type) - 1; break; default: g_assert_not_reached(); } - return false; + remove_mem_copy_in(ctx, ofs, ofs + lm1); + return true; +} + +static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) { + TCGTemp *src; + intptr_t ofs, last; + TCGType type; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + return fold_tcg_st(ctx, op); + } + + src = arg_temp(op->args[0]); + ofs = op->args[2]; + type = ctx->type; + + /* + * Eliminate duplicate stores of a constant. + * This happens frequently when the target ISA zero-extends. 
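The env-slot tracking above and below operates on byte intervals: a store to [ofs, ofs + len - 1] must drop every remembered value whose interval overlaps it. A single-slot sketch of that rule (the patch keeps a full interval tree per type; the struct here is purely illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    struct mem_copy {
        intptr_t ofs, last; /* inclusive byte interval within env */
        bool valid;
    };

    /* A new store to [ofs, last] invalidates any overlapping remembered copy. */
    static void store_invalidate(struct mem_copy *mc, intptr_t ofs, intptr_t last) {
        if (mc->valid && ofs <= mc->last && mc->ofs <= last) {
            mc->valid = false;
        }
    }

    int main(void) {
        struct mem_copy mc = {.ofs = 8, .last = 15, .valid = true};
        store_invalidate(&mc, 0, 7);   /* disjoint: copy survives */
        store_invalidate(&mc, 12, 12); /* overlaps: copy is dropped */
        return mc.valid;               /* 0 on the expected path */
    }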
+ */ + if (ts_is_const(src)) { + TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); + if (src == prev) { + tcg_op_remove(ctx->tcg, op); + return true; + } + } + + last = ofs + tcg_type_size(type) - 1; + remove_mem_copy_in(ctx, ofs, last); + record_mem_copy(ctx, type, src, ofs, last); + return true; } static bool fold_xor(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xx_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0) || fold_xi_to_not(ctx, op, -1)) { return true; } - ctx->z_mask = arg_info(op->args[1])->z_mask | arg_info(op->args[2])->z_mask; - ctx->s_mask = arg_info(op->args[1])->s_mask & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); + o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } /* Propagate constants and copies, fold constant expressions. */ @@ -1861,6 +2920,8 @@ void tcg_optimize(TCGContext *s) { TCGOp *op, *op_next; OptContext ctx = {.tcg = s}; + QSIMPLEQ_INIT(&ctx.mem_free); + /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. If this temp is a copy of other ones then the other copies are @@ -1887,50 +2948,60 @@ void tcg_optimize(TCGContext *s) { copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); /* Pre-compute the type of the operation. */ - if (def->flags & TCG_OPF_VECTOR) { - ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); - } else if (def->flags & TCG_OPF_64BIT) { - ctx.type = TCG_TYPE_I64; - } else { - ctx.type = TCG_TYPE_I32; - } - - /* Assume all bits affected, no bits known zero, no sign reps. */ - ctx.a_mask = -1; - ctx.z_mask = -1; - ctx.s_mask = 0; + ctx.type = TCGOP_TYPE(op); /* * Process each opcode. * Sorted alphabetically by opcode as much as possible. 
*/ switch (opc) { - CASE_OP_32_64(add) : done = fold_add(&ctx, op); - break; + case INDEX_op_add: + done = fold_add(&ctx, op); + break; case INDEX_op_add_vec: done = fold_add_vec(&ctx, op); break; - CASE_OP_32_64(add2) : done = fold_add2(&ctx, op); + case INDEX_op_addci: + done = fold_addci(&ctx, op); + break; + case INDEX_op_addcio: + done = fold_addcio(&ctx, op); break; - CASE_OP_32_64_VEC(and) : done = fold_and(&ctx, op); + case INDEX_op_addco: + done = fold_addco(&ctx, op); break; - CASE_OP_32_64_VEC(andc) : done = fold_andc(&ctx, op); + case INDEX_op_and: + case INDEX_op_and_vec: + done = fold_and(&ctx, op); break; - CASE_OP_32_64(brcond) : done = fold_brcond(&ctx, op); + case INDEX_op_andc: + case INDEX_op_andc_vec: + done = fold_andc(&ctx, op); + break; + case INDEX_op_brcond: + done = fold_brcond(&ctx, op); break; case INDEX_op_brcond2_i32: done = fold_brcond2(&ctx, op); break; - CASE_OP_32_64(bswap16) : CASE_OP_32_64(bswap32) : case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: done = fold_bswap(&ctx, op); break; - CASE_OP_32_64(clz) : CASE_OP_32_64(ctz) : done = fold_count_zeros(&ctx, op); + case INDEX_op_clz: + case INDEX_op_ctz: + done = fold_count_zeros(&ctx, op); break; - CASE_OP_32_64(ctpop) : done = fold_ctpop(&ctx, op); + case INDEX_op_ctpop: + done = fold_ctpop(&ctx, op); break; - CASE_OP_32_64(deposit) : done = fold_deposit(&ctx, op); + case INDEX_op_deposit: + done = fold_deposit(&ctx, op); break; - CASE_OP_32_64(div) : CASE_OP_32_64(divu) : done = fold_divide(&ctx, op); + case INDEX_op_divs: + case INDEX_op_divu: + done = fold_divide(&ctx, op); break; case INDEX_op_dup_vec: done = fold_dup(&ctx, op); @@ -1938,98 +3009,162 @@ void tcg_optimize(TCGContext *s) { case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64_VEC(eqv) : done = fold_eqv(&ctx, op); + case INDEX_op_eqv: + case INDEX_op_eqv_vec: + done = fold_eqv(&ctx, op); break; - CASE_OP_32_64(extract) : done = fold_extract(&ctx, op); + case INDEX_op_extract: + done = fold_extract(&ctx, op); break; - CASE_OP_32_64(extract2) : done = fold_extract2(&ctx, op); + case INDEX_op_extract2: + done = fold_extract2(&ctx, op); break; - CASE_OP_32_64(ext8s) : CASE_OP_32_64(ext16s) : case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: done = fold_exts(&ctx, op); break; - CASE_OP_32_64(ext8u) : CASE_OP_32_64(ext16u) : case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: done = fold_extu(&ctx, op); break; - CASE_OP_32_64(ld8s) - : CASE_OP_32_64(ld8u) : CASE_OP_32_64(ld16s) : CASE_OP_32_64(ld16u) : case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: + case INDEX_op_ld8s: + case INDEX_op_ld8u: + case INDEX_op_ld16s: + case INDEX_op_ld16u: + case INDEX_op_ld32s: + case INDEX_op_ld32u: done = fold_tcg_ld(&ctx, op); break; + case INDEX_op_ld: + case INDEX_op_ld_vec: + done = fold_tcg_ld_memcopy(&ctx, op); + break; + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: + done = fold_tcg_st(&ctx, op); + break; + case INDEX_op_st: + case INDEX_op_st_vec: + done = fold_tcg_st_memcopy(&ctx, op); + break; case INDEX_op_mb: done = fold_mb(&ctx, op); break; - CASE_OP_32_64_VEC(mov) : done = fold_mov(&ctx, op); + case INDEX_op_mov: + case INDEX_op_mov_vec: + done = fold_mov(&ctx, op); break; - CASE_OP_32_64(movcond) : done = fold_movcond(&ctx, op); + case INDEX_op_movcond: + done = fold_movcond(&ctx, op); break; - CASE_OP_32_64(mul) : done = fold_mul(&ctx, op); + case INDEX_op_mul: + done = fold_mul(&ctx, 
op); break; - CASE_OP_32_64(mulsh) : CASE_OP_32_64(muluh) : done = fold_mul_highpart(&ctx, op); + case INDEX_op_mulsh: + case INDEX_op_muluh: + done = fold_mul_highpart(&ctx, op); break; - CASE_OP_32_64(muls2) : CASE_OP_32_64(mulu2) : done = fold_multiply2(&ctx, op); + case INDEX_op_muls2: + case INDEX_op_mulu2: + done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64_VEC(nand) : done = fold_nand(&ctx, op); + case INDEX_op_nand: + case INDEX_op_nand_vec: + done = fold_nand(&ctx, op); break; - CASE_OP_32_64(neg) : done = fold_neg(&ctx, op); + case INDEX_op_neg: + done = fold_neg(&ctx, op); break; - CASE_OP_32_64_VEC(nor) : done = fold_nor(&ctx, op); + case INDEX_op_nor: + case INDEX_op_nor_vec: + done = fold_nor(&ctx, op); break; - CASE_OP_32_64_VEC(not) : done = fold_not(&ctx, op); + case INDEX_op_not: + case INDEX_op_not_vec: + done = fold_not(&ctx, op); break; - CASE_OP_32_64_VEC(or) : done = fold_or(&ctx, op); + case INDEX_op_or: + case INDEX_op_or_vec: + done = fold_or(&ctx, op); break; - CASE_OP_32_64_VEC(orc) : done = fold_orc(&ctx, op); + case INDEX_op_orc: + case INDEX_op_orc_vec: + done = fold_orc(&ctx, op); break; - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - done = fold_qemu_ld(&ctx, op); + case INDEX_op_qemu_ld: + done = fold_qemu_ld_1reg(&ctx, op); break; - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: + case INDEX_op_qemu_ld2: + done = fold_qemu_ld_2reg(&ctx, op); + break; + case INDEX_op_qemu_st: + case INDEX_op_qemu_st2: done = fold_qemu_st(&ctx, op); break; - CASE_OP_32_64(rem) : CASE_OP_32_64(remu) : done = fold_remainder(&ctx, op); + case INDEX_op_rems: + case INDEX_op_remu: + done = fold_remainder(&ctx, op); + break; + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: + done = fold_shift(&ctx, op); break; - CASE_OP_32_64(rotl) - : CASE_OP_32_64(rotr) - : CASE_OP_32_64(sar) : CASE_OP_32_64(shl) : CASE_OP_32_64(shr) : done = fold_shift(&ctx, op); + case INDEX_op_setcond: + done = fold_setcond(&ctx, op); break; - CASE_OP_32_64(setcond) : done = fold_setcond(&ctx, op); + case INDEX_op_negsetcond: + done = fold_negsetcond(&ctx, op); break; case INDEX_op_setcond2_i32: done = fold_setcond2(&ctx, op); break; - CASE_OP_32_64(sextract) : done = fold_sextract(&ctx, op); + case INDEX_op_cmp_vec: + done = fold_cmp_vec(&ctx, op); break; - CASE_OP_32_64(sub) : done = fold_sub(&ctx, op); + case INDEX_op_cmpsel_vec: + done = fold_cmpsel_vec(&ctx, op); + break; + case INDEX_op_bitsel_vec: + done = fold_bitsel_vec(&ctx, op); + break; + case INDEX_op_sextract: + done = fold_sextract(&ctx, op); + break; + case INDEX_op_sub: + done = fold_sub(&ctx, op); + break; + case INDEX_op_subbi: + done = fold_subbi(&ctx, op); + break; + case INDEX_op_subbio: + done = fold_subbio(&ctx, op); + break; + case INDEX_op_subbo: + done = fold_subbo(&ctx, op); break; case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; - CASE_OP_32_64(sub2) : done = fold_sub2(&ctx, op); + case INDEX_op_xor: + case INDEX_op_xor_vec: + done = fold_xor(&ctx, op); break; - CASE_OP_32_64_VEC(xor) : done = fold_xor(&ctx, op); + case INDEX_op_set_label: + case INDEX_op_br: + case INDEX_op_exit_tb: + 
case INDEX_op_goto_tb: + case INDEX_op_goto_ptr: + finish_ebb(&ctx); + done = true; break; default: + done = finish_folding(&ctx, op); break; } - - if (!done) { - finish_folding(&ctx, op); - } + tcg_debug_assert(done); } } diff --git a/libtcg/src/region.c b/libtcg/src/region.c index 01a52c802..e21f0afa6 100644 --- a/libtcg/src/region.c +++ b/libtcg/src/region.c @@ -53,6 +53,16 @@ #include +/* + * Local source-level compatibility with Unix. + * Used by tcg_region_init below. + */ +#if defined(_WIN32) +#define PROT_READ 1 +#define PROT_WRITE 2 +#define PROT_EXEC 4 +#endif + struct tcg_region_tree { mutex_t lock; QTree *tree; @@ -100,6 +110,17 @@ bool in_code_gen_buffer(const void *p) { return (size_t) (p - region.start_aligned) <= region.total_size; } +#ifndef CONFIG_TCG_INTERPRETER +static int host_prot_read_exec(void) { +#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI) + if (cpuinfo & CPUINFO_BTI) { + return PROT_READ | PROT_EXEC | PROT_BTI; + } +#endif + return PROT_READ | PROT_EXEC; +} +#endif + #ifdef CONFIG_DEBUG_TCG const void *tcg_splitwx_to_rx(void *rw) { /* Pass NULL pointers unchanged. */ @@ -394,7 +415,7 @@ void tcg_region_reset_all(void) { tcg_region_tree_reset_all(); } -static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus) { +static size_t tcg_n_regions(size_t tb_size, unsigned max_threads) { #ifdef CONFIG_USER_ONLY return 1; #else @@ -402,24 +423,25 @@ static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus) { /* * It is likely that some vCPUs will translate more code than others, - * so we first try to set more regions than max_cpus, with those regions + * so we first try to set more regions than threads, with those regions * being of reasonable size. If that's not possible we make do by evenly * dividing the code_gen_buffer among the vCPUs. + * + * Use a single region if all we have is one vCPU thread. */ - /* Use a single region if all we have is one vCPU thread */ - if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { + if (max_threads == 1) { return 1; } /* - * Try to have more regions than max_cpus, with each region being >= 2 MB. + * Try to have more regions than threads, with each region being >= 2 MB. * If we can't, then just allocate one region per vCPU thread. */ n_regions = tb_size / (2 * MiB); - if (n_regions <= max_cpus) { - return max_cpus; + if (n_regions <= max_threads) { + return max_threads; } - return MIN(n_regions, max_cpus * 8); + return MIN(n_regions, max_threads * 8); #endif } @@ -497,14 +519,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) { return PROT_READ | PROT_WRITE; } #elif defined(_WIN32) -/* - * Local source-level compatibility with Unix. - * Used by tcg_region_init below. 
- */
-#define PROT_READ 1
-#define PROT_WRITE 2
-#define PROT_EXEC 4
-
 static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) {
     void *buf;
 
@@ -552,9 +566,10 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) {
         goto fail;
     }
 
-    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+    buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
     if (buf_rx == MAP_FAILED) {
-        goto fail_rx;
+        error_setg_errno(errp, errno, "failed to map shared memory for execute");
+        goto fail;
     }
 
     close(fd);
@@ -564,12 +579,8 @@
 
     return PROT_READ | PROT_WRITE;
 
-fail_rx:
-    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
-    if (buf_rx != MAP_FAILED) {
-        munmap(buf_rx, size);
-    }
+    /* buf_rx is always equal to MAP_FAILED here and does not require cleanup */
     if (buf_rw) {
         munmap(buf_rw, size);
     }
@@ -609,7 +620,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) {
         return -1;
     }
 
-    if (mprotect((void *) buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+    if (mprotect((void *) buf_rx, size, host_prot_read_exec()) != 0) {
         error_setg_errno(errp, errno, "mprotect for jit splitwx");
         munmap((void *) buf_rx, size);
         munmap((void *) buf_rw, size);
@@ -683,11 +694,7 @@ static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) {
  * and then assigning regions to TCG threads so that the threads can translate
  * code in parallel without synchronization.
  *
- * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
- * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
- * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
- * must have been parsed before calling this function, since it calls
- * qemu_tcg_mttcg_enabled().
+ * In system-mode the number of TCG threads is bounded by max_threads.
  *
  * In user-mode we use a single region. Having multiple regions in user-mode
  * is not supported, because the number of vCPU threads (recall that each thread
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
- * code, which makes parallel code generation less appealing than in softmmu.
+ * code, which makes parallel code generation less appealing than in system-mode.
  */
-void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus) {
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_threads) {
     const size_t page_size = qemu_real_host_page_size();
     size_t region_size;
     int have_prot, need_prot;
@@ -737,7 +744,7 @@
 * As a result of this we might end up with a few extra pages at the end of
 * the buffer; we will assign those to the last region.
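The leftover-page policy described above amounts to the following split (hypothetical helper that mirrors the logic rather than quoting the function):

    #include <stddef.h>

    #define ALIGN_DOWN(x, a) ((x) / (a) * (a))

    /* Page-align the per-region size; the remainder goes to the last region. */
    static void split_regions(size_t tb_size, size_t n, size_t page_size,
                              size_t *region_size, size_t *last_size) {
        *region_size = ALIGN_DOWN(tb_size / n, page_size);
        *last_size = tb_size - (n - 1) * *region_size;
    }

    int main(void) {
        size_t rs, last;
        split_regions(1024 * 1024 + 12288, 4, 4096, &rs, &last);
        return rs == 262144 && last == 262144 + 12288 ? 0 : 1;
    }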
*/ - region.n = tcg_n_regions(tb_size, max_cpus); + region.n = tcg_n_regions(tb_size, max_threads); region_size = tb_size / region.n; region_size = ALIGN_DOWN(region_size, page_size); @@ -768,7 +775,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus) { need_prot = PROT_READ | PROT_WRITE; #ifndef CONFIG_TCG_INTERPRETER if (tcg_splitwx_diff == 0) { - need_prot |= PROT_EXEC; + need_prot |= host_prot_read_exec(); } #endif for (size_t i = 0, n = region.n; i < n; i++) { @@ -783,10 +790,17 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus) { } else if (need_prot == (PROT_READ | PROT_WRITE)) { rc = qemu_mprotect_rw(start, end - start); } else { +#ifdef CONFIG_POSIX + rc = mprotect(start, end - start, need_prot); + if (rc) { + error_report("mprotect of jit buffer: %s", strerror(errno)); + } +#else g_assert_not_reached(); +#endif } if (rc) { - error_setg_errno(&error_fatal, errno, "mprotect of jit buffer"); + exit(1); } } if (have_prot != 0) { diff --git a/libtcg/src/tcg-common.c b/libtcg/src/tcg-common.c index 43405fa69..b3e7faf91 100644 --- a/libtcg/src/tcg-common.c +++ b/libtcg/src/tcg-common.c @@ -25,13 +25,9 @@ #include #include -#if defined(CONFIG_TCG_INTERPRETER) -uintptr_t tci_tb_ptr; -#endif - -TCGOpDef tcg_op_defs[] = { +const TCGOpDef tcg_op_defs[] = { #define DEF(s, oargs, iargs, cargs, flags) {#s, oargs, iargs, cargs, iargs + oargs + cargs, flags}, -#include +#include "tcg/tcg-opc.h" #undef DEF }; -const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); +const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); \ No newline at end of file diff --git a/libtcg/src/tcg-llvm.cpp b/libtcg/src/tcg-llvm.cpp index c5df44b8b..c9e14d858 100644 --- a/libtcg/src/tcg-llvm.cpp +++ b/libtcg/src/tcg-llvm.cpp @@ -726,11 +726,27 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { startNewBasicBlock(bb); \ } break; - __OP_BRCOND(INDEX_op_brcond_i32, 32) - -#if TCG_TARGET_REG_BITS == 64 - __OP_BRCOND(INDEX_op_brcond_i64, 64) -#endif + case INDEX_op_brcond: { + // Generic brcond - determine bits from operand types + assert(getValue(op->args[0])->getType() == getValue(op->args[1])->getType()); + switch (op->args[2]) { + __OP_BRCOND_C(TCG_COND_EQ, EQ) + __OP_BRCOND_C(TCG_COND_NE, NE) + __OP_BRCOND_C(TCG_COND_LT, SLT) + __OP_BRCOND_C(TCG_COND_GE, SGE) + __OP_BRCOND_C(TCG_COND_LE, SLE) + __OP_BRCOND_C(TCG_COND_GT, SGT) + __OP_BRCOND_C(TCG_COND_LTU, ULT) + __OP_BRCOND_C(TCG_COND_GEU, UGE) + __OP_BRCOND_C(TCG_COND_LEU, ULE) + __OP_BRCOND_C(TCG_COND_GTU, UGT) + default: + tcg_abort(); + } + BasicBlock *bb = BasicBlock::Create(getContext()); + m_builder.CreateCondBr(v, getLabel(op->args[3]), bb); + startNewBasicBlock(bb); + } break; #undef __OP_BRCOND_C #undef __OP_BRCOND @@ -740,26 +756,25 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { startNewBasicBlock(getLabel(op->args[0])); } break; - /*case INDEX_op_movi_i32: - setValue(op->args[0], ConstantInt::get(intType(32), op->args[1])); - break;*/ - - case INDEX_op_mov_i32: - // Move operation may perform truncation of the value - assert(getValue(op->args[1])->getType() == intType(32) || getValue(op->args[1])->getType() == intType(64)); - setValue(op->args[0], m_builder.CreateTrunc(getValue(op->args[1]), intType(32))); - break; - -#if TCG_TARGET_REG_BITS == 64 - /*case INDEX_op_movi_i64: - setValue(op->args[0], ConstantInt::get(intType(64), op->args[1])); - break;*/ - - case INDEX_op_mov_i64: - assert(getValue(op->args[1])->getType() == intType(64)); - setValue(op->args[0], getValue(op->args[1])); - 
break; -#endif + case INDEX_op_mov: { + // Generic mov - handles both 32-bit and 64-bit + // May perform truncation if destination is smaller than source + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned destBits = getValueBits(destIdx); + Value *src = getValue(op->args[1]); + unsigned srcBits = src->getType()->getIntegerBitWidth(); + + if (srcBits > destBits) { + // Truncate if source is larger than destination + setValue(op->args[0], m_builder.CreateTrunc(src, intType(destBits))); + } else if (srcBits == destBits) { + // Direct copy if same size + setValue(op->args[0], src); + } else { + // This shouldn't happen in well-formed TCG, but handle it + setValue(op->args[0], m_builder.CreateZExt(src, intType(destBits))); + } + } break; /* size extensions */ #define __EXT_OP(opc_name, truncBits, opBits, signE) \ @@ -776,7 +791,7 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { setValue(op->args[0], sext); \ } break; - /* extract ops */ + /* extract ops */ #define __EXTR_OP(opc_name, sourceBits, truncBits) \ case opc_name: { \ auto source = getValue(op->args[1]); \ @@ -785,84 +800,97 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { setValue(op->args[0], trunc); \ } break; - __EXT_OP(INDEX_op_ext8s_i32, 8, 32, S) - __EXT_OP(INDEX_op_ext8u_i32, 8, 32, Z) - __EXT_OP(INDEX_op_ext16s_i32, 16, 32, S) - __EXT_OP(INDEX_op_ext16u_i32, 16, 32, Z) - #if TCG_TARGET_REG_BITS == 64 - __EXT_OP(INDEX_op_ext8s_i64, 8, 64, S) - __EXT_OP(INDEX_op_ext8u_i64, 8, 64, Z) - __EXT_OP(INDEX_op_ext16s_i64, 16, 64, S) - __EXT_OP(INDEX_op_ext16u_i64, 16, 64, Z) - __EXT_OP(INDEX_op_ext_i32_i64, 32, 64, S) - __EXT_OP(INDEX_op_ext32s_i64, 32, 64, S) - __EXT_OP(INDEX_op_extu_i32_i64, 32, 64, Z) - __EXT_OP(INDEX_op_ext32u_i64, 32, 64, Z) __EXTR_OP(INDEX_op_extrl_i64_i32, 64, 32) #endif #undef __EXT_OP +#undef __EXTR_OP -#define __LD_OP(opc_name, memBits, regBits, signE) \ - case opc_name: \ - generateQemuCpuLoad(op->args, memBits, regBits, signE == 'S'); \ - break; + /* Generic load/store opcodes - determine register size from operands */ + case INDEX_op_ld8u: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 8, regBits, false); + } break; -#define __ST_OP(opc_name, memBits, regBits) \ - case opc_name: { \ - assert(getValue(op->args[0])->getType() == intType(regBits)); \ - assert(getValue(op->args[1])->getType() == wordType()); \ - Value *valueToStore = getValue(op->args[0]); \ - \ - generateQemuCpuStore(op->args, memBits, valueToStore); \ - } break; + case INDEX_op_ld8s: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 8, regBits, true); + } break; - __LD_OP(INDEX_op_ld8u_i32, 8, 32, 'Z') - __LD_OP(INDEX_op_ld8s_i32, 8, 32, 'S') - __LD_OP(INDEX_op_ld16u_i32, 16, 32, 'Z') - __LD_OP(INDEX_op_ld16s_i32, 16, 32, 'S') - __LD_OP(INDEX_op_ld_i32, 32, 32, 'Z') + case INDEX_op_ld16u: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 16, regBits, false); + } break; - __ST_OP(INDEX_op_st8_i32, 8, 32) - __ST_OP(INDEX_op_st16_i32, 16, 32) - __ST_OP(INDEX_op_st_i32, 32, 32) + case INDEX_op_ld16s: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 16, regBits, true); + } break; -#if TCG_TARGET_REG_BITS == 64 - __LD_OP(INDEX_op_ld8u_i64, 8, 64, 'Z') - __LD_OP(INDEX_op_ld8s_i64, 8, 64, 'S') - 
__LD_OP(INDEX_op_ld16u_i64, 16, 64, 'Z') - __LD_OP(INDEX_op_ld16s_i64, 16, 64, 'S') - __LD_OP(INDEX_op_ld32u_i64, 32, 64, 'Z') - __LD_OP(INDEX_op_ld32s_i64, 32, 64, 'S') - __LD_OP(INDEX_op_ld_i64, 64, 64, 'Z') - - __ST_OP(INDEX_op_st8_i64, 8, 64) - __ST_OP(INDEX_op_st16_i64, 16, 64) - __ST_OP(INDEX_op_st32_i64, 32, 64) - __ST_OP(INDEX_op_st_i64, 64, 64) -#endif + case INDEX_op_ld32u: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 32, regBits, false); + } break; + + case INDEX_op_ld32s: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, 32, regBits, true); + } break; + + case INDEX_op_ld: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(destIdx); + generateQemuCpuLoad(op->args, regBits, regBits, false); + } break; + + case INDEX_op_st8: { + assert(getValue(op->args[1])->getType() == wordType()); + Value *valueToStore = getValue(op->args[0]); + generateQemuCpuStore(op->args, 8, valueToStore); + } break; + + case INDEX_op_st16: { + assert(getValue(op->args[1])->getType() == wordType()); + Value *valueToStore = getValue(op->args[0]); + generateQemuCpuStore(op->args, 16, valueToStore); + } break; + + case INDEX_op_st32: { + assert(getValue(op->args[1])->getType() == wordType()); + Value *valueToStore = getValue(op->args[0]); + generateQemuCpuStore(op->args, 32, valueToStore); + } break; -#undef __LD_OP -#undef __ST_OP + case INDEX_op_st: { + int srcIdx = temp_idx(arg_temp(op->args[0])); + unsigned regBits = getValueBits(srcIdx); + assert(getValue(op->args[1])->getType() == wordType()); + Value *valueToStore = getValue(op->args[0]); + generateQemuCpuStore(op->args, regBits, valueToStore); + } break; -/* arith */ -#define __ARITH_OP(opc_name, op1, bits) \ +/* arith - Generic macros for QEMU 10.2+ unified opcodes */ +#define __ARITH_OP_GENERIC(opc_name, op1) \ case opc_name: { \ Value *v1 = getValue(op->args[1]); \ Value *v2 = getValue(op->args[2]); \ - adjustTypeSize(bits, &v1, &v2); \ - assert(v1->getType() == intType(bits)); \ - assert(v2->getType() == intType(bits)); \ + assert(v1->getType() == v2->getType()); \ setValue(op->args[0], m_builder.Create##op1(v1, v2)); \ } break; -#define __ARITH_OP_DIV2(opc_name, signE, bits) \ - case opc_name: \ - assert(getValue(op->args[2])->getType() == intType(bits)); \ +#define __ARITH_OP_DIV2_GENERIC(opc_name, signE) \ + case opc_name: { \ + unsigned bits = getValue(op->args[2])->getType()->getIntegerBitWidth(); \ assert(getValue(op->args[3])->getType() == intType(bits)); \ assert(getValue(op->args[4])->getType() == intType(bits)); \ v = m_builder.CreateShl(m_builder.CreateZExt(getValue(op->args[3]), intType(bits * 2)), \ @@ -870,130 +898,85 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { v = m_builder.CreateOr(v, m_builder.CreateZExt(getValue(op->args[2]), intType(bits * 2))); \ setValue(op->args[0], m_builder.Create##signE##Div(v, getValue(op->args[4]))); \ setValue(op->args[1], m_builder.Create##signE##Rem(v, getValue(op->args[4]))); \ - break; + } break; -#define __ARITH_OP_ROT(opc_name, op1, op2, bits) \ - case opc_name: \ - assert(getValue(op->args[1])->getType() == intType(bits)); \ +#define __ARITH_OP_ROT_GENERIC(opc_name, op1, op2) \ + case opc_name: { \ + unsigned bits = getValue(op->args[1])->getType()->getIntegerBitWidth(); \ assert(getValue(op->args[2])->getType() == intType(bits)); \ v = m_builder.CreateSub(ConstantInt::get(intType(bits), 
bits), getValue(op->args[2])); \ setValue(op->args[0], m_builder.CreateOr(m_builder.Create##op1(getValue(op->args[1]), getValue(op->args[2])), \ m_builder.Create##op2(getValue(op->args[1]), v))); \ - break; + } break; -#define __ARITH_OP_I(opc_name, op1, i, bits) \ - case opc_name: \ - assert(getValue(op->args[1])->getType() == intType(bits)); \ +#define __ARITH_OP_I_GENERIC(opc_name, op1, i) \ + case opc_name: { \ + unsigned bits = getValue(op->args[1])->getType()->getIntegerBitWidth(); \ setValue(op->args[0], m_builder.Create##op1(ConstantInt::get(intType(bits), i), getValue(op->args[1]))); \ - break; + } break; -#define __ARITH_OP_BSWAP(opc_name, sBits, bits) \ +#define __ARITH_OP_BSWAP_GENERIC(opc_name, sBits) \ case opc_name: { \ - assert(getValue(op->args[1])->getType() == intType(bits)); \ + unsigned bits = getValue(op->args[1])->getType()->getIntegerBitWidth(); \ Type *Tys[] = {intType(sBits)}; \ Function *bswap = Intrinsic::getDeclaration(m_module.get(), Intrinsic::bswap, ArrayRef(Tys, 1)); \ v = m_builder.CreateTrunc(getValue(op->args[1]), intType(sBits)); \ setValue(op->args[0], m_builder.CreateZExt(m_builder.CreateCall(bswap, v), intType(bits))); \ } break; - __ARITH_OP(INDEX_op_add_i32, Add, 32) - __ARITH_OP(INDEX_op_sub_i32, Sub, 32) - __ARITH_OP(INDEX_op_mul_i32, Mul, 32) + /* Generic arithmetic operations */ + __ARITH_OP_GENERIC(INDEX_op_add, Add) + __ARITH_OP_GENERIC(INDEX_op_sub, Sub) + __ARITH_OP_GENERIC(INDEX_op_mul, Mul) #ifdef TCG_TARGET_HAS_div_i32 - __ARITH_OP(INDEX_op_div_i32, SDiv, 32) - __ARITH_OP(INDEX_op_divu_i32, UDiv, 32) - __ARITH_OP(INDEX_op_rem_i32, SRem, 32) - __ARITH_OP(INDEX_op_remu_i32, URem, 32) -#else - __ARITH_OP_DIV2(INDEX_op_div2_i32, S, 32) - __ARITH_OP_DIV2(INDEX_op_divu2_i32, U, 32) -#endif - - __ARITH_OP(INDEX_op_and_i32, And, 32) - __ARITH_OP(INDEX_op_or_i32, Or, 32) - __ARITH_OP(INDEX_op_xor_i32, Xor, 32) - - __ARITH_OP(INDEX_op_shl_i32, Shl, 32) - __ARITH_OP(INDEX_op_shr_i32, LShr, 32) - __ARITH_OP(INDEX_op_sar_i32, AShr, 32) - - __ARITH_OP_ROT(INDEX_op_rotl_i32, Shl, LShr, 32) - __ARITH_OP_ROT(INDEX_op_rotr_i32, LShr, Shl, 32) - - __ARITH_OP_I(INDEX_op_not_i32, Xor, (uint64_t) -1, 32) - __ARITH_OP_I(INDEX_op_neg_i32, Sub, 0, 32) - - __ARITH_OP_BSWAP(INDEX_op_bswap16_i32, 16, 32) - __ARITH_OP_BSWAP(INDEX_op_bswap32_i32, 32, 32) - -#if TCG_TARGET_REG_BITS == 64 - __ARITH_OP(INDEX_op_add_i64, Add, 64) - __ARITH_OP(INDEX_op_sub_i64, Sub, 64) - __ARITH_OP(INDEX_op_mul_i64, Mul, 64) - -#ifdef TCG_TARGET_HAS_div_i64 - __ARITH_OP(INDEX_op_div_i64, SDiv, 64) - __ARITH_OP(INDEX_op_divu_i64, UDiv, 64) - __ARITH_OP(INDEX_op_rem_i64, SRem, 64) - __ARITH_OP(INDEX_op_remu_i64, URem, 64) + __ARITH_OP_GENERIC(INDEX_op_divs, SDiv) + __ARITH_OP_GENERIC(INDEX_op_divu, UDiv) + __ARITH_OP_GENERIC(INDEX_op_rems, SRem) + __ARITH_OP_GENERIC(INDEX_op_remu, URem) #else - __ARITH_OP_DIV2(INDEX_op_div2_i64, S, 64) - __ARITH_OP_DIV2(INDEX_op_divu2_i64, U, 64) + __ARITH_OP_DIV2_GENERIC(INDEX_op_divs2, S) + __ARITH_OP_DIV2_GENERIC(INDEX_op_divu2, U) #endif - __ARITH_OP(INDEX_op_and_i64, And, 64) - __ARITH_OP(INDEX_op_or_i64, Or, 64) - __ARITH_OP(INDEX_op_xor_i64, Xor, 64) + __ARITH_OP_GENERIC(INDEX_op_and, And) + __ARITH_OP_GENERIC(INDEX_op_or, Or) + __ARITH_OP_GENERIC(INDEX_op_xor, Xor) - __ARITH_OP(INDEX_op_shl_i64, Shl, 64) - __ARITH_OP(INDEX_op_shr_i64, LShr, 64) - __ARITH_OP(INDEX_op_sar_i64, AShr, 64) + __ARITH_OP_GENERIC(INDEX_op_shl, Shl) + __ARITH_OP_GENERIC(INDEX_op_shr, LShr) + __ARITH_OP_GENERIC(INDEX_op_sar, AShr) - __ARITH_OP_ROT(INDEX_op_rotl_i64, Shl, 
LShr, 64) - __ARITH_OP_ROT(INDEX_op_rotr_i64, LShr, Shl, 64) + __ARITH_OP_ROT_GENERIC(INDEX_op_rotl, Shl, LShr) + __ARITH_OP_ROT_GENERIC(INDEX_op_rotr, LShr, Shl) - __ARITH_OP_I(INDEX_op_not_i64, Xor, (uint64_t) -1, 64) - __ARITH_OP_I(INDEX_op_neg_i64, Sub, 0, 64) + __ARITH_OP_I_GENERIC(INDEX_op_not, Xor, (uint64_t) -1) + __ARITH_OP_I_GENERIC(INDEX_op_neg, Sub, 0) - __ARITH_OP_BSWAP(INDEX_op_bswap16_i64, 16, 64) - __ARITH_OP_BSWAP(INDEX_op_bswap32_i64, 32, 64) - __ARITH_OP_BSWAP(INDEX_op_bswap64_i64, 64, 64) -#endif + __ARITH_OP_BSWAP_GENERIC(INDEX_op_bswap16, 16) + __ARITH_OP_BSWAP_GENERIC(INDEX_op_bswap32, 32) + __ARITH_OP_BSWAP_GENERIC(INDEX_op_bswap64, 64) -#undef __ARITH_OP_BSWAP -#undef __ARITH_OP_I -#undef __ARITH_OP_ROT -#undef __ARITH_OP_DIV2 -#undef __ARITH_OP - -/* QEMU specific */ -#define __OP_QEMU_ST(opc_name, bits) \ - case opc_name: \ - generateQemuMemOp(false, m_builder.CreateIntCast(getValue(op->args[0]), intType(bits), false), \ - getValue(op->args[1]), op->args[2], bits); \ - break; +#undef __ARITH_OP_BSWAP_GENERIC +#undef __ARITH_OP_I_GENERIC +#undef __ARITH_OP_ROT_GENERIC +#undef __ARITH_OP_DIV2_GENERIC +#undef __ARITH_OP_GENERIC -#define __OP_QEMU_LD(opc_name, bits) \ - case opc_name: \ - v = generateQemuMemOp(true, NULL, getValue(op->args[1]), op->args[2], bits); \ - setValue(op->args[0], v); \ - break; - - __OP_QEMU_ST(INDEX_op_qemu_st_a64_i32, 32) - __OP_QEMU_ST(INDEX_op_qemu_st_a64_i64, 64) - - __OP_QEMU_ST(INDEX_op_qemu_st_a32_i32, 32) - __OP_QEMU_ST(INDEX_op_qemu_st_a32_i64, 64) - - __OP_QEMU_LD(INDEX_op_qemu_ld_a64_i32, 32) - __OP_QEMU_LD(INDEX_op_qemu_ld_a64_i64, 64) - - __OP_QEMU_LD(INDEX_op_qemu_ld_a32_i32, 32) - __OP_QEMU_LD(INDEX_op_qemu_ld_a32_i64, 64) + /* QEMU specific */ + case INDEX_op_qemu_st: { + int dataIdx = temp_idx(arg_temp(op->args[0])); + unsigned dataBits = getValueBits(dataIdx); + generateQemuMemOp(false, m_builder.CreateIntCast(getValue(op->args[0]), intType(dataBits), false), + getValue(op->args[1]), op->args[2], dataBits); + } break; -#undef __OP_QEMU_LD -#undef __OP_QEMU_ST + case INDEX_op_qemu_ld: { + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned destBits = getValueBits(destIdx); + v = generateQemuMemOp(true, NULL, getValue(op->args[1]), op->args[2], destBits); + setValue(op->args[0], v); + } break; case INDEX_op_exit_tb: { m_builder.CreateRet(ConstantInt::get(wordType(), op->args[0])); @@ -1003,61 +986,107 @@ int TCGLLVMTranslator::generateOperation(const TCGOp *op) { // tb linking is disabled break; - case INDEX_op_deposit_i32: { - Value *arg1 = getValue(op->args[1]); - Value *arg2 = getValue(op->args[2]); - arg2 = m_builder.CreateTrunc(arg2, intType(32)); - - uint32_t ofs = op->args[3]; - uint32_t len = op->args[4]; - - if (ofs == 0 && len == 32) { - setValue(op->args[0], arg2); - break; + case INDEX_op_extract: { + // Extract a bit field: extract 'len' bits starting at bit 'ofs' from source + // Zero-extends the extracted field to destination size + Value *src = getValue(op->args[1]); + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned destBits = getValueBits(destIdx); + unsigned srcBits = src->getType()->getIntegerBitWidth(); + + uint32_t ofs = op->args[2]; // bit offset + uint32_t len = op->args[3]; // bit length + + Value *result; + if (ofs == 0 && len == srcBits) { + // Extracting entire value - just copy (with potential extension) + if (srcBits < destBits) { + result = m_builder.CreateZExt(src, intType(destBits)); + } else { + result = src; + } + } else { + // Shift right to position the field, then mask to 
extract + Value *shifted = m_builder.CreateLShr(src, APInt(srcBits, ofs)); + uint64_t mask = (1ULL << len) - 1; + Value *extracted = m_builder.CreateAnd(shifted, APInt(srcBits, mask)); + + // Extend to destination size if needed + if (srcBits < destBits) { + result = m_builder.CreateZExt(extracted, intType(destBits)); + } else if (len < destBits) { + result = m_builder.CreateZExt(m_builder.CreateTrunc(extracted, intType(len)), intType(destBits)); + } else { + result = extracted; + } } + setValue(op->args[0], result); + } break; - uint32_t mask = (1u << len) - 1; - Value *t1, *ret; - if (ofs + len < 32) { - t1 = m_builder.CreateAnd(arg2, APInt(32, mask)); - t1 = m_builder.CreateShl(t1, APInt(32, ofs)); + case INDEX_op_sextract: { + // Extract a bit field and sign-extend: extract 'len' bits starting at bit 'ofs' + // Sign-extends the extracted field to destination size + Value *src = getValue(op->args[1]); + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned destBits = getValueBits(destIdx); + unsigned srcBits = src->getType()->getIntegerBitWidth(); + + uint32_t ofs = op->args[2]; // bit offset + uint32_t len = op->args[3]; // bit length + + Value *result; + if (ofs == 0 && len == srcBits) { + // Extracting entire value - just copy (with potential sign-extension) + if (srcBits < destBits) { + result = m_builder.CreateSExt(src, intType(destBits)); + } else { + result = src; + } } else { - t1 = m_builder.CreateShl(arg2, APInt(32, ofs)); + // Shift right to position the field, then mask to extract + Value *shifted = m_builder.CreateLShr(src, APInt(srcBits, ofs)); + uint64_t mask = (1ULL << len) - 1; + Value *extracted = m_builder.CreateAnd(shifted, APInt(srcBits, mask)); + + // Truncate to the exact field size, then sign-extend to destination + Value *truncated = m_builder.CreateTrunc(extracted, intType(len)); + if (len < destBits) { + result = m_builder.CreateSExt(truncated, intType(destBits)); + } else { + result = truncated; + } } - - ret = m_builder.CreateAnd(arg1, APInt(32, ~(mask << ofs))); - ret = m_builder.CreateOr(ret, t1); - setValue(op->args[0], ret); + setValue(op->args[0], result); } break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_deposit_i64: { + + case INDEX_op_deposit: { Value *arg1 = getValue(op->args[1]); Value *arg2 = getValue(op->args[2]); - arg2 = m_builder.CreateTrunc(arg2, intType(64)); + int destIdx = temp_idx(arg_temp(op->args[0])); + unsigned bits = getValueBits(destIdx); + arg2 = m_builder.CreateTrunc(arg2, intType(bits)); uint32_t ofs = op->args[3]; uint32_t len = op->args[4]; - if (0 == ofs && 64 == len) { + if (ofs == 0 && len == bits) { setValue(op->args[0], arg2); break; } - uint64_t mask = (1u << len) - 1; + uint64_t mask = (1ULL << len) - 1; Value *t1, *ret; - - if (ofs + len < 64) { - t1 = m_builder.CreateAnd(arg2, APInt(64, mask)); - t1 = m_builder.CreateShl(t1, APInt(64, ofs)); + if (ofs + len < bits) { + t1 = m_builder.CreateAnd(arg2, APInt(bits, mask)); + t1 = m_builder.CreateShl(t1, APInt(bits, ofs)); } else { - t1 = m_builder.CreateShl(arg2, APInt(64, ofs)); + t1 = m_builder.CreateShl(arg2, APInt(bits, ofs)); } - ret = m_builder.CreateAnd(arg1, APInt(64, ~(mask << ofs))); + ret = m_builder.CreateAnd(arg1, APInt(bits, ~(mask << ofs))); ret = m_builder.CreateOr(ret, t1); setValue(op->args[0], ret); } break; -#endif case INDEX_op_mb: // Memory barriers not supported. 
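[Editor's note, not part of the patch: the INDEX_op_extract/sextract/deposit lowering above implements the standard TCG bit-field semantics. A minimal scalar model in C for reference; the model_* names are hypothetical:

    #include <stdint.h>

    /* Zero-extending extract: take 'len' bits of 'src' starting at bit 'ofs'. */
    static uint64_t model_extract(uint64_t src, unsigned ofs, unsigned len) {
        uint64_t mask = (len == 64) ? ~0ULL : ((1ULL << len) - 1);
        return (src >> ofs) & mask;
    }

    /* Sign-extending extract: shift the field to the top of the word, then
       arithmetic-shift back down so the field's MSB becomes the sign bit. */
    static int64_t model_sextract(uint64_t src, unsigned ofs, unsigned len) {
        return (int64_t) (src << (64 - ofs - len)) >> (64 - len);
    }

    /* Deposit: replace 'len' bits of 'dst' at offset 'ofs' with the low
       'len' bits of 'src'; all other bits of 'dst' are preserved. */
    static uint64_t model_deposit(uint64_t dst, uint64_t src, unsigned ofs, unsigned len) {
        uint64_t mask = ((len == 64) ? ~0ULL : ((1ULL << len) - 1)) << ofs;
        return (dst & ~mask) | ((src << ofs) & mask);
    }

The IR emitted above performs the same computation, with an additional ZExt/SExt/Trunc when the destination register width differs from the source width.]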
@@ -1170,7 +1199,7 @@ Function *TCGLLVMTranslator::generateCode(TCGContext *s, TranslationBlock *tb) { switch (opc) { #if defined(CONFIG_SYMBEX) case INDEX_op_insn_start: { - assert(TARGET_INSN_START_WORDS == 2); + assert(INSN_START_WORDS == 3); uint64_t curpc = op->args[0] - tb->cs_base; uint64_t cc_op = op->args[1]; diff --git a/libtcg/src/tcg-op-gvec-common.h b/libtcg/src/tcg-op-gvec-common.h index 2c9875d9c..fa28ce337 100644 --- a/libtcg/src/tcg-op-gvec-common.h +++ b/libtcg/src/tcg-op-gvec-common.h @@ -167,6 +167,8 @@ typedef struct { bool prefer_i64; /* Load dest as a 3rd source operand. */ bool load_dest; + /* Write aofs as a 2nd dest operand. */ + bool write_aofs; } GVecGen3i; typedef struct { @@ -209,24 +211,66 @@ typedef struct { bool prefer_i64; } GVecGen4i; -void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); -void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen2i *); -void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); -void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); +/* Expand (dbase+dofs) = op(abase+aofs), length @oprsz, clearing to @maxsz. */ +void tcg_gen_gvec_2_var(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, + const GVecGen2 *op); +/* Similarly, expand (env+dofs) = op(env+aofs). */ +void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *op); +/* Similarly, expand (env+dofs) = op(env+aofs, c). */ +void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen2i *op); +/* Similarly, expand (env+dofs) = op(env+aofs, s). */ +void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, const GVecGen2s *op); + +/* + * Expand (dbase+dofs) = op(abase+aofs, bbase+bofs), + * length @oprsz, clearing to @maxsz. + */ +void tcg_gen_gvec_3_var(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen3 *op); +/* Similarly, expand (env+dofs) = op(env+aofs, env+bofs). */ +void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *op); + +/* + * Depending on op->load_dest and op->write_aofs, expand + * (env+dofs) = op(env+aofs, env+bofs, c) + * or + * (env+dofs) = op(env+dofs, env+aofs, env+bofs, c) + * or + * (env+dofs), (env+aofs) = op(env+aofs, env+bofs, c) + * or + * (env+dofs), (env+aofs) = op(env+dofs, env+aofs, env+bofs, c) + */ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int64_t c, - const GVecGen3i *); + const GVecGen3i *op); + +/* + * Depending on op->write_aofs, expand + * (env+dofs) = op(env+aofs, env+bofs, env+cofs) + * or + * (env+dofs), (env+aofs) = op(env+aofs, env+bofs, env+cofs) + */ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); + +/* Expand (env+dofs) = op(env+aofs, env+bofs, env+cofs, c). */ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen4i *); /* Expand a specific vector operation. 
*/ +void tcg_gen_gvec_mov_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, + uint32_t maxsz); + void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_add_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sub_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); @@ -273,6 +317,9 @@ void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i32); void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i64); +void tcg_gen_gvec_dup_imm_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, + uint64_t imm); + void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); @@ -297,6 +344,10 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bo void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); /* * Perform vector bit select: d = (b & a) | (c & ~a). diff --git a/libtcg/src/tcg-op-gvec.c b/libtcg/src/tcg-op-gvec.c index ebfb7c711..c954120a1 100644 --- a/libtcg/src/tcg-op-gvec.c +++ b/libtcg/src/tcg-op-gvec.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "tcg-op-gvec.h" // clang-format on @@ -59,26 +60,32 @@ static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs) { tcg_debug_assert((ofs & max_align) == 0); } -/* Verify vector overlap rules for two operands. */ -static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s) { - tcg_debug_assert(d == a || d + s <= a || a + s <= d); +/* + * Verify vector overlap rules for two operands. + * When dbase and abase are not the same pointer, we cannot check for + * overlap at compile-time, but the runtime restrictions remain. + */ +static void check_overlap_2(TCGv_ptr dbase, uint32_t d, TCGv_ptr abase, uint32_t a, uint32_t s) { + tcg_debug_assert(dbase != abase || d == a || d + s <= a || a + s <= d); } /* Verify vector overlap rules for three operands. 
*/ -static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s) { - check_overlap_2(d, a, s); - check_overlap_2(d, b, s); - check_overlap_2(a, b, s); +static void check_overlap_3(TCGv_ptr dbase, uint32_t d, TCGv_ptr abase, uint32_t a, TCGv_ptr bbase, uint32_t b, + uint32_t s) { + check_overlap_2(dbase, d, abase, a, s); + check_overlap_2(dbase, d, bbase, b, s); + check_overlap_2(abase, a, bbase, b, s); } /* Verify vector overlap rules for four operands. */ -static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b, uint32_t c, uint32_t s) { - check_overlap_2(d, a, s); - check_overlap_2(d, b, s); - check_overlap_2(d, c, s); - check_overlap_2(a, b, s); - check_overlap_2(a, c, s); - check_overlap_2(b, c, s); +static void check_overlap_4(TCGv_ptr dbase, uint32_t d, TCGv_ptr abase, uint32_t a, TCGv_ptr bbase, uint32_t b, + TCGv_ptr cbase, uint32_t c, uint32_t s) { + check_overlap_2(dbase, d, abase, a, s); + check_overlap_2(dbase, d, bbase, b, s); + check_overlap_2(dbase, d, cbase, c, s); + check_overlap_2(abase, a, bbase, b, s); + check_overlap_2(abase, a, cbase, c, s); + check_overlap_2(bbase, b, cbase, c, s); } /* Create a descriptor from components. */ @@ -86,7 +93,19 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data) { uint32_t desc = 0; check_size_align(oprsz, maxsz, 0); - tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS)); + + /* + * We want to check that 'data' will fit into SIMD_DATA_BITS. + * However, some callers want to treat the data as a signed + * value (which they can later get back with simd_data()) + * and some want to treat it as an unsigned value. + * So here we assert only that the data will fit into the + * field in at least one way. This means that some invalid + * values from the caller will not be detected, e.g. if the + * caller wants to handle the value as a signed integer but + * incorrectly passes us 1 << (SIMD_DATA_BITS - 1). + */ + tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS) || data == extract32(data, 0, SIMD_DATA_BITS)); oprsz = (oprsz / 8) - 1; maxsz = (maxsz / 8) - 1; @@ -108,16 +127,16 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data) { } /* Generate a call to a gvec-style helper with two vector operands. */ -void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_2 *fn) { +static void expand_2_ool(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2 *fn) { TCGv_ptr a0, a1; TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data)); a0 = tcg_temp_ebb_new_ptr(); a1 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); + tcg_gen_addi_ptr(a0, dbase, dofs); + tcg_gen_addi_ptr(a1, abase, aofs); fn(a0, a1, desc); @@ -125,6 +144,11 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t m tcg_temp_free_ptr(a1); } +void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_2 *fn) { + expand_2_ool(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, data, fn); +} + /* Generate a call to a gvec-style helper with two vector operands and one scalar operand. 
*/ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, uint32_t maxsz, int32_t data, @@ -135,8 +159,8 @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprs a0 = tcg_temp_ebb_new_ptr(); a1 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); fn(a0, a1, c, desc); @@ -145,8 +169,8 @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprs } /* Generate a call to a gvec-style helper with three vector operands. */ -void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_3 *fn) { +static void expand_3_ool(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_3 *fn) { TCGv_ptr a0, a1, a2; TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data)); @@ -154,9 +178,9 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t op a1 = tcg_temp_ebb_new_ptr(); a2 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); + tcg_gen_addi_ptr(a0, dbase, dofs); + tcg_gen_addi_ptr(a1, abase, aofs); + tcg_gen_addi_ptr(a2, bbase, bofs); fn(a0, a1, a2, desc); @@ -165,6 +189,11 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t op tcg_temp_free_ptr(a2); } +void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_3 *fn) { + expand_3_ool(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, oprsz, maxsz, data, fn); +} + /* Generate a call to a gvec-style helper with four vector operands. 
*/ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_4 *fn) { @@ -176,10 +205,10 @@ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t co a2 = tcg_temp_ebb_new_ptr(); a3 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); - tcg_gen_addi_ptr(a3, cpu_env, cofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a2, tcg_env, bofs); + tcg_gen_addi_ptr(a3, tcg_env, cofs); fn(a0, a1, a2, a3, desc); @@ -201,11 +230,11 @@ void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t co a3 = tcg_temp_ebb_new_ptr(); a4 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); - tcg_gen_addi_ptr(a3, cpu_env, cofs); - tcg_gen_addi_ptr(a4, cpu_env, xofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a2, tcg_env, bofs); + tcg_gen_addi_ptr(a3, tcg_env, cofs); + tcg_gen_addi_ptr(a4, tcg_env, xofs); fn(a0, a1, a2, a3, a4, desc); @@ -226,8 +255,8 @@ void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs, TCGv_ptr ptr, uint32_t opr a0 = tcg_temp_ebb_new_ptr(); a1 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); fn(a0, a1, ptr, desc); @@ -246,9 +275,9 @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, TCGv_ptr pt a1 = tcg_temp_ebb_new_ptr(); a2 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a2, tcg_env, bofs); fn(a0, a1, a2, ptr, desc); @@ -269,10 +298,10 @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t co a2 = tcg_temp_ebb_new_ptr(); a3 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); - tcg_gen_addi_ptr(a3, cpu_env, cofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a2, tcg_env, bofs); + tcg_gen_addi_ptr(a3, tcg_env, cofs); fn(a0, a1, a2, a3, ptr, desc); @@ -295,11 +324,11 @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t co a3 = tcg_temp_ebb_new_ptr(); a4 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); - tcg_gen_addi_ptr(a2, cpu_env, bofs); - tcg_gen_addi_ptr(a3, cpu_env, cofs); - tcg_gen_addi_ptr(a4, cpu_env, eofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a2, tcg_env, bofs); + tcg_gen_addi_ptr(a3, tcg_env, cofs); + tcg_gen_addi_ptr(a4, tcg_env, eofs); fn(a0, a1, a2, a3, a4, ptr, desc); @@ -343,7 +372,7 @@ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) { return q <= MAX_UNROLL; } -static void expand_clr(uint32_t dofs, uint32_t maxsz); +static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz); /* Duplicate C as per VECE. 
*/ uint64_t(dup_const)(unsigned vece, uint64_t c) { @@ -431,7 +460,7 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece, uint32_t return 0; } -static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_vec t_vec) { +static void do_dup_store(TCGType type, TCGv_ptr dbase, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_vec t_vec) { uint32_t i = 0; tcg_debug_assert(oprsz >= 8); @@ -442,7 +471,7 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, uint32_t m * are misaligned wrt the maximum vector size, so do that first. */ if (dofs & 8) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64); i += 8; } @@ -454,17 +483,17 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, uint32_t m * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ for (; i + 32 <= oprsz; i += 32) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V256); } /* fallthru */ case TCG_TYPE_V128: for (; i + 16 <= oprsz; i += 16) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V128); } break; case TCG_TYPE_V64: for (; i < oprsz; i += 8) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64); } break; default: @@ -472,16 +501,17 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, uint32_t m } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } -/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. +/* + * Set OPRSZ bytes at DBASE + DOFS to replications of IN_32, IN_64 or IN_C. * Only one of IN_32 or IN_64 may be set; * IN_C is used if IN_32 and IN_64 are unset. */ -static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64, - uint64_t in_c) { +static void do_dup(unsigned vece, TCGv_ptr dbase, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_i32 in_32, + TCGv_i64 in_64, uint64_t in_c) { TCGType type; TCGv_i64 t_64; TCGv_i32 t_32, t_desc; @@ -517,8 +547,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, } else { tcg_gen_dupi_vec(vece, t_vec, in_c); } - do_dup_store(type, dofs, oprsz, maxsz, t_vec); - tcg_temp_free_vec(t_vec); + do_dup_store(type, dbase, dofs, oprsz, maxsz, t_vec); return; } @@ -559,14 +588,14 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, /* Implement inline if we picked an implementation size above. */ if (t_32) { for (i = 0; i < oprsz; i += 4) { - tcg_gen_st_i32(t_32, cpu_env, dofs + i); + tcg_gen_st_i32(t_32, dbase, dofs + i); } tcg_temp_free_i32(t_32); goto done; } if (t_64) { for (i = 0; i < oprsz; i += 8) { - tcg_gen_st_i64(t_64, cpu_env, dofs + i); + tcg_gen_st_i64(t_64, dbase, dofs + i); } tcg_temp_free_i64(t_64); goto done; @@ -575,7 +604,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, /* Otherwise implement out of line. */ t_ptr = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(t_ptr, cpu_env, dofs); + tcg_gen_addi_ptr(t_ptr, dbase, dofs); /* * This may be expand_clr for the tail of an operation, e.g. 
@@ -640,29 +669,29 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, done: if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } /* Likewise, but with zero. */ -static void expand_clr(uint32_t dofs, uint32_t maxsz) { - do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0); +static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz) { + do_dup(MO_8, dbase, dofs, maxsz, maxsz, NULL, NULL, 0); } /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ -static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, bool load_dest, +static void expand_2_i32(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); + tcg_gen_ld_i32(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_i32(t1, cpu_env, dofs + i); + tcg_gen_ld_i32(t1, dbase, dofs + i); } fni(t1, t0); - tcg_gen_st_i32(t1, cpu_env, dofs + i); + tcg_gen_st_i32(t1, dbase, dofs + i); } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -675,12 +704,12 @@ static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, int32_t uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); + tcg_gen_ld_i32(t0, tcg_env, aofs + i); if (load_dest) { - tcg_gen_ld_i32(t1, cpu_env, dofs + i); + tcg_gen_ld_i32(t1, tcg_env, dofs + i); } fni(t1, t0, c); - tcg_gen_st_i32(t1, cpu_env, dofs + i); + tcg_gen_st_i32(t1, tcg_env, dofs + i); } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -693,34 +722,34 @@ static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, TCGv_i32 uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); + tcg_gen_ld_i32(t0, tcg_env, aofs + i); if (scalar_first) { fni(t1, c, t0); } else { fni(t1, t0, c); } - tcg_gen_st_i32(t1, cpu_env, dofs + i); + tcg_gen_st_i32(t1, tcg_env, dofs + i); } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } /* Expand OPSZ bytes worth of three-operand operations using i32 elements. 
*/ -static void expand_3_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, bool load_dest, - void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32)) { +static void expand_3_i32(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); - tcg_gen_ld_i32(t1, cpu_env, bofs + i); + tcg_gen_ld_i32(t0, abase, aofs + i); + tcg_gen_ld_i32(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_i32(t2, cpu_env, dofs + i); + tcg_gen_ld_i32(t2, dbase, dofs + i); } fni(t2, t0, t1); - tcg_gen_st_i32(t2, cpu_env, dofs + i); + tcg_gen_st_i32(t2, dbase, dofs + i); } tcg_temp_free_i32(t2); tcg_temp_free_i32(t1); @@ -728,20 +757,23 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t o } static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, int32_t c, bool load_dest, - void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t)) { + bool write_aofs, void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t)) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); - tcg_gen_ld_i32(t1, cpu_env, bofs + i); + tcg_gen_ld_i32(t0, tcg_env, aofs + i); + tcg_gen_ld_i32(t1, tcg_env, bofs + i); if (load_dest) { - tcg_gen_ld_i32(t2, cpu_env, dofs + i); + tcg_gen_ld_i32(t2, tcg_env, dofs + i); } fni(t2, t0, t1, c); - tcg_gen_st_i32(t2, cpu_env, dofs + i); + tcg_gen_st_i32(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i32(t0, tcg_env, aofs + i); + } } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -758,13 +790,13 @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t c uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t1, cpu_env, aofs + i); - tcg_gen_ld_i32(t2, cpu_env, bofs + i); - tcg_gen_ld_i32(t3, cpu_env, cofs + i); + tcg_gen_ld_i32(t1, tcg_env, aofs + i); + tcg_gen_ld_i32(t2, tcg_env, bofs + i); + tcg_gen_ld_i32(t3, tcg_env, cofs + i); fni(t0, t1, t2, t3); - tcg_gen_st_i32(t0, cpu_env, dofs + i); + tcg_gen_st_i32(t0, tcg_env, dofs + i); if (write_aofs) { - tcg_gen_st_i32(t1, cpu_env, aofs + i); + tcg_gen_st_i32(t1, tcg_env, aofs + i); } } tcg_temp_free_i32(t3); @@ -782,11 +814,11 @@ static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t1, cpu_env, aofs + i); - tcg_gen_ld_i32(t2, cpu_env, bofs + i); - tcg_gen_ld_i32(t3, cpu_env, cofs + i); + tcg_gen_ld_i32(t1, tcg_env, aofs + i); + tcg_gen_ld_i32(t2, tcg_env, bofs + i); + tcg_gen_ld_i32(t3, tcg_env, cofs + i); fni(t0, t1, t2, t3, c); - tcg_gen_st_i32(t0, cpu_env, dofs + i); + tcg_gen_st_i32(t0, tcg_env, dofs + i); } tcg_temp_free_i32(t3); tcg_temp_free_i32(t2); @@ -795,19 +827,19 @@ static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t } /* Expand OPSZ bytes worth of two-operand operations using i64 elements. 
*/ -static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, bool load_dest, +static void expand_2_i64(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); + tcg_gen_ld_i64(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_i64(t1, cpu_env, dofs + i); + tcg_gen_ld_i64(t1, dbase, dofs + i); } fni(t1, t0); - tcg_gen_st_i64(t1, cpu_env, dofs + i); + tcg_gen_st_i64(t1, dbase, dofs + i); } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -820,12 +852,12 @@ static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, int64_t uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); + tcg_gen_ld_i64(t0, tcg_env, aofs + i); if (load_dest) { - tcg_gen_ld_i64(t1, cpu_env, dofs + i); + tcg_gen_ld_i64(t1, tcg_env, dofs + i); } fni(t1, t0, c); - tcg_gen_st_i64(t1, cpu_env, dofs + i); + tcg_gen_st_i64(t1, tcg_env, dofs + i); } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -838,34 +870,34 @@ static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, TCGv_i64 uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); + tcg_gen_ld_i64(t0, tcg_env, aofs + i); if (scalar_first) { fni(t1, c, t0); } else { fni(t1, t0, c); } - tcg_gen_st_i64(t1, cpu_env, dofs + i); + tcg_gen_st_i64(t1, tcg_env, dofs + i); } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */ -static void expand_3_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, bool load_dest, - void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64)) { +static void expand_3_i64(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); - tcg_gen_ld_i64(t1, cpu_env, bofs + i); + tcg_gen_ld_i64(t0, abase, aofs + i); + tcg_gen_ld_i64(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_i64(t2, cpu_env, dofs + i); + tcg_gen_ld_i64(t2, dbase, dofs + i); } fni(t2, t0, t1); - tcg_gen_st_i64(t2, cpu_env, dofs + i); + tcg_gen_st_i64(t2, dbase, dofs + i); } tcg_temp_free_i64(t2); tcg_temp_free_i64(t1); @@ -873,20 +905,23 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t o } static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, int64_t c, bool load_dest, - void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t)) { + bool write_aofs, void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t)) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); - tcg_gen_ld_i64(t1, cpu_env, bofs + i); + tcg_gen_ld_i64(t0, tcg_env, aofs + i); + tcg_gen_ld_i64(t1, tcg_env, bofs + i); if (load_dest) { - tcg_gen_ld_i64(t2, cpu_env, dofs + i); + tcg_gen_ld_i64(t2, tcg_env, dofs + i); } fni(t2, t0, t1, c); - tcg_gen_st_i64(t2, cpu_env, dofs + i); + tcg_gen_st_i64(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i64(t0, tcg_env, aofs + i); + } } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -903,13 +938,13 @@ 
static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t c uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t1, cpu_env, aofs + i); - tcg_gen_ld_i64(t2, cpu_env, bofs + i); - tcg_gen_ld_i64(t3, cpu_env, cofs + i); + tcg_gen_ld_i64(t1, tcg_env, aofs + i); + tcg_gen_ld_i64(t2, tcg_env, bofs + i); + tcg_gen_ld_i64(t3, tcg_env, cofs + i); fni(t0, t1, t2, t3); - tcg_gen_st_i64(t0, cpu_env, dofs + i); + tcg_gen_st_i64(t0, tcg_env, dofs + i); if (write_aofs) { - tcg_gen_st_i64(t1, cpu_env, aofs + i); + tcg_gen_st_i64(t1, tcg_env, aofs + i); } } tcg_temp_free_i64(t3); @@ -927,11 +962,11 @@ static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t1, cpu_env, aofs + i); - tcg_gen_ld_i64(t2, cpu_env, bofs + i); - tcg_gen_ld_i64(t3, cpu_env, cofs + i); + tcg_gen_ld_i64(t1, tcg_env, aofs + i); + tcg_gen_ld_i64(t2, tcg_env, bofs + i); + tcg_gen_ld_i64(t3, tcg_env, cofs + i); fni(t0, t1, t2, t3, c); - tcg_gen_st_i64(t0, cpu_env, dofs + i); + tcg_gen_st_i64(t0, tcg_env, dofs + i); } tcg_temp_free_i64(t3); tcg_temp_free_i64(t2); @@ -940,83 +975,71 @@ static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t } /* Expand OPSZ bytes worth of two-operand operations using host vectors. */ -static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, - bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; +static void expand_2_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, + uint32_t tysz, TCGType type, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec)) { + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); + tcg_gen_ld_vec(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_vec(t1, cpu_env, dofs + i); + tcg_gen_ld_vec(t1, dbase, dofs + i); } fni(vece, t1, t0); - tcg_gen_st_vec(t1, cpu_env, dofs + i); + tcg_gen_st_vec(t1, dbase, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand using host vectors. 
*/ static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, int64_t c, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); + tcg_gen_ld_vec(t0, tcg_env, aofs + i); if (load_dest) { - tcg_gen_ld_vec(t1, cpu_env, dofs + i); + tcg_gen_ld_vec(t1, tcg_env, dofs + i); } fni(vece, t1, t0, c); - tcg_gen_st_vec(t1, cpu_env, dofs + i); + tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, TCGv_vec c, bool scalar_first, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); + tcg_gen_ld_vec(t0, tcg_env, aofs + i); if (scalar_first) { fni(vece, t1, c, t0); } else { fni(vece, t1, t0, c); } - tcg_gen_st_vec(t1, cpu_env, dofs + i); + tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } /* Expand OPSZ bytes worth of three-operand operations using host vectors. */ -static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, - TCGType type, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - uint32_t i; - - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); - tcg_gen_ld_vec(t1, cpu_env, bofs + i); +static void expand_3_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, + uint32_t bofs, uint32_t oprsz, uint32_t tysz, TCGType type, bool load_dest, + void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + + tcg_gen_ld_vec(t0, abase, aofs + i); + tcg_gen_ld_vec(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_vec(t2, cpu_env, dofs + i); + tcg_gen_ld_vec(t2, dbase, dofs + i); } fni(vece, t2, t0, t1); - tcg_gen_st_vec(t2, cpu_env, dofs + i); + tcg_gen_st_vec(t2, dbase, dofs + i); } - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* @@ -1024,51 +1047,45 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t b * using host vectors. 
*/ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, - TCGType type, int64_t c, bool load_dest, + TCGType type, int64_t c, bool load_dest, bool write_aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); - tcg_gen_ld_vec(t1, cpu_env, bofs + i); + tcg_gen_ld_vec(t0, tcg_env, aofs + i); + tcg_gen_ld_vec(t1, tcg_env, bofs + i); if (load_dest) { - tcg_gen_ld_vec(t2, cpu_env, dofs + i); + tcg_gen_ld_vec(t2, tcg_env, dofs + i); } fni(vece, t2, t0, t1, c); - tcg_gen_st_vec(t2, cpu_env, dofs + i); + tcg_gen_st_vec(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_vec(t0, tcg_env, aofs + i); + } } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t2); } /* Expand OPSZ bytes worth of four-operand operations using host vectors. */ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t tysz, TCGType type, bool write_aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - TCGv_vec t3 = tcg_temp_new_vec(type); - uint32_t i; - - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t1, cpu_env, aofs + i); - tcg_gen_ld_vec(t2, cpu_env, bofs + i); - tcg_gen_ld_vec(t3, cpu_env, cofs + i); + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_temp_new_vec(type); + + tcg_gen_ld_vec(t1, tcg_env, aofs + i); + tcg_gen_ld_vec(t2, tcg_env, bofs + i); + tcg_gen_ld_vec(t3, tcg_env, cofs + i); fni(vece, t0, t1, t2, t3); - tcg_gen_st_vec(t0, cpu_env, dofs + i); + tcg_gen_st_vec(t0, tcg_env, dofs + i); if (write_aofs) { - tcg_gen_st_vec(t1, cpu_env, aofs + i); + tcg_gen_st_vec(t1, tcg_env, aofs + i); } } - tcg_temp_free_vec(t3); - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* @@ -1078,34 +1095,30 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t b static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t tysz, TCGType type, int64_t c, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - TCGv_vec t3 = tcg_temp_new_vec(type); - uint32_t i; - - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t1, cpu_env, aofs + i); - tcg_gen_ld_vec(t2, cpu_env, bofs + i); - tcg_gen_ld_vec(t3, cpu_env, cofs + i); + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_temp_new_vec(type); + + tcg_gen_ld_vec(t1, tcg_env, aofs + i); + tcg_gen_ld_vec(t2, tcg_env, bofs + i); + tcg_gen_ld_vec(t3, tcg_env, cofs + i); fni(vece, t0, t1, t2, t3, c); - tcg_gen_st_vec(t0, cpu_env, dofs + i); + tcg_gen_st_vec(t0, tcg_env, dofs + i); } - 
tcg_temp_free_vec(t3); - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* Expand a vector two-operand operation. */ -void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) { +void tcg_gen_gvec_2_var(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, + const GVecGen2 *g) { const TCGOpcode *this_list = g->opt_opc ?: vecop_list_empty; const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(dbase, dofs, abase, aofs, maxsz); type = 0; if (g->fniv) { @@ -1118,7 +1131,7 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, some, 32, TCG_TYPE_V256, g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1128,20 +1141,20 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 16, TCG_TYPE_V128, g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8); + expand_2_i64(dbase, dofs, abase, aofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4); + expand_2_i32(dbase, dofs, abase, aofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); + expand_2_ool(dbase, dofs, abase, aofs, oprsz, maxsz, g->data, g->fno); oprsz = maxsz; } break; @@ -1152,10 +1165,14 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } +void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) { + tcg_gen_gvec_2_var(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, g); +} + /* Expand a vector operation with two vectors and an immediate. 
*/ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen2i *g) { const TCGOpcode *this_list = g->opt_opc ?: vecop_list_empty; @@ -1164,7 +1181,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxs uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); type = 0; if (g->fniv) { @@ -1215,7 +1232,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxs tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -1224,7 +1241,7 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxs TCGType type; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); type = 0; if (g->fniv) { @@ -1287,19 +1304,20 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxs } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } /* Expand a vector three-operand operation. */ -void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) { +void tcg_gen_gvec_3_var(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) { const TCGOpcode *this_list = g->opt_opc ?: vecop_list_empty; const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs | bofs); - check_overlap_3(dofs, aofs, bofs, maxsz); + check_overlap_3(dbase, dofs, abase, aofs, bbase, bofs, maxsz); type = 0; if (g->fniv) { @@ -1312,7 +1330,8 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, * that e.g. size == 80 would be expanded with 2x32 + 1x16. 
*/ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, g->load_dest, g->fniv); + expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs, some, 32, TCG_TYPE_V256, g->load_dest, + g->fniv); if (some == oprsz) { break; } @@ -1323,20 +1342,21 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, g->load_dest, g->fniv); + expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs, oprsz, 16, TCG_TYPE_V128, g->load_dest, + g->fniv); break; case TCG_TYPE_V64: - expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv); + expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs, oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8); + expand_3_i64(dbase, dofs, abase, aofs, bbase, bofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4); + expand_3_i32(dbase, dofs, abase, aofs, bbase, bofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); - tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno); + expand_3_ool(dbase, dofs, abase, aofs, bbase, bofs, oprsz, maxsz, g->data, g->fno); oprsz = maxsz; } break; @@ -1347,10 +1367,14 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } +void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) { + tcg_gen_gvec_3_var(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, oprsz, maxsz, g); +} + /* Expand a vector operation with three vectors and an immediate. */ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen3i *g) { @@ -1360,7 +1384,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs | bofs); - check_overlap_3(dofs, aofs, bofs, maxsz); + check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz); type = 0; if (g->fniv) { @@ -1374,7 +1398,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz * that e.g. size == 80 would be expanded with 2x32 + 1x16. 
*/ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, c, g->load_dest, g->fniv); + expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, c, g->load_dest, g->write_aofs, g->fniv); if (some == oprsz) { break; } @@ -1385,17 +1409,17 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, c, g->load_dest, g->fniv); + expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, c, g->load_dest, g->write_aofs, g->fniv); break; case TCG_TYPE_V64: - expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, c, g->load_dest, g->fniv); + expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, c, g->load_dest, g->write_aofs, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8); + expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->write_aofs, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4); + expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->write_aofs, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno); @@ -1409,7 +1433,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -1422,7 +1446,7 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs); - check_overlap_4(dofs, aofs, bofs, cofs, maxsz); + check_overlap_4(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, tcg_env, cofs, maxsz); type = 0; if (g->fniv) { @@ -1471,7 +1495,7 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -1484,7 +1508,7 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs); - check_overlap_4(dofs, aofs, bofs, cofs, maxsz); + check_overlap_4(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, tcg_env, cofs, maxsz); type = 0; if (g->fniv) { @@ -1534,7 +1558,7 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -1546,33 +1570,40 @@ static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b) { tcg_gen_mov_vec(a, b); } -void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { +void tcg_gen_gvec_mov_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, + uint32_t maxsz) { static const GVecGen2 g = { .fni8 = tcg_gen_mov_i64, .fniv = vec_mov2, .fno = gen_helper_gvec_mov, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; - if (dofs != aofs) { - tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g); - } else { + + if (dofs == aofs && dbase == abase) { check_size_align(oprsz, maxsz, dofs); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, 
dofs + oprsz, maxsz - oprsz); } + return; } + + tcg_gen_gvec_2_var(dbase, dofs, abase, aofs, oprsz, maxsz, &g); +} + +void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { + tcg_gen_gvec_mov_var(vece, tcg_env, dofs, tcg_env, aofs, oprsz, maxsz); } void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_i32 in) { check_size_align(oprsz, maxsz, dofs); tcg_debug_assert(vece <= MO_32); - do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0); + do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0); } void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, TCGv_i64 in) { check_size_align(oprsz, maxsz, dofs); tcg_debug_assert(vece <= MO_64); - do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0); + do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0); } void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { @@ -1581,28 +1612,27 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t TCGType type = choose_vector_type(NULL, vece, oprsz, 0); if (type != 0) { TCGv_vec t_vec = tcg_temp_new_vec(type); - tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs); - do_dup_store(type, dofs, oprsz, maxsz, t_vec); - tcg_temp_free_vec(t_vec); + tcg_gen_dup_mem_vec(vece, t_vec, tcg_env, aofs); + do_dup_store(type, tcg_env, dofs, oprsz, maxsz, t_vec); } else if (vece <= MO_32) { TCGv_i32 in = tcg_temp_ebb_new_i32(); switch (vece) { case MO_8: - tcg_gen_ld8u_i32(in, cpu_env, aofs); + tcg_gen_ld8u_i32(in, tcg_env, aofs); break; case MO_16: - tcg_gen_ld16u_i32(in, cpu_env, aofs); + tcg_gen_ld16u_i32(in, tcg_env, aofs); break; default: - tcg_gen_ld_i32(in, cpu_env, aofs); + tcg_gen_ld_i32(in, tcg_env, aofs); break; } - do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0); + do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0); tcg_temp_free_i32(in); } else { TCGv_i64 in = tcg_temp_ebb_new_i64(); - tcg_gen_ld_i64(in, cpu_env, aofs); - do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0); + tcg_gen_ld_i64(in, tcg_env, aofs); + do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0); tcg_temp_free_i64(in); } } else if (vece == 4) { @@ -1613,26 +1643,25 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t if (TCG_TARGET_HAS_v128) { TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V128); - tcg_gen_ld_vec(in, cpu_env, aofs); + tcg_gen_ld_vec(in, tcg_env, aofs); for (i = (aofs == dofs) * 16; i < oprsz; i += 16) { - tcg_gen_st_vec(in, cpu_env, dofs + i); + tcg_gen_st_vec(in, tcg_env, dofs + i); } - tcg_temp_free_vec(in); } else { TCGv_i64 in0 = tcg_temp_ebb_new_i64(); TCGv_i64 in1 = tcg_temp_ebb_new_i64(); - tcg_gen_ld_i64(in0, cpu_env, aofs); - tcg_gen_ld_i64(in1, cpu_env, aofs + 8); + tcg_gen_ld_i64(in0, tcg_env, aofs); + tcg_gen_ld_i64(in1, tcg_env, aofs + 8); for (i = (aofs == dofs) * 16; i < oprsz; i += 16) { - tcg_gen_st_i64(in0, cpu_env, dofs + i); - tcg_gen_st_i64(in1, cpu_env, dofs + i + 8); + tcg_gen_st_i64(in0, tcg_env, dofs + i); + tcg_gen_st_i64(in1, tcg_env, dofs + i + 8); } tcg_temp_free_i64(in0); tcg_temp_free_i64(in1); } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } else if (vece == 5) { /* 256-bit duplicate. 
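           As with the 128-bit case above, the copy loops start at
           i = (aofs == dofs) * 32, so an in-place duplicate skips the
           redundant store over the element that was just loaded.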
*/ @@ -1643,34 +1672,31 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t if (TCG_TARGET_HAS_v256) { TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V256); - tcg_gen_ld_vec(in, cpu_env, aofs); + tcg_gen_ld_vec(in, tcg_env, aofs); for (i = (aofs == dofs) * 32; i < oprsz; i += 32) { - tcg_gen_st_vec(in, cpu_env, dofs + i); + tcg_gen_st_vec(in, tcg_env, dofs + i); } - tcg_temp_free_vec(in); } else if (TCG_TARGET_HAS_v128) { TCGv_vec in0 = tcg_temp_new_vec(TCG_TYPE_V128); TCGv_vec in1 = tcg_temp_new_vec(TCG_TYPE_V128); - tcg_gen_ld_vec(in0, cpu_env, aofs); - tcg_gen_ld_vec(in1, cpu_env, aofs + 16); + tcg_gen_ld_vec(in0, tcg_env, aofs); + tcg_gen_ld_vec(in1, tcg_env, aofs + 16); for (i = (aofs == dofs) * 32; i < oprsz; i += 32) { - tcg_gen_st_vec(in0, cpu_env, dofs + i); - tcg_gen_st_vec(in1, cpu_env, dofs + i + 16); + tcg_gen_st_vec(in0, tcg_env, dofs + i); + tcg_gen_st_vec(in1, tcg_env, dofs + i + 16); } - tcg_temp_free_vec(in0); - tcg_temp_free_vec(in1); } else { TCGv_i64 in[4]; int j; for (j = 0; j < 4; ++j) { in[j] = tcg_temp_ebb_new_i64(); - tcg_gen_ld_i64(in[j], cpu_env, aofs + j * 8); + tcg_gen_ld_i64(in[j], tcg_env, aofs + j * 8); } for (i = (aofs == dofs) * 32; i < oprsz; i += 32) { for (j = 0; j < 4; ++j) { - tcg_gen_st_i64(in[j], cpu_env, dofs + i + j * 8); + tcg_gen_st_i64(in[j], tcg_env, dofs + i + j * 8); } } for (j = 0; j < 4; ++j) { @@ -1678,16 +1704,21 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t } } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } else { g_assert_not_reached(); } } -void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, uint64_t x) { +void tcg_gen_gvec_dup_imm_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, + uint64_t x) { check_size_align(oprsz, maxsz, dofs); - do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x); + do_dup(vece, dbase, dofs, oprsz, maxsz, NULL, NULL, x); +} + +void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz, uint32_t maxsz, uint64_t x) { + tcg_gen_gvec_dup_imm_var(vece, tcg_env, dofs, oprsz, maxsz, x); } void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { @@ -1777,7 +1808,8 @@ void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { static const TCGOpcode vecop_list_add[] = {INDEX_op_add_vec, 0}; -void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { +void tcg_gen_gvec_add_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { {.fni8 = tcg_gen_vec_add8_i64, .fniv = tcg_gen_add_vec, @@ -1803,7 +1835,11 @@ void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs }; tcg_debug_assert(vece <= MO_64); - tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); + tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs, oprsz, maxsz, &g[vece]); +} + +void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + tcg_gen_gvec_add_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, oprsz, maxsz); } void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) { @@ -1944,7 +1980,8 @@ void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { 
tcg_temp_free_i64(t2); } -void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { +void tcg_gen_gvec_sub_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, uint32_t aofs, TCGv_ptr bbase, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { {.fni8 = tcg_gen_vec_sub8_i64, .fniv = tcg_gen_sub_vec, @@ -1970,7 +2007,11 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs }; tcg_debug_assert(vece <= MO_64); - tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); + tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs, oprsz, maxsz, &g[vece]); +} + +void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + tcg_gen_gvec_sub_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, oprsz, maxsz); } static const TCGOpcode vecop_list_mul[] = {INDEX_op_mul_vec, 0}; @@ -2780,15 +2821,14 @@ typedef struct { static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, TCGv_i32 shift, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); - fni(vece, t0, t0, shift); - tcg_gen_st_vec(t0, cpu_env, dofs + i); + tcg_gen_ld_vec(t0, tcg_env, aofs + i); + fni(vece, t1, t0, shift); + tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); } static void do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz, @@ -2797,7 +2837,7 @@ static void do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); /* If the backend has a scalar expansion, great. 
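       When nothing can be expanded in-line, do_gvec_shifts falls back
       on the out-of-line helpers in g->fno[], packing the runtime shift
       count into the SIMD descriptor via the tcg_gen_shli_i32 /
       tcg_gen_ori_i32 pair below.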
*/ type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64); @@ -2884,8 +2924,8 @@ static void do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 tcg_gen_shli_i32(desc, shift, SIMD_DATA_SHIFT); tcg_gen_ori_i32(desc, desc, simd_desc(oprsz, maxsz, 0)); - tcg_gen_addi_ptr(a0, cpu_env, dofs); - tcg_gen_addi_ptr(a1, cpu_env, aofs); + tcg_gen_addi_ptr(a0, tcg_env, dofs); + tcg_gen_addi_ptr(a1, tcg_env, aofs); g->fno[vece](a0, a1, desc); @@ -2897,7 +2937,7 @@ static void do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 clear_tail: if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -3258,11 +3298,10 @@ static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, cpu_env, aofs + i); - tcg_gen_ld_i32(t1, cpu_env, bofs + i); - tcg_gen_setcond_i32(cond, t0, t0, t1); - tcg_gen_neg_i32(t0, t0); - tcg_gen_st_i32(t0, cpu_env, dofs + i); + tcg_gen_ld_i32(t0, tcg_env, aofs + i); + tcg_gen_ld_i32(t1, tcg_env, bofs + i); + tcg_gen_negsetcond_i32(cond, t0, t0, t1); + tcg_gen_st_i32(t0, tcg_env, dofs + i); } tcg_temp_free_i32(t1); tcg_temp_free_i32(t0); @@ -3274,11 +3313,10 @@ static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, aofs + i); - tcg_gen_ld_i64(t1, cpu_env, bofs + i); - tcg_gen_setcond_i64(cond, t0, t0, t1); - tcg_gen_neg_i64(t0, t0); - tcg_gen_st_i64(t0, cpu_env, dofs + i); + tcg_gen_ld_i64(t0, tcg_env, aofs + i); + tcg_gen_ld_i64(t1, tcg_env, bofs + i); + tcg_gen_negsetcond_i64(cond, t0, t0, t1); + tcg_gen_st_i64(t0, tcg_env, dofs + i); } tcg_temp_free_i64(t1); tcg_temp_free_i64(t0); @@ -3286,18 +3324,16 @@ static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, TCGType type, TCGCond cond) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { - tcg_gen_ld_vec(t0, cpu_env, aofs + i); - tcg_gen_ld_vec(t1, cpu_env, bofs + i); - tcg_gen_cmp_vec(cond, vece, t0, t0, t1); - tcg_gen_st_vec(t0, cpu_env, dofs + i); + tcg_gen_ld_vec(t0, tcg_env, aofs + i); + tcg_gen_ld_vec(t1, tcg_env, bofs + i); + tcg_gen_cmp_vec(cond, vece, t2, t0, t1); + tcg_gen_st_vec(t2, tcg_env, dofs + i); } - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, @@ -3325,10 +3361,10 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs | bofs); - check_overlap_3(dofs, aofs, bofs, maxsz); + check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz); if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) { - do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, -(cond == TCG_COND_ALWAYS)); + do_dup(MO_8, tcg_env, dofs, oprsz, maxsz, NULL, NULL, -(cond == TCG_COND_ALWAYS)); return; } @@ -3388,10 +3424,135 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + 
oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); + } +} + +static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, + TCGCond cond, TCGv_vec c) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + uint32_t i; + + for (i = 0; i < oprsz; i += tysz) { + tcg_gen_ld_vec(t1, tcg_env, aofs + i); + tcg_gen_cmp_vec(cond, vece, t0, t1, c); + tcg_gen_st_vec(t0, tcg_env, dofs + i); + } +} + +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz) { + static const TCGOpcode cmp_list[] = {INDEX_op_cmp_vec, 0}; + static gen_helper_gvec_2i *const eq_fn[4] = {gen_helper_gvec_eqs8, gen_helper_gvec_eqs16, gen_helper_gvec_eqs32, + gen_helper_gvec_eqs64}; + static gen_helper_gvec_2i *const lt_fn[4] = {gen_helper_gvec_lts8, gen_helper_gvec_lts16, gen_helper_gvec_lts32, + gen_helper_gvec_lts64}; + static gen_helper_gvec_2i *const le_fn[4] = {gen_helper_gvec_les8, gen_helper_gvec_les16, gen_helper_gvec_les32, + gen_helper_gvec_les64}; + static gen_helper_gvec_2i *const ltu_fn[4] = {gen_helper_gvec_ltus8, gen_helper_gvec_ltus16, gen_helper_gvec_ltus32, + gen_helper_gvec_ltus64}; + static gen_helper_gvec_2i *const leu_fn[4] = {gen_helper_gvec_leus8, gen_helper_gvec_leus16, gen_helper_gvec_leus32, + gen_helper_gvec_leus64}; + static gen_helper_gvec_2i *const *const fns[16] = { + [TCG_COND_EQ] = eq_fn, [TCG_COND_LT] = lt_fn, [TCG_COND_LE] = le_fn, + [TCG_COND_LTU] = ltu_fn, [TCG_COND_LEU] = leu_fn, + }; + + TCGType type; + + check_size_align(oprsz, maxsz, dofs | aofs); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); + + if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) { + do_dup(MO_8, tcg_env, dofs, oprsz, maxsz, NULL, NULL, -(cond == TCG_COND_ALWAYS)); + return; + } + + /* + * Implement inline with a vector type, if possible. + * Prefer integer when 64-bit host and 64-bit comparison. 
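+     * The scalar fallbacks below rely on negsetcond producing the same
+     * all-ones/all-zeros element mask (-1 for true, 0 for false) that a
+     * vector tcg_gen_cmp_vec would store.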
+ */ + type = choose_vector_type(cmp_list, vece, oprsz, TCG_TARGET_REG_BITS == 64 && vece == MO_64); + if (type != 0) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list); + TCGv_vec t_vec = tcg_temp_new_vec(type); + uint32_t some; + + tcg_gen_dup_i64_vec(vece, t_vec, c); + switch (type) { + case TCG_TYPE_V256: + some = QEMU_ALIGN_DOWN(oprsz, 32); + expand_cmps_vec(vece, dofs, aofs, some, 32, TCG_TYPE_V256, cond, t_vec); + aofs += some; + dofs += some; + oprsz -= some; + maxsz -= some; + /* fallthru */ + + case TCG_TYPE_V128: + some = QEMU_ALIGN_DOWN(oprsz, 16); + expand_cmps_vec(vece, dofs, aofs, some, 16, TCG_TYPE_V128, cond, t_vec); + break; + + case TCG_TYPE_V64: + some = QEMU_ALIGN_DOWN(oprsz, 8); + expand_cmps_vec(vece, dofs, aofs, some, 8, TCG_TYPE_V64, cond, t_vec); + break; + + default: + g_assert_not_reached(); + } + tcg_temp_free_vec(t_vec); + tcg_swap_vecop_list(hold_list); + } else if (vece == MO_64 && check_size_impl(oprsz, 8)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + uint32_t i; + + for (i = 0; i < oprsz; i += 8) { + tcg_gen_ld_i64(t0, tcg_env, aofs + i); + tcg_gen_negsetcond_i64(cond, t0, t0, c); + tcg_gen_st_i64(t0, tcg_env, dofs + i); + } + tcg_temp_free_i64(t0); + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + uint32_t i; + + tcg_gen_extrl_i64_i32(t1, c); + for (i = 0; i < oprsz; i += 4) { + tcg_gen_ld_i32(t0, tcg_env, aofs + i); + tcg_gen_negsetcond_i32(cond, t0, t0, t1); + tcg_gen_st_i32(t0, tcg_env, dofs + i); + } + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } else { + gen_helper_gvec_2i *const *fn = fns[cond]; + bool inv = false; + + if (fn == NULL) { + cond = tcg_invert_cond(cond); + fn = fns[cond]; + assert(fn != NULL); + inv = true; + } + tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]); + return; + } + + if (oprsz < maxsz) { + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz) { + TCGv_i64 tmp = tcg_constant_i64(c); + tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz); +} + static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c) { TCGv_i64 t = tcg_temp_ebb_new_i64(); diff --git a/libtcg/src/tcg-op-ldst.c b/libtcg/src/tcg-op-ldst.c index ba3343e30..911f02ee9 100644 --- a/libtcg/src/tcg-op-ldst.c +++ b/libtcg/src/tcg-op-ldst.c @@ -31,22 +31,25 @@ #include #include #include +#include +#include +#include // clang-format on #include static void check_max_alignment(unsigned a_bits) { -#if defined(CONFIG_SOFTMMU) /* * The requested alignment cannot overlap the TLB flags. - * FIXME: Must keep the count up-to-date with "exec/cpu-all.h". + * FIXME: Must keep the count up-to-date with "exec/tlb-flags.h". */ - tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits); -#endif + if (tcg_use_softmmu) { + tcg_debug_assert(a_bits + 5 <= TARGET_PAGE_BITS); + } } static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st) { - unsigned a_bits = get_alignment_bits(op); + unsigned a_bits = memop_alignment_bits(op); check_max_alignment(a_bits); @@ -78,36 +81,39 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st) { if (st) { op &= ~MO_SIGN; } + + /* In serial mode, reduce atomicity. 
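+     * With CF_PARALLEL clear this TB cannot race with another vCPU, so
+     * any MO_ATOM_* requirement may be weakened to MO_ATOM_NONE. The
+     * i128 paths make the same reduction, which is why
+     * use_two_i64_for_i128 no longer needs its own CF_PARALLEL check.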
*/ + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { + op &= ~MO_ATOM_MASK; + op |= MO_ATOM_NONE; + } + return op; } -static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh, TCGTemp *addr, MemOpIdx oi) { - if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) { - if (vh) { - tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi); - } else { - tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi); - } - } else { - /* See TCGV_LOW/HIGH. */ - TCGTemp *al = addr + HOST_BIG_ENDIAN; - TCGTemp *ah = addr + !HOST_BIG_ENDIAN; +static void gen_ldst1(TCGOpcode opc, TCGType type, TCGTemp *v, TCGTemp *addr, MemOpIdx oi) { + TCGOp *op = tcg_gen_op3(opc, type, temp_arg(v), temp_arg(addr), oi); + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; +} - if (vh) { - tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh), temp_arg(al), temp_arg(ah), oi); - } else { - tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi); - } +static void gen_ldst2(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, TCGTemp *addr, MemOpIdx oi) { + TCGOp *op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi); + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; +} + +static void gen_ld_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) { + if (TCG_TARGET_REG_BITS == 32) { + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I64, tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), addr, oi); + } else { + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); } } -static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) { +static void gen_st_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) { if (TCG_TARGET_REG_BITS == 32) { - TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v)); - TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v)); - gen_ldst(opc, vl, vh, addr, oi); + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I64, tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), addr, oi); } else { - gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); } } @@ -119,6 +125,28 @@ static void tcg_gen_req_mo(TCGBar type) { } } +static TCGTemp *tci_extend_addr(TCGTemp *addr) { +#ifdef CONFIG_TCG_INTERPRETER + /* + * 64-bit interpreter requires 64-bit addresses. + * Compare to the extension performed by tcg_out_{ld,st}_helper_args + * for native code generation. + */ + if (TCG_TARGET_REG_BITS == 64 && tcg_ctx->addr_type == TCG_TYPE_I32) { + TCGv_i64 temp = tcg_temp_ebb_new_i64(); + tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr)); + return tcgv_i64_temp(temp); + } +#endif + return addr; +} + +static void maybe_free_addr(TCGTemp *addr, TCGTemp *copy) { + if (addr != copy) { + tcg_temp_free_internal(copy); + } +} + /* Only required for loads, where value might overlap addr. 
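   A store has no output that could clobber addr, so the store paths
   below simply pass NULL for copy_addr.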
*/ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr) { #ifdef CONFIG_PLUGIN @@ -136,27 +164,57 @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr) { return NULL; } -static void plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { #ifdef CONFIG_PLUGIN - if (tcg_ctx->plugin_insn != NULL) { - qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw); +static void plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { + qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw); - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - if (!copy_addr) { - copy_addr = tcg_temp_ebb_new_i64(); - tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr)); - } - plugin_gen_empty_mem_callback(copy_addr, info); + if (tcg_ctx->addr_type == TCG_TYPE_I32) { + if (!copy_addr) { + copy_addr = tcg_temp_ebb_new_i64(); + tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr)); + } + tcg_gen_plugin_mem_cb(copy_addr, info); + tcg_temp_free_i64(copy_addr); + } else { + if (copy_addr) { + tcg_gen_plugin_mem_cb(copy_addr, info); tcg_temp_free_i64(copy_addr); } else { - if (copy_addr) { - plugin_gen_empty_mem_callback(copy_addr, info); - tcg_temp_free_i64(copy_addr); - } else { - plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info); - } + tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info); } } +} +#endif + +static void plugin_gen_mem_callbacks_i32(TCGv_i32 val, TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, + enum qemu_plugin_mem_rw rw) { +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i32(val, tcg_env, + offsetof(CPUState, neg.plugin_mem_value_low) - sizeof(CPUState) + (HOST_BIG_ENDIAN * 4)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } +#endif +} + +static void plugin_gen_mem_callbacks_i64(TCGv_i64 val, TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, + enum qemu_plugin_mem_rw rw) { +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i64(val, tcg_env, offsetof(CPUState, neg.plugin_mem_value_low) - sizeof(CPUState)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } +#endif +} + +static void plugin_gen_mem_callbacks_i128(TCGv_i128 val, TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, + enum qemu_plugin_mem_rw rw) { +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i64(TCGV128_LOW(val), tcg_env, offsetof(CPUState, neg.plugin_mem_value_low) - sizeof(CPUState)); + tcg_gen_st_i64(TCGV128_HIGH(val), tcg_env, offsetof(CPUState, neg.plugin_mem_value_high) - sizeof(CPUState)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } #endif } @@ -164,7 +222,7 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, TCGArg idx, Mem MemOp orig_memop; MemOpIdx orig_oi, oi; TCGv_i64 copy_addr; - TCGOpcode opc; + TCGTemp *addr_new; tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0); @@ -179,14 +237,11 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, TCGArg idx, Mem oi = make_memop_idx(memop, idx); } + addr_new = tci_extend_addr(addr); copy_addr = plugin_maybe_preserve_addr(addr); - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_ld_a32_i32; - } else { - opc = INDEX_op_qemu_ld_a64_i32; - } - gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi); - plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi); + 
plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + maybe_free_addr(addr, addr_new); if ((orig_memop ^ memop) & MO_BSWAP) { switch (orig_memop & MO_SIZE) { @@ -212,7 +267,7 @@ void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx, MemOp memo static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, TCGArg idx, MemOp memop) { TCGv_i32 swap = NULL; MemOpIdx orig_oi, oi; - TCGOpcode opc; + TCGTemp *addr_new; tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 0, 1); @@ -235,21 +290,10 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, TCGArg idx, Mem oi = make_memop_idx(memop, idx); } - if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) { - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_st8_a32_i32; - } else { - opc = INDEX_op_qemu_st8_a64_i32; - } - } else { - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_st_a32_i32; - } else { - opc = INDEX_op_qemu_st_a64_i32; - } - } - gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi); - plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + addr_new = tci_extend_addr(addr); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr_new, oi); + plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + maybe_free_addr(addr, addr_new); if (swap) { tcg_temp_free_i32(swap); @@ -266,7 +310,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, TCGArg idx, Mem MemOp orig_memop; MemOpIdx orig_oi, oi; TCGv_i64 copy_addr; - TCGOpcode opc; + TCGTemp *addr_new; if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop); @@ -291,14 +335,11 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, TCGArg idx, Mem oi = make_memop_idx(memop, idx); } + addr_new = tci_extend_addr(addr); copy_addr = plugin_maybe_preserve_addr(addr); - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_ld_a32_i64; - } else { - opc = INDEX_op_qemu_ld_a64_i64; - } - gen_ldst_i64(opc, val, addr, oi); - plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + gen_ld_i64(val, addr_new, oi); + plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + maybe_free_addr(addr, addr_new); if ((orig_memop ^ memop) & MO_BSWAP) { int flags = (orig_memop & MO_SIGN ? 
TCG_BSWAP_IZ | TCG_BSWAP_OS : TCG_BSWAP_IZ | TCG_BSWAP_OZ); @@ -327,7 +368,7 @@ void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx, MemOp memo static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, TCGArg idx, MemOp memop) { TCGv_i64 swap = NULL; MemOpIdx orig_oi, oi; - TCGOpcode opc; + TCGTemp *addr_new; if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop); @@ -358,13 +399,10 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, TCGArg idx, Mem oi = make_memop_idx(memop, idx); } - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_st_a32_i64; - } else { - opc = INDEX_op_qemu_st_a64_i64; - } - gen_ldst_i64(opc, val, addr, oi); - plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + addr_new = tci_extend_addr(addr); + gen_st_i64(val, addr_new, oi); + plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + maybe_free_addr(addr, addr_new); if (swap) { tcg_temp_free_i64(swap); @@ -383,10 +421,11 @@ void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx, MemOp memo * to avoid a call to a helper function. */ static bool use_two_i64_for_i128(MemOp mop) { -#ifdef CONFIG_SOFTMMU /* Two softmmu tlb lookups is larger than one function call. */ - return false; -#else + if (tcg_use_softmmu) { + return false; + } + /* * For user-only, two 64-bit operations may well be smaller than a call. * Determine if that would be legal for the requested atomicity. @@ -399,12 +438,10 @@ static bool use_two_i64_for_i128(MemOp mop) { case MO_ATOM_SUBALIGN: case MO_ATOM_WITHIN16: case MO_ATOM_WITHIN16_PAIR: - /* In a serialized context, no atomicity is required. */ - return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL); + return false; default: g_assert_not_reached(); } -#endif } static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig) { @@ -466,13 +503,20 @@ static void maybe_free_addr64(TCGv_i64 a64) { } static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp memop) { - const MemOpIdx orig_oi = make_memop_idx(memop, idx); + MemOpIdx orig_oi; TCGv_i64 ext_addr = NULL; - TCGOpcode opc; + TCGTemp *addr_new; - check_max_alignment(get_alignment_bits(memop)); + check_max_alignment(memop_alignment_bits(memop)); tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + /* In serial mode, reduce atomicity. */ + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { + memop &= ~MO_ATOM_MASK; + memop |= MO_ATOM_NONE; + } + orig_oi = make_memop_idx(memop, idx); + /* TODO: For now, force 32-bit hosts to use the helper. 
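      (Such hosts either split the access into two i64 operations via
      use_two_i64_for_i128 or take the gen_helper_ld_i128 call at the
      bottom of this function.)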
*/ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) { TCGv_i64 lo, hi; @@ -489,12 +533,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M hi = TCGV128_HIGH(val); } - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_ld_a32_i128; - } else { - opc = INDEX_op_qemu_ld_a64_i128; - } - gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); + addr_new = tci_extend_addr(addr); + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I128, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr_new, oi); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_gen_bswap64_i64(lo, lo); @@ -509,12 +550,6 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M canonicalize_memop_i128_as_i64(mop, memop); need_bswap = (mop[0] ^ memop) & MO_BSWAP; - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_ld_a32_i64; - } else { - opc = INDEX_op_qemu_ld_a64_i64; - } - /* * Since there are no global TCGv_i128, there is no visible state * changed if the second load faults. Load directly into the two @@ -528,7 +563,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M y = TCGV128_LOW(val); } - gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx)); + addr_new = tci_extend_addr(addr); + gen_ld_i64(x, addr_new, make_memop_idx(mop[0], idx)); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_gen_bswap64_i64(x, x); @@ -544,7 +581,9 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M addr_p8 = tcgv_i64_temp(t); } - gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx)); + addr_new = tci_extend_addr(addr_p8); + gen_ld_i64(y, addr_new, make_memop_idx(mop[1], idx)); + maybe_free_addr(addr_p8, addr_new); tcg_temp_free_internal(addr_p8); if (need_bswap) { @@ -556,10 +595,10 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr)); addr = tcgv_i64_temp(ext_addr); } - gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr), tcg_constant_i32(orig_oi)); + gen_helper_ld_i128(val, tcg_env, temp_tcgv_i64(addr), tcg_constant_i32(orig_oi)); } - plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); } void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp memop, TCGType addr_type) { @@ -570,13 +609,20 @@ void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp me } static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp memop) { - const MemOpIdx orig_oi = make_memop_idx(memop, idx); + MemOpIdx orig_oi; TCGv_i64 ext_addr = NULL; - TCGOpcode opc; + TCGTemp *addr_new; - check_max_alignment(get_alignment_bits(memop)); + check_max_alignment(memop_alignment_bits(memop)); tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST); + /* In serial mode, reduce atomicity. */ + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { + memop &= ~MO_ATOM_MASK; + memop |= MO_ATOM_NONE; + } + orig_oi = make_memop_idx(memop, idx); + /* TODO: For now, force 32-bit hosts to use the helper. 
*/ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) { @@ -596,12 +642,9 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M hi = TCGV128_HIGH(val); } - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_st_a32_i128; - } else { - opc = INDEX_op_qemu_st_a64_i128; - } - gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); + addr_new = tci_extend_addr(addr); + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I128, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr_new, oi); + maybe_free_addr(addr, addr_new); if (need_bswap) { tcg_temp_free_i64(lo); @@ -614,12 +657,6 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M canonicalize_memop_i128_as_i64(mop, memop); - if (tcg_ctx->addr_type == TCG_TYPE_I32) { - opc = INDEX_op_qemu_st_a32_i64; - } else { - opc = INDEX_op_qemu_st_a64_i64; - } - if ((memop & MO_BSWAP) == MO_LE) { x = TCGV128_LOW(val); y = TCGV128_HIGH(val); @@ -634,7 +671,9 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M x = b; } - gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx)); + addr_new = tci_extend_addr(addr); + gen_st_i64(x, addr_new, make_memop_idx(mop[0], idx)); + maybe_free_addr(addr, addr_new); if (tcg_ctx->addr_type == TCG_TYPE_I32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -646,13 +685,15 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M addr_p8 = tcgv_i64_temp(t); } + addr_new = tci_extend_addr(addr_p8); if (b) { tcg_gen_bswap64_i64(b, y); - gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx)); + gen_st_i64(b, addr_new, make_memop_idx(mop[1], idx)); tcg_temp_free_i64(b); } else { - gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx)); + gen_st_i64(y, addr_new, make_memop_idx(mop[1], idx)); } + maybe_free_addr(addr_p8, addr_new); tcg_temp_free_internal(addr_p8); } else { if (tcg_ctx->addr_type == TCG_TYPE_I32) { @@ -660,10 +701,10 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, TCGArg idx, M tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr)); addr = tcgv_i64_temp(ext_addr); } - gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val, tcg_constant_i32(orig_oi)); + gen_helper_st_i128(tcg_env, temp_tcgv_i64(addr), val, tcg_constant_i32(orig_oi)); } - plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W); + plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W); } void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp memop, TCGType addr_type) { @@ -673,7 +714,7 @@ void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, MemOp me tcg_gen_qemu_st_i128_int(val, addr, idx, memop); } -static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc) { +void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc) { switch (opc & MO_SSIZE) { case MO_SB: tcg_gen_ext8s_i32(ret, val); @@ -687,13 +728,16 @@ static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc) { case MO_UW: tcg_gen_ext16u_i32(ret, val); break; - default: + case MO_UL: + case MO_SL: tcg_gen_mov_i32(ret, val); break; + default: + g_assert_not_reached(); } } -static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc) { +void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc) { switch (opc & MO_SSIZE) { case MO_SB: tcg_gen_ext8s_i64(ret, val); @@ -713,9 +757,12 @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc) { case MO_UL: tcg_gen_ext32u_i64(ret, val); break; - default: + case MO_UQ: + case MO_SQ: 
tcg_gen_mov_i64(ret, val); break; + default: + g_assert_not_reached(); } } @@ -724,6 +771,7 @@ typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64, TCGv_i typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64, TCGv_i128, TCGv_i128, TCGv_i32); typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64, TCGv_i32, TCGv_i32); typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64, TCGv_i32); +typedef void (*gen_atomic_op_i128)(TCGv_i128, TCGv_env, TCGv_i64, TCGv_i128, TCGv_i32); #ifdef CONFIG_ATOMIC64 #define WITH_ATOMIC64(X) X, @@ -791,7 +839,7 @@ static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr, TCGv_i3 oi = make_memop_idx(memop & ~MO_SIGN, idx); a64 = maybe_extend_addr64(addr); - gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi)); + gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi)); maybe_free_addr64(a64); if (memop & MO_SIGN) { @@ -860,12 +908,12 @@ static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr, TCGv_i6 if (gen) { MemOpIdx oi = make_memop_idx(memop, idx); TCGv_i64 a64 = maybe_extend_addr64(addr); - gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi)); + gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi)); maybe_free_addr64(a64); return; } - gen_helper_exit_atomic(cpu_env); + gen_helper_exit_atomic(tcg_env); /* * Produce a result for a well-formed opcode stream. This satisfies @@ -917,7 +965,7 @@ static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr, TC MemOpIdx oi = make_memop_idx(memop, idx); TCGv_i64 a64 = maybe_extend_addr64(addr); - gen_helper_nonatomic_cmpxchgo(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi)); + gen_helper_nonatomic_cmpxchgo(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi)); maybe_free_addr64(a64); } else { TCGv_i128 oldv = tcg_temp_ebb_new_i128(); @@ -968,12 +1016,12 @@ static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr, TCGv_ if (gen) { MemOpIdx oi = make_memop_idx(memop, idx); TCGv_i64 a64 = maybe_extend_addr64(addr); - gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi)); + gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi)); maybe_free_addr64(a64); return; } - gen_helper_exit_atomic(cpu_env); + gen_helper_exit_atomic(tcg_env); /* * Produce a result for a well-formed opcode stream. This satisfies @@ -1020,7 +1068,7 @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val, TCGArg i oi = make_memop_idx(memop & ~MO_SIGN, idx); a64 = maybe_extend_addr64(addr); - gen(ret, cpu_env, a64, val, tcg_constant_i32(oi)); + gen(ret, tcg_env, a64, val, tcg_constant_i32(oi)); maybe_free_addr64(a64); if (memop & MO_SIGN) { @@ -1054,12 +1102,12 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val, TCGArg i if (gen) { MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx); TCGv_i64 a64 = maybe_extend_addr64(addr); - gen(ret, cpu_env, a64, val, tcg_constant_i32(oi)); + gen(ret, tcg_env, a64, val, tcg_constant_i32(oi)); maybe_free_addr64(a64); return; } - gen_helper_exit_atomic(cpu_env); + gen_helper_exit_atomic(tcg_env); /* Produce a result, so that we have a well-formed opcode stream with respect to uses of the result in the (dead) code following. 
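       (At run time gen_helper_exit_atomic does not return: it raises
       EXCP_ATOMIC and the block is re-executed in a serial context.)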
 */
    tcg_gen_movi_i64(ret, 0);
@@ -1080,6 +1128,81 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val, TCGArg i
    }
}

+static void do_nonatomic_op_i128(TCGv_i128 ret, TCGTemp *addr, TCGv_i128 val, TCGArg idx, MemOp memop, bool new_val,
+                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64)) {
+    TCGv_i128 t = tcg_temp_ebb_new_i128();
+    TCGv_i128 r = tcg_temp_ebb_new_i128();
+
+    tcg_gen_qemu_ld_i128_int(r, addr, idx, memop);
+    gen(TCGV128_LOW(t), TCGV128_LOW(r), TCGV128_LOW(val));
+    gen(TCGV128_HIGH(t), TCGV128_HIGH(r), TCGV128_HIGH(val));
+    tcg_gen_qemu_st_i128_int(t, addr, idx, memop);
+
+    tcg_gen_mov_i128(ret, new_val ? t : r);
+    tcg_temp_free_i128(t);
+    tcg_temp_free_i128(r);
+}
+
+static void do_atomic_op_i128(TCGv_i128 ret, TCGTemp *addr, TCGv_i128 val, TCGArg idx, MemOp memop,
+                              void *const table[]) {
+    gen_atomic_op_i128 gen = table[memop & (MO_SIZE | MO_BSWAP)];
+
+    if (gen) {
+        MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
+        TCGv_i64 a64 = maybe_extend_addr64(addr);
+        gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
+        maybe_free_addr64(a64);
+        return;
+    }
+
+    gen_helper_exit_atomic(tcg_env);
+    /* Produce a result */
+    tcg_gen_movi_i64(TCGV128_LOW(ret), 0);
+    tcg_gen_movi_i64(TCGV128_HIGH(ret), 0);
+}
+
+#define GEN_ATOMIC_HELPER128(NAME, OP, NEW) \
+    static void *const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
+        [MO_8] = gen_helper_atomic_##NAME##b, \
+        [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
+        [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
+        [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
+        [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
+        WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
+        WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
+        WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_##NAME##o_le) \
+        WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_##NAME##o_be)}; \
+    void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val, TCGArg idx, MemOp memop, \
+                                         TCGType addr_type) { \
+        tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
+        tcg_debug_assert((memop & MO_SIZE) <= MO_32); \
+        if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
+            do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
+        } else { \
+            do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, tcg_gen_##OP##_i32); \
+        } \
+    } \
+    void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val, TCGArg idx, MemOp memop, \
+                                         TCGType addr_type) { \
+        tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
+        tcg_debug_assert((memop & MO_SIZE) <= MO_64); \
+        if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
+            do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
+        } else { \
+            do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, tcg_gen_##OP##_i64); \
+        } \
+    } \
+    void tcg_gen_atomic_##NAME##_i128_chk(TCGv_i128 ret, TCGTemp *addr, TCGv_i128 val, TCGArg idx, MemOp memop, \
+                                          TCGType addr_type) { \
+        tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
+        tcg_debug_assert((memop & MO_SIZE) == MO_128); \
+        if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
+            do_atomic_op_i128(ret, addr, val, idx, memop, table_##NAME); \
+        } else { \
+            do_nonatomic_op_i128(ret, addr, val, idx, memop, NEW, tcg_gen_##OP##_i64); \
+        } \
+    }
+
 #define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
     static void *const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
         [MO_8] = gen_helper_atomic_##NAME##b, \
@@ -1111,8 +1234,8 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val, TCGArg i
 }

 GEN_ATOMIC_HELPER(fetch_add, add, 0)
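 /*
  * For reference, with NAME=fetch_and / OP=and / NEW=0 the 128-bit leg
  * of GEN_ATOMIC_HELPER128 above expands to roughly:
  *
  *   void tcg_gen_atomic_fetch_and_i128_chk(TCGv_i128 ret, TCGTemp *addr,
  *                                          TCGv_i128 val, TCGArg idx,
  *                                          MemOp memop, TCGType addr_type) {
  *       tcg_debug_assert(addr_type == tcg_ctx->addr_type);
  *       tcg_debug_assert((memop & MO_SIZE) == MO_128);
  *       if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {
  *           do_atomic_op_i128(ret, addr, val, idx, memop, table_fetch_and);
  *       } else {
  *           do_nonatomic_op_i128(ret, addr, val, idx, memop, 0,
  *                                tcg_gen_and_i64);
  *       }
  *   }
  */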
-GEN_ATOMIC_HELPER(fetch_and, and, 0) -GEN_ATOMIC_HELPER(fetch_or, or, 0) +GEN_ATOMIC_HELPER128(fetch_and, and, 0) +GEN_ATOMIC_HELPER128(fetch_or, or, 0) GEN_ATOMIC_HELPER(fetch_xor, xor, 0) GEN_ATOMIC_HELPER(fetch_smin, smin, 0) GEN_ATOMIC_HELPER(fetch_umin, umin, 0) @@ -1136,6 +1259,7 @@ static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b) { tcg_gen_mov_i64(r, b); } -GEN_ATOMIC_HELPER(xchg, mov2, 0) +GEN_ATOMIC_HELPER128(xchg, mov2, 0) #undef GEN_ATOMIC_HELPER +#undef GEN_ATOMIC_HELPER128 diff --git a/libtcg/src/tcg-op-vec.c b/libtcg/src/tcg-op-vec.c index 4f8004970..fcec02910 100644 --- a/libtcg/src/tcg-op-vec.c +++ b/libtcg/src/tcg-op-vec.c @@ -23,6 +23,7 @@ #include #include #include +#include #include // clang-format on @@ -141,7 +142,7 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, TCGType type, unsigned vece) void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) { TCGOp *op = tcg_emit_op(opc, 2); - TCGOP_VECL(op) = type - TCG_TYPE_V64; + TCGOP_TYPE(op) = type; TCGOP_VECE(op) = vece; op->args[0] = r; op->args[1] = a; @@ -149,7 +150,7 @@ void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) { void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b) { TCGOp *op = tcg_emit_op(opc, 3); - TCGOP_VECL(op) = type - TCG_TYPE_V64; + TCGOP_TYPE(op) = type; TCGOP_VECE(op) = vece; op->args[0] = r; op->args[1] = a; @@ -158,7 +159,7 @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, T void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c) { TCGOp *op = tcg_emit_op(opc, 4); - TCGOP_VECL(op) = type - TCG_TYPE_V64; + TCGOP_TYPE(op) = type; TCGOP_VECE(op) = vece; op->args[0] = r; op->args[1] = a; @@ -166,10 +167,9 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, T op->args[3] = c; } -static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, - TCGArg e) { +void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) { TCGOp *op = tcg_emit_op(opc, 6); - TCGOP_VECL(op) = type - TCG_TYPE_V64; + TCGOP_TYPE(op) = type; TCGOP_VECE(op) = vece; op->args[0] = r; op->args[1] = a; @@ -361,12 +361,11 @@ static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) { } void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) { - const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); - - if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) { + if (TCG_TARGET_HAS_not_vec) { + vec_gen_op2(INDEX_op_not_vec, 0, r, a); + } else { tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1)); } - tcg_swap_vecop_list(hold_list); } void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) { @@ -467,9 +466,11 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_v TCGTemp *rt = tcgv_vec_temp(r); TCGTemp *at = tcgv_vec_temp(a); TCGTemp *bt = tcgv_vec_temp(b); + TCGTemp *tt = NULL; TCGArg ri = temp_arg(rt); TCGArg ai = temp_arg(at); TCGArg bi = temp_arg(bt); + TCGArg ti; TCGType type = rt->base_type; int can; @@ -477,6 +478,18 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_v tcg_debug_assert(bt->base_type >= type); tcg_assert_listed_vecop(INDEX_op_cmp_vec); can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); + + if (!TCG_TARGET_HAS_tst_vec && is_tst_cond(cond)) { + tt = tcg_temp_new_internal(type, 
TEMP_EBB); + ti = temp_arg(tt); + vec_gen_3(INDEX_op_and_vec, type, 0, ti, ai, bi); + at = tt; + ai = ti; + bt = tcg_constant_internal(type, 0); + bi = temp_arg(bt); + cond = tcg_tst_eqne_cond(cond); + } + if (can > 0) { vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); } else { @@ -485,6 +498,10 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_v tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); tcg_swap_vecop_list(hold_list); } + + if (tt) { + tcg_temp_free_internal(tt); + } } static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b, TCGOpcode opc) { diff --git a/libtcg/src/tcg-op.c b/libtcg/src/tcg-op.c index 538992e31..d2420b74c 100644 --- a/libtcg/src/tcg-op.c +++ b/libtcg/src/tcg-op.c @@ -32,65 +32,204 @@ #include #include #include +#include +#include +#include // clang-format on -void tcg_gen_op1(TCGOpcode opc, TCGArg a1) { +/* + * Encourage the compiler to tail-call to a function, rather than inlining. + * Minimizes code size across 99 bottles of beer on the wall. + */ +#define NI __attribute__((noinline)) + +TCGOp *NI tcg_gen_op1(TCGOpcode opc, TCGType type, TCGArg a1) { TCGOp *op = tcg_emit_op(opc, 1); + TCGOP_TYPE(op) = type; op->args[0] = a1; + return op; } -void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) { +TCGOp *NI tcg_gen_op2(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2) { TCGOp *op = tcg_emit_op(opc, 2); + TCGOP_TYPE(op) = type; op->args[0] = a1; op->args[1] = a2; + return op; } -void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) { +TCGOp *NI tcg_gen_op3(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2, TCGArg a3) { TCGOp *op = tcg_emit_op(opc, 3); + TCGOP_TYPE(op) = type; op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; + return op; } -void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { +TCGOp *NI tcg_gen_op4(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { TCGOp *op = tcg_emit_op(opc, 4); + TCGOP_TYPE(op) = type; op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; op->args[3] = a4; + return op; } -void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) { +TCGOp *NI tcg_gen_op5(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) { TCGOp *op = tcg_emit_op(opc, 5); + TCGOP_TYPE(op) = type; op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; op->args[3] = a4; op->args[4] = a5; + return op; } -void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) { +TCGOp *NI tcg_gen_op6(TCGOpcode opc, TCGType type, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) { TCGOp *op = tcg_emit_op(opc, 6); + TCGOP_TYPE(op) = type; op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; op->args[3] = a4; op->args[4] = a5; op->args[5] = a6; + return op; +} + +/* + * With CONFIG_DEBUG_TCG, tcgv_*_tmp via tcgv_*_arg, is an out-of-line + * assertion check. Force tail calls to avoid too much code expansion. 
+ */ +#ifdef CONFIG_DEBUG_TCG +#define DNI NI +#else +#define DNI +#endif + +static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGType type, TCGv_i32 a1) { + tcg_gen_op1(opc, type, tcgv_i32_arg(a1)); +} + +static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGType type, TCGv_i64 a1) { + tcg_gen_op1(opc, type, tcgv_i64_arg(a1)); +} + +static TCGOp *DNI tcg_gen_op1i(TCGOpcode opc, TCGType type, TCGArg a1) { + return tcg_gen_op1(opc, type, a1); +} + +static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) { + tcg_gen_op2(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); +} + +static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) { + tcg_gen_op2(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); +} + +static void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3) { + tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); +} + +static void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3) { + tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); +} + +static void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3) { + tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); +} + +static void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3) { + tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); +} + +static void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, TCGv_ptr base, TCGArg offset) { + tcg_gen_op3(opc, TCG_TYPE_I32, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); +} + +static void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, TCGv_ptr base, TCGArg offset) { + tcg_gen_op3(opc, TCG_TYPE_I64, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); +} + +static void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4) { + tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4)); +} + +static void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4) { + tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4)); +} + +static void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGArg a4) { + tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4); +} + +static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGArg a4) { + tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4); +} + +static TCGOp *DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3, TCGArg a4) { + return tcg_gen_op4(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); +} + +static TCGOp *DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3, TCGArg a4) { + return tcg_gen_op4(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); +} + +static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) { + tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), + tcgv_i32_arg(a5)); +} + +static void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) { + tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), + tcgv_i64_arg(a5)); +} + +static void DNI tcg_gen_op5ii_i32(TCGOpcode 
opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGArg a4, TCGArg a5) { + tcg_gen_op5(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4, a5); +} + +static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGArg a4, TCGArg a5) { + tcg_gen_op5(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4, a5); +} + +static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5, + TCGArg a6) { + tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), + tcgv_i32_arg(a5), a6); +} + +static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5, + TCGArg a6) { + tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), + tcgv_i64_arg(a5), a6); +} + +static TCGOp *DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGArg a5, + TCGArg a6) { + return tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, + a6); } /* Generic ops. */ -static void add_last_as_label_use(TCGLabel *l) { +void gen_set_label(TCGLabel *l) { + l->present = 1; + tcg_gen_op1(INDEX_op_set_label, 0, label_arg(l)); +} + +static void add_as_label_use(TCGLabel *l, TCGOp *op) { TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse)); - u->op = tcg_last_op(); + u->op = op; QSIMPLEQ_INSERT_TAIL(&l->branches, u, next); } void tcg_gen_br(TCGLabel *l) { - tcg_gen_op1(INDEX_op_br, label_arg(l)); - add_last_as_label_use(l); + add_as_label_use(l, tcg_gen_op1(INDEX_op_br, 0, label_arg(l))); } void tcg_gen_mb(TCGBar mb_type) { @@ -107,16 +246,38 @@ void tcg_gen_mb(TCGBar mb_type) { #endif if (parallel) { - tcg_gen_op1(INDEX_op_mb, mb_type); + tcg_gen_op1(INDEX_op_mb, 0, mb_type); } } +void tcg_gen_plugin_cb(unsigned from) { + tcg_gen_op1(INDEX_op_plugin_cb, 0, from); +} + +void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo) { + tcg_gen_op2(INDEX_op_plugin_mem_cb, 0, tcgv_i64_arg(addr), meminfo); +} + /* 32 bit ops */ +void tcg_gen_discard_i32(TCGv_i32 arg) { + tcg_gen_op1_i32(INDEX_op_discard, TCG_TYPE_I32, arg); +} + +void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) { + if (ret != arg) { + tcg_gen_op2_i32(INDEX_op_mov, ret, arg); + } +} + void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) { tcg_gen_mov_i32(ret, tcg_constant_i32(arg)); } +void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_add, ret, arg1, arg2); +} + void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* some cases can be optimized here */ if (arg2 == 0) { @@ -126,22 +287,28 @@ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } } +void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_sub, ret, arg1, arg2); +} + void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) { - if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) { - /* Don't recurse with tcg_gen_neg_i32. 
*/ - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2); + if (arg1 == 0) { + tcg_gen_neg_i32(ret, arg2); } else { tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2); } } void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i32(ret, arg1); - } else { - tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2)); - } + tcg_gen_addi_i32(ret, arg1, -arg2); +} + +void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) { + tcg_gen_op2_i32(INDEX_op_neg, ret, arg); +} + +void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_and, ret, arg1, arg2); } void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { @@ -153,17 +320,19 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { case -1: tcg_gen_mov_i32(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i32. */ - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz32(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg1, 0, len); + return; + } } break; } @@ -171,6 +340,10 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2)); } +void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_or, ret, arg1, arg2); +} + void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* Some cases can be optimized here. */ if (arg2 == -1) { @@ -182,18 +355,34 @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } } +void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_xor, ret, arg1, arg2); +} + void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) { + } else if (arg2 == -1 && tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { /* Don't recurse with tcg_gen_not_i32. 
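           tcg_gen_not_i32 falls back to tcg_gen_xori_i32(ret, arg, -1)
           when INDEX_op_not is unsupported, so calling it from here
           could recurse without end.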
*/ - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1); + tcg_gen_op2_i32(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2)); } } +void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) { + if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_not, ret, arg); + } else { + tcg_gen_xori_i32(ret, arg, -1); + } +} + +void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_shl, ret, arg1, arg2); +} + void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); if (arg2 == 0) { @@ -203,6 +392,10 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } } +void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_shr, ret, arg1, arg2); +} + void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); if (arg2 == 0) { @@ -212,6 +405,10 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } } +void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_sar, ret, arg1, arg2); +} + void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); if (arg2 == 0) { @@ -225,8 +422,8 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { - tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); - add_last_as_label_use(l); + TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); + add_as_label_use(l, op); } } @@ -244,7 +441,7 @@ void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); } else { - tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); + tcg_gen_op4i_i32(INDEX_op_setcond, ret, arg1, arg2, cond); } } @@ -252,6 +449,24 @@ void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2)); } +void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + if (cond == TCG_COND_ALWAYS) { + tcg_gen_movi_i32(ret, -1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_movi_i32(ret, 0); + } else { + tcg_gen_op4i_i32(INDEX_op_negsetcond, ret, arg1, arg2, cond); + } +} + +void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { + tcg_gen_negsetcond_i32(cond, ret, arg1, tcg_constant_i32(arg2)); +} + +void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + tcg_gen_op3_i32(INDEX_op_mul, ret, arg1, arg2); +} + void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { if (arg2 == 0) { tcg_gen_movi_i32(ret, 0); @@ -263,12 +478,12 @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { - tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divs, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { 
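/* divs2 divides a double-width dividend held in a high:low pair; the sari
 * above sign-extends arg1 into t0 so (t0:arg1) is the 64-bit two's-complement
 * value of arg1 -- for illustration, arg1 = -7 gives t0 = 0xffffffff.
 * With no division op at all, fall through to the helper: */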
gen_helper_div_i32(ret, arg1, arg2); @@ -276,18 +491,18 @@ void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { - tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rems, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divs, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { gen_helper_rem_i32(ret, arg1, arg2); @@ -295,12 +510,12 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { - tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_movi_i32(t0, 0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_op5_i32(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_divu_i32(ret, arg1, arg2); @@ -308,18 +523,18 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { - tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_remu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_movi_i32(t0, 0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_op5_i32(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_remu_i32(ret, arg1, arg2); @@ -327,8 +542,8 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -338,8 +553,8 @@ void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if 
(TCG_TARGET_HAS_eqv_i32) { - tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -347,8 +562,8 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nand_i32) { - tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -356,8 +571,8 @@ void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nor_i32) { - tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -365,8 +580,8 @@ void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_orc_i32) { - tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -376,9 +591,9 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { } void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_clz_i32) { - tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_clz_i64) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_clz, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -399,9 +614,13 @@ void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { } void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_ctz_i32) { - tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctz_i64) { + TCGv_i32 z, t; + + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_ctz, ret, arg1, arg2); + return; + } + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -410,31 +629,32 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_extrl_i64_i32(ret, t1); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); - } else if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i32 || - TCG_TARGET_HAS_clz_i64) { - TCGv_i32 z, t = tcg_temp_ebb_new_i32(); - - if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i32(t, arg1, 1); - tcg_gen_andc_i32(t, t, arg1); - tcg_gen_ctpop_i32(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 32, don't fight it. 
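The isolate-lowest-bit trick below: t = arg & -arg keeps only the lowest set bit, clz maps that to 31 - index, and xor with 31 recovers the index. E.g. arg = 0b1000: t = 8, clz(8) = 28, 28 ^ 31 = 3 == ctz(arg).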
*/ - tcg_gen_neg_i32(t, arg1); - tcg_gen_and_i32(t, t, arg1); - tcg_gen_clzi_i32(t, t, 32); - tcg_gen_xori_i32(t, t, 31); - } - z = tcg_constant_i32(0); - tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i32(t); + return; + } + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(t, t); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t, arg1); + tcg_gen_and_i32(t, t, arg1); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_xori_i32(t, t, 31); } else { gen_helper_ctz_i32(ret, arg1, arg2); + return; } + + z = tcg_constant_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i32(t); } void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + if (arg2 == 32 && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) && + tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); @@ -447,7 +667,7 @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { } void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t, arg, 31); tcg_gen_xor_i32(t, t, arg); @@ -460,9 +680,9 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) { } void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) { - if (TCG_TARGET_HAS_ctpop_i32) { - tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); - } else if (TCG_TARGET_HAS_ctpop_i64) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_ctpop, ret, arg1); + } else if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t, arg1); tcg_gen_ctpop_i64(t, t); @@ -474,15 +694,18 @@ void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) { } void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shl_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shr_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); @@ -495,12 +718,15 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(arg2); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(32 - arg2); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); } else 
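/* Generic fallback: rotl(x, c) == (x << c) | (x >> (32 - c)). Both counts
 * are in range because c == 0 returned early, so neither shift is by the
 * full width. E.g. rotli(0x80000001, 1) == 0x00000003. */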
{ - TCGv_i32 t0, t1; - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shli_i32(t0, arg1, arg2); tcg_gen_shri_i32(t1, arg1, 32 - arg2); tcg_gen_or_i32(ret, t0, t1); @@ -510,15 +736,18 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { } void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shr_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shl_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); @@ -528,12 +757,7 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i32(ret, arg1); - } else { - tcg_gen_rotli_i32(ret, arg1, 32 - arg2); - } + tcg_gen_rotli_i32(ret, arg1, -arg2 & 31); } void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned int ofs, unsigned int len) { @@ -549,14 +773,14 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned in tcg_gen_mov_i32(ret, arg2); return; } - if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) { - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); + if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, arg1, arg2, ofs, len); return; } t1 = tcg_temp_ebb_new_i32(); - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { if (ofs + len == 32) { tcg_gen_shli_i32(t1, arg1, len); tcg_gen_extract2_i32(ret, t1, arg2, len); @@ -592,44 +816,24 @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigne tcg_gen_shli_i32(ret, arg, ofs); } else if (ofs == 0) { tcg_gen_andi_i32(ret, arg, (1u << len) - 1); - } else if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) { + } else if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len); - } else { - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, zero, arg, ofs, len); + } else { + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. 
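* (Extracting into RET first means ARG is read once and never overwritten,
* so callers may keep using it; a single parameterized extract also
* subsumes the old fixed ext8u/ext16u special cases.)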
+ */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg, 0, len); + tcg_gen_shli_i32(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext16u_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext8u_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_shli_i32(ret, arg, ofs); + tcg_gen_extract_i32(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i32(ret, arg, (1u << len) - 1); tcg_gen_shli_i32(ret, ret, ofs); @@ -647,32 +851,21 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned tcg_gen_shri_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i32(ret, arg, (1u << len) - 1); + + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) { + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, ofs, len); return; } - - if (TCG_TARGET_HAS_extract_i32 && TCG_TARGET_extract_i32_valid(ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len); + if (ofs == 0) { + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); return; } /* Assume that zero-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, 0, ofs + len); + tcg_gen_shri_i32(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -702,54 +895,22 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, unsigned int ofs, unsigned tcg_gen_sari_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - switch (len) { - case 16: - tcg_gen_ext16s_i32(ret, arg); - return; - case 8: - tcg_gen_ext8s_i32(ret, arg); - return; - } - } - if (TCG_TARGET_HAS_sextract_i32 && TCG_TARGET_extract_i32_valid(ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len); + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) { + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. 
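* Concretely: sign-extending from bit ofs+len-1 (the field's top bit) and
* then shifting down arithmetically, or shifting the field down first and
* sign-extending from bit len-1, both compute sextract(arg, ofs, len).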
*/ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_ext16s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_ext8s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, 0, ofs + len); + tcg_gen_sari_i32(ret, ret, ofs); + return; } - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext16s_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext8s_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, ret, 0, len); + return; } tcg_gen_shli_i32(ret, arg, 32 - len - ofs); @@ -768,8 +929,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, unsigned int o tcg_gen_mov_i32(ret, ah); } else if (al == ah) { tcg_gen_rotri_i32(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i32) { - tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { + tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_shri_i32(t0, al, ofs); @@ -783,58 +944,83 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, T tcg_gen_mov_i32(ret, v1); } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i32(ret, v2); - } else if (TCG_TARGET_HAS_movcond_i32) { - tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); + } else { + tcg_gen_op6i_i32(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); + } +} + +void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_addci, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_setcond_i32(cond, t0, c1, c2); - tcg_gen_neg_i32(t0, t0); - tcg_gen_and_i32(t1, v1, t0); - tcg_gen_andc_i32(ret, v2, t0); - tcg_gen_or_i32(ret, ret, t1); + tcg_gen_add_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, al); + tcg_gen_add_i32(rh, ah, bh); + tcg_gen_add_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } } -void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_add2_i32) { - tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, t0, ci, mone); + tcg_gen_op3_i32(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i32(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i32(t0); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_add_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - 
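/* This removed fallback concatenated the halves into one 64-bit add. The
 * replacement above derives the carry directly: after t0 = al + bl, unsigned
 * overflow occurred iff t0 < al, which setcond LTU captures. E.g.
 * al = 0xffffffff, bl = 1 wraps to t0 = 0 < al, carrying 1 into the high half. */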
tcg_temp_free_i64(t1); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + + tcg_gen_add_i32(t0, a, b); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i32(r, t0, ci); + tcg_gen_setcond_i32(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i32(co, t0, t1); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_sub2_i32) { - tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_subbi, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_sub_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + tcg_gen_sub_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, al, bl); + tcg_gen_sub_i32(rh, ah, bh); + tcg_gen_sub_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_mulu2_i32) { - tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i32) { + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_mulu2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); - tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 64) { @@ -847,17 +1033,17 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } else { - qemu_build_not_reached(); + g_assert_not_reached(); } } void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_muls2_i32) { - tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i32) { + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_muls2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); - tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 32) { @@ -917,73 +1103,71 @@ void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) } void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); - } else { - tcg_gen_shli_i32(ret, arg, 24); - tcg_gen_sari_i32(ret, ret, 24); - } + tcg_gen_sextract_i32(ret, arg, 0, 8); } void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); - } else { 
- tcg_gen_shli_i32(ret, arg, 16); - tcg_gen_sari_i32(ret, ret, 16); - } + tcg_gen_sextract_i32(ret, arg, 0, 16); } void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffu); - } + tcg_gen_extract_i32(ret, arg, 0, 8); } void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffffu); - } + tcg_gen_extract_i32(ret, arg, 0, 16); } +/* + * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value. + * + * Byte pattern: xxab -> yyba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) { /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); - if (TCG_TARGET_HAS_bswap16_i32) { - tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); + if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_shri_i32(t0, arg, 8); + /* arg = ..ab (IZ) xxab (!IZ) */ + tcg_gen_shri_i32(t0, arg, 8); /* t0 = ...a (IZ) .xxa (!IZ) */ if (!(flags & TCG_BSWAP_IZ)) { - tcg_gen_ext8u_i32(t0, t0); + tcg_gen_ext8u_i32(t0, t0); /* t0 = ...a */ } if (flags & TCG_BSWAP_OS) { - tcg_gen_shli_i32(t1, arg, 24); - tcg_gen_sari_i32(t1, t1, 16); + tcg_gen_shli_i32(t1, arg, 24); /* t1 = b... */ + tcg_gen_sari_i32(t1, t1, 16); /* t1 = ssb. */ } else if (flags & TCG_BSWAP_OZ) { - tcg_gen_ext8u_i32(t1, arg); - tcg_gen_shli_i32(t1, t1, 8); + tcg_gen_ext8u_i32(t1, arg); /* t1 = ...b */ + tcg_gen_shli_i32(t1, t1, 8); /* t1 = ..b. */ } else { - tcg_gen_shli_i32(t1, arg, 8); + tcg_gen_shli_i32(t1, arg, 8); /* t1 = xab. */ } - tcg_gen_or_i32(ret, t0, t1); + tcg_gen_or_i32(ret, t0, t1); /* ret = ..ba (OZ) */ + /* = ssba (OS) */ + /* = xaba (no flag) */ tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } } +/* + * bswap32_i32: 32-bit byte swap on a 32-bit value. + * + * Byte pattern: abcd -> dcba + */ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_bswap32_i32) { - tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); + if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -1005,6 +1189,11 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { } } +/* + * hswap_i32: Swap 16-bit halfwords within a 32-bit value. + * + * Byte pattern: abcd -> cdab + */ void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg) { /* Swapping 2 16-bit elements is a rotate. */ tcg_gen_rotli_i32(ret, arg, 16); @@ -1035,136 +1224,261 @@ void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) { tcg_temp_free_i32(t); } -/* 64-bit ops */ +void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_ld8u, ret, arg2, offset); +} -#if TCG_TARGET_REG_BITS == 32 -/* These are all inline for TCG_TARGET_REG_BITS == 64. 
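Each function now tests TCG_TARGET_REG_BITS itself; the condition is a compile-time constant, so the dead branch folds away and the #if/#else split of this file is no longer needed.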
*/ +void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_ld8s, ret, arg2, offset); +} + +void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_ld16u, ret, arg2, offset); +} + +void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_ld16s, ret, arg2, offset); +} + +void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_ld, ret, arg2, offset); +} + +void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_st8, arg1, arg2, offset); +} + +void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_st16, arg1, arg2, offset); +} + +void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { + tcg_gen_ldst_op_i32(INDEX_op_st, arg1, arg2, offset); +} + +/* 64-bit ops */ void tcg_gen_discard_i64(TCGv_i64 arg) { - tcg_gen_discard_i32(TCGV_LOW(arg)); - tcg_gen_discard_i32(TCGV_HIGH(arg)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op1_i64(INDEX_op_discard, TCG_TYPE_I64, arg); + } else { + tcg_gen_discard_i32(TCGV_LOW(arg)); + tcg_gen_discard_i32(TCGV_HIGH(arg)); + } } void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) { - TCGTemp *ts = tcgv_i64_temp(arg); - - /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */ - if (ts->kind == TEMP_CONST) { - tcg_gen_movi_i64(ret, ts->val); + if (ret == arg) { + return; + } + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op2_i64(INDEX_op_mov, ret, arg); } else { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + TCGTemp *ts = tcgv_i64_temp(arg); + + /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. 
*/ + if (ts->kind == TEMP_CONST) { + tcg_gen_movi_i64(ret, ts->val); + } else { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + } } } void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) { - tcg_gen_movi_i32(TCGV_LOW(ret), arg); - tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_mov_i64(ret, tcg_constant_i64(arg)); + } else { + tcg_gen_movi_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); + } } void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld8u, ret, arg2, offset); + } else { + tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld8s, ret, arg2, offset); + } else { + tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld16u, ret, arg2, offset); + } else { + tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld16s, ret, arg2, offset); + } else { + tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld32u, ret, arg2, offset); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld32s, ret, arg2, offset); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - /* Since arg2 and ret have different types, - they cannot be the same temporary */ -#if HOST_BIG_ENDIAN - tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); -#else - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); -#endif + /* + * For 32-bit host, since arg2 and ret have different types, + * they cannot be the same temporary -- no chance of overlap. 
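* Overlap would matter because the two i32 loads write TCGV_LOW(ret) and
* TCGV_HIGH(ret) in turn; if ret could alias arg2, the first load would
* clobber the base pointer before the second one used it.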
+ */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld, ret, arg2, offset); + } else if (HOST_BIG_ENDIAN) { + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); + } } void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st8, arg1, arg2, offset); + } else { + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st16, arg1, arg2, offset); + } else { + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st32, arg1, arg2, offset); + } else { + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { -#if HOST_BIG_ENDIAN - tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); -#else - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); - tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); -#endif + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st, arg1, arg2, offset); + } else if (HOST_BIG_ENDIAN) { + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); + } else { + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); + } } void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_add, ret, arg1, arg2); + } else { + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); + } } void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_sub, ret, arg1, arg2); + } else { + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); + } } void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_and, ret, arg1, arg2); + } else { + tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + } } void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_or, ret, arg1, arg2); + } else { + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + } } void 
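/* The remaining bitwise ops split into independent per-half i32 ops on
 * 32-bit hosts; only add/sub above need the add2/sub2 carry machinery. */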
tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_xor, ret, arg1, arg2); + } else { + tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + } } void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_shl_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_shl, ret, arg1, arg2); + } else { + gen_helper_shl_i64(ret, arg1, arg2); + } } void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_shr_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_shr, ret, arg1, arg2); + } else { + gen_helper_shr_i64(ret, arg1, arg2); + } } void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_sar_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_sar, ret, arg1, arg2); + } else { + gen_helper_sar_i64(ret, arg1, arg2); + } } void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { TCGv_i64 t0; TCGv_i32 t1; + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_mul, ret, arg1, arg2); + return; + } + t0 = tcg_temp_ebb_new_i64(); t1 = tcg_temp_ebb_new_i32(); @@ -1176,18 +1490,10 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); tcg_gen_mov_i64(ret, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i32(t1); -} - -#else - -void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) { - tcg_gen_mov_i64(ret, tcg_constant_i64(arg)); + tcg_temp_free_i64(t0); + tcg_temp_free_i32(t1); } -#endif /* TCG_TARGET_REG_SIZE == 32 */ - void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { /* some cases can be optimized here */ if (arg2 == 0) { @@ -1201,9 +1507,8 @@ void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { } void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) { - if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) { - /* Don't recurse with tcg_gen_neg_i64. */ - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2); + if (arg1 == 0) { + tcg_gen_neg_i64(ret, arg2); } else if (TCG_TARGET_REG_BITS == 64) { tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2); } else { @@ -1213,14 +1518,15 @@ void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) { } void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i64(ret, arg1); - } else if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2)); + tcg_gen_addi_i64(ret, arg1, -arg2); +} + +void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) { + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op2_i64(INDEX_op_neg, ret, arg); } else { - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), tcg_constant_i32(arg2), - tcg_constant_i32(arg2 >> 32)); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), zero, zero, TCGV_LOW(arg), TCGV_HIGH(arg)); } } @@ -1239,23 +1545,19 @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { case -1: tcg_gen_mov_i64(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i64. 
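(tcg_gen_ext8u_i64's old fallback was tcg_gen_andi_i64(ret, arg, 0xff), which recurses into this switch; the extract-based canonicalization below removes that cycle together with the fixed-width special cases.)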
*/ - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1); - return; - } - break; - case 0xffffffffu: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz64(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg1, 0, len); + return; + } } break; } @@ -1288,9 +1590,9 @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) { + } else if (arg2 == -1 && tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { /* Don't recurse with tcg_gen_not_i64. */ - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1); + tcg_gen_op2_i64(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2)); } @@ -1316,26 +1618,14 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned c, b tcg_gen_movi_i32(TCGV_LOW(ret), 0); } } else if (right) { - if (TCG_TARGET_HAS_extract2_i32) { - tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), c); - } else { - tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); - tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(arg1), 32 - c, c); - } + tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), c); if (arith) { tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } else { tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } } else { - if (TCG_TARGET_HAS_extract2_i32) { - tcg_gen_extract2_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); - } else { - TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); - tcg_gen_deposit_i32(TCGV_HIGH(ret), t0, TCGV_HIGH(arg1), c, 32 - c); - tcg_temp_free_i32(t0); - } + tcg_gen_extract2_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); } } @@ -1377,13 +1667,14 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { + TCGOp *op; if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), - cond, label_arg(l)); + op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2), cond, label_arg(l)); } else { - tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_arg(l)); + op = tcg_gen_op4ii_i64(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); } - add_last_as_label_use(l); + add_as_label_use(l, op); } } @@ -1393,9 +1684,9 @@ void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l) } else if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { - tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), tcg_constant_i32(arg2), - tcg_constant_i32(arg2 >> 32), cond, label_arg(l)); - add_last_as_label_use(l); + TCGOp *op = tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, 
TCGV_LOW(arg1), TCGV_HIGH(arg1), tcg_constant_i32(arg2), + tcg_constant_i32(arg2 >> 32), cond, label_arg(l)); + add_as_label_use(l, op); } } @@ -1410,7 +1701,7 @@ void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg TCGV_HIGH(arg2), cond); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); + tcg_gen_op4i_i64(INDEX_op_setcond, ret, arg1, arg2, cond); } } } @@ -1429,6 +1720,25 @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg } } +void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { + tcg_gen_negsetcond_i64(cond, ret, arg1, tcg_constant_i64(arg2)); +} + +void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { + if (cond == TCG_COND_ALWAYS) { + tcg_gen_movi_i64(ret, -1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_movi_i64(ret, 0); + } else if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op4i_i64(INDEX_op_negsetcond, ret, arg1, arg2, cond); + } else { + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2), cond); + tcg_gen_neg_i32(TCGV_LOW(ret), TCGV_LOW(ret)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_LOW(ret)); + } +} + void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { if (arg2 == 0) { tcg_gen_movi_i64(ret, 0); @@ -1440,12 +1750,12 @@ void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { } void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { - tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_divs, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_div_i64(ret, arg1, arg2); @@ -1453,18 +1763,18 @@ void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { - tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rems, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divs, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_rem_i64(ret, arg1, arg2); @@ -1472,12 +1782,12 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { - tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { + 
tcg_gen_op3_i64(INDEX_op_divu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_movi_i64(t0, 0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2); + TCGv_i64 zero = tcg_constant_i64(0); + tcg_gen_op5_i64(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_divu_i64(ret, arg1, arg2); @@ -1485,18 +1795,18 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { - tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_remu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_movi_i64(t0, 0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); + TCGv_i64 zero = tcg_constant_i64(0); + tcg_gen_op5_i64(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_remu_i64(ret, arg1, arg2); @@ -1504,74 +1814,37 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 56); - tcg_gen_sari_i64(ret, ret, 56); - } + tcg_gen_sextract_i64(ret, arg, 0, 8); } void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 48); - tcg_gen_sari_i64(ret, ret, 48); - } + tcg_gen_sextract_i64(ret, arg, 0, 16); } void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 32); - tcg_gen_sari_i64(ret, ret, 32); - } + tcg_gen_sextract_i64(ret, arg, 0, 32); } void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffu); - } + tcg_gen_extract_i64(ret, arg, 0, 8); } void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffu); - } + tcg_gen_extract_i64(ret, 
arg, 0, 16); } void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffffffu); - } + tcg_gen_extract_i64(ret, arg, 0, 32); } +/* + * bswap16_i64: 16-bit byte swap on the low bits of a 64-bit value. + * + * Byte pattern: xxxxxxxxab -> yyyyyyyyba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); @@ -1583,33 +1856,44 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap16_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_shri_i64(t0, arg, 8); + /* arg = ......ab or xxxxxxab */ + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .......a or .xxxxxxa */ if (!(flags & TCG_BSWAP_IZ)) { - tcg_gen_ext8u_i64(t0, t0); + tcg_gen_ext8u_i64(t0, t0); /* t0 = .......a */ } if (flags & TCG_BSWAP_OS) { - tcg_gen_shli_i64(t1, arg, 56); - tcg_gen_sari_i64(t1, t1, 48); + tcg_gen_shli_i64(t1, arg, 56); /* t1 = b....... */ + tcg_gen_sari_i64(t1, t1, 48); /* t1 = ssssssb. */ } else if (flags & TCG_BSWAP_OZ) { - tcg_gen_ext8u_i64(t1, arg); - tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_ext8u_i64(t1, arg); /* t1 = .......b */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ......b. */ } else { - tcg_gen_shli_i64(t1, arg, 8); + tcg_gen_shli_i64(t1, arg, 8); /* t1 = xxxxxab. */ } - tcg_gen_or_i64(ret, t0, t1); + tcg_gen_or_i64(ret, t0, t1); /* ret = ......ba (OZ) */ + /* ssssssba (OS) */ + /* xxxxxaba (no flag) */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } } +/* + * bswap32_i64: 32-bit byte swap on the low bits of a 64-bit value. + * + * Byte pattern: xxxxabcd -> yyyydcba + * + * With TCG_BSWAP_IZ, x == zero, else undefined. + * With TCG_BSWAP_OZ, y == zero, with TCG_BSWAP_OS y == sign, else undefined. + */ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); @@ -1621,8 +1905,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap32_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap32, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -1642,13 +1926,19 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { } else { tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ } - tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba (OS) */ + /* ....dcba (else) */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } } +/* + * bswap64_i64: 64-bit byte swap on a 64-bit value. 
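* E.g. 0x0102030405060708 -> 0x0807060504030201.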
+ * + * Byte pattern: abcdefgh -> hgfedcba + */ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { TCGv_i32 t0, t1; @@ -1661,8 +1951,8 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) { tcg_gen_mov_i32(TCGV_HIGH(ret), t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); - } else if (TCG_TARGET_HAS_bswap64_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); + } else if (tcg_op_supported(INDEX_op_bswap64, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -1693,23 +1983,34 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) { } } +/* + * hswap_i64: Swap 16-bit halfwords within a 64-bit value. + * See also include/qemu/bitops.h, hswap64. + * + * Byte pattern: abcdefgh -> ghefcdab + */ void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg) { uint64_t m = 0x0000ffff0000ffffull; TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - /* See include/qemu/bitops.h, hswap64. */ - tcg_gen_rotli_i64(t1, arg, 32); - tcg_gen_andi_i64(t0, t1, m); - tcg_gen_shli_i64(t0, t0, 16); - tcg_gen_shri_i64(t1, t1, 16); - tcg_gen_andi_i64(t1, t1, m); - tcg_gen_or_i64(ret, t0, t1); + /* arg = abcdefgh */ + tcg_gen_rotli_i64(t1, arg, 32); /* t1 = efghabcd */ + tcg_gen_andi_i64(t0, t1, m); /* t0 = ..gh..cd */ + tcg_gen_shli_i64(t0, t0, 16); /* t0 = gh..cd.. */ + tcg_gen_shri_i64(t1, t1, 16); /* t1 = ..efghab */ + tcg_gen_andi_i64(t1, t1, m); /* t1 = ..ef..ab */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ghefcdab */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } +/* + * wswap_i64: Swap 32-bit words within a 64-bit value. + * + * Byte pattern: abcdefgh -> efghabcd + */ void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg) { /* Swapping 2 32-bit elements is a rotate. 
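A 32-bit rotate of a 64-bit value exchanges the two word halves exactly, matching the abcdefgh -> efghabcd pattern documented above.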
*/ tcg_gen_rotli_i64(ret, arg, 32); @@ -1719,8 +2020,8 @@ void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_not_i64) { - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); + } else if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_not, ret, arg); } else { tcg_gen_xori_i64(ret, arg, -1); } @@ -1730,8 +2031,8 @@ void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_andc_i64) { - tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); @@ -1744,8 +2045,8 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_eqv_i64) { - tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -1756,8 +2057,8 @@ void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nand_i64) { - tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -1768,8 +2069,8 @@ void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nor_i64) { - tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -1780,8 +2081,8 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_orc_i64) { - tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); @@ -1791,15 +2092,15 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_clz_i64) { - tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_clz, ret, arg1, arg2); } else { gen_helper_clz_i64(ret, arg1, arg2); } } void 
tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { - if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_clz_i32 && arg2 <= 0xffffffffu) { + if (TCG_TARGET_REG_BITS == 32 && arg2 <= 0xffffffffu && tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32); tcg_gen_addi_i32(t, t, 32); @@ -1812,40 +2113,43 @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { } void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_ctz_i64) { - tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) { - TCGv_i64 z, t = tcg_temp_ebb_new_i64(); - - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i64(t, arg1, 1); - tcg_gen_andc_i64(t, t, arg1); - tcg_gen_ctpop_i64(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */ - tcg_gen_neg_i64(t, arg1); - tcg_gen_and_i64(t, t, arg1); - tcg_gen_clzi_i64(t, t, 64); - tcg_gen_xori_i64(t, t, 63); - } - z = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i64(t); - tcg_temp_free_i64(z); + TCGv_i64 z, t; + + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2); + return; + } + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(t, t); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t, arg1); + tcg_gen_and_i64(t, t, arg1); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_xori_i64(t, t, 63); } else { gen_helper_ctz_i64(ret, arg1, arg2); + return; } + + z = tcg_constant_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i64(t); } void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { - if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctz_i32 && arg2 <= 0xffffffffu) { + if (TCG_TARGET_REG_BITS == 32 && arg2 <= 0xffffffffu && tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { TCGv_i32 t32 = tcg_temp_ebb_new_i32(); tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32); tcg_gen_addi_i32(t32, t32, 32); tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); tcg_temp_free_i32(t32); - } else if (!TCG_TARGET_HAS_ctz_i64 && TCG_TARGET_HAS_ctpop_i64 && arg2 == 64) { + } else if (arg2 == 64 && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0) && + tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { /* This equivalence has the advantage of not requiring a fixup. 
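+ ctz(x, 64) == ctpop((x - 1) & ~x): the subtract-and-andc isolates the trailing zeros as a mask of 1s, and x == 0 gives ctpop(-1) == 64; e.g. x = 0b101000 -> 0b000111 -> 3.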
*/ TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); @@ -1858,7 +2162,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { } void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t, arg, 63); tcg_gen_xor_i64(t, t, arg); @@ -1871,27 +2175,36 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) { } void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) { - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); - } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { - tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); - tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); - tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_ctpop, ret, arg1); + return; + } } else { - gen_helper_ctpop_i64(ret, arg1); + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } } + gen_helper_ctpop_i64(ret, arg1); } void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shl_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shr_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -1904,12 +2217,15 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(arg2); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(64 - arg2); + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shli_i64(t0, arg1, arg2); tcg_gen_shri_i64(t1, arg1, 64 - arg2); tcg_gen_or_i64(ret, t0, t1); @@ -1919,14 +2235,18 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { } void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + TCGv_i64
t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shr_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shl_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -1936,12 +2256,7 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 64); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i64(ret, arg1); - } else { - tcg_gen_rotli_i64(ret, arg1, 64 - arg2); - } + tcg_gen_rotli_i64(ret, arg1, -arg2 & 63); } void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned int ofs, unsigned int len) { @@ -1957,12 +2272,13 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned in tcg_gen_mov_i64(ret, arg2); return; } - if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) { - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); - return; - } - if (TCG_TARGET_REG_BITS == 32) { + if (TCG_TARGET_REG_BITS == 64) { + if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, arg1, arg2, ofs, len); + return; + } + } else { if (ofs >= 32) { tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_LOW(arg2), ofs - 32, len); tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); @@ -1977,7 +2293,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned in t1 = tcg_temp_ebb_new_i64(); - if (TCG_TARGET_HAS_extract2_i64) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { if (ofs + len == 64) { tcg_gen_shli_i64(t1, arg1, len); tcg_gen_extract2_i64(ret, t1, arg2, len); @@ -2013,9 +2329,9 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigne tcg_gen_shli_i64(ret, arg, ofs); } else if (ofs == 0) { tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); - } else if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) { + } else if (TCG_TARGET_REG_BITS == 64 && TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len); + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, zero, arg, ofs, len); } else { if (TCG_TARGET_REG_BITS == 32) { if (ofs >= 32) { @@ -2029,54 +2345,20 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigne return; } } - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. + */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg, 0, len); + tcg_gen_shli_i64(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. 
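+ ((1ull << len) - 1 may not fit the host's immediate AND field, while the zero-extending extract is a single instruction when TCG_TARGET_extract_valid accepts it.)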
*/ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext32u_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext16u_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext8u_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_extract_i64(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); tcg_gen_shli_i64(ret, ret, ofs); @@ -2094,10 +2376,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned tcg_gen_shri_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); - return; - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. */ @@ -2111,39 +2389,32 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned tcg_gen_movi_i32(TCGV_HIGH(ret), 0); return; } - /* The field is split across two words. One double-word - shift is better than two double-word shifts. */ - goto do_shift_and; + + /* The field is split across two words. */ + tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg), TCGV_HIGH(arg), ofs); + if (len <= 32) { + tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(ret), 0, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } else { + tcg_gen_extract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), ofs, len - 32); + } + return; } - if (TCG_TARGET_HAS_extract_i64 && TCG_TARGET_extract_i64_valid(ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len); + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) { + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, ofs, len); + return; + } + if (ofs == 0) { + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); return; } /* Assume that zero-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, 0, ofs + len); + tcg_gen_shri_i64(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -2153,7 +2424,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned case 1 ... 8: case 16: case 32: - do_shift_and: tcg_gen_shri_i64(ret, arg, ofs); tcg_gen_andi_i64(ret, ret, (1ull << len) - 1); break; @@ -2175,19 +2445,6 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned tcg_gen_sari_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - switch (len) { - case 32: - tcg_gen_ext32s_i64(ret, arg); - return; - case 16: - tcg_gen_ext16s_i64(ret, arg); - return; - case 8: - tcg_gen_ext8s_i64(ret, arg); - return; - } - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. 
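+ (wholly in the high word when ofs >= 32, wholly in the low word when ofs + len <= 32.)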
*/ @@ -2221,58 +2478,23 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, unsigned int ofs, unsigned return; } - if (TCG_TARGET_HAS_sextract_i64 && TCG_TARGET_extract_i64_valid(ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len); + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) { + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_ext32s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_ext16s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_ext8s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, 0, ofs + len); + tcg_gen_sari_i64(ret, ret, ofs); + return; } - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext32s_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext16s_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext8s_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, ret, 0, len); + return; } + tcg_gen_shli_i64(ret, arg, 64 - len - ofs); tcg_gen_sari_i64(ret, ret, 64 - len); } @@ -2289,8 +2511,8 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, unsigned int o tcg_gen_mov_i64(ret, ah); } else if (al == ah) { tcg_gen_rotri_i64(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i64) { - tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { + tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_shri_i64(t0, al, ofs); @@ -2304,46 +2526,37 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, TCGv_i64 c2, T tcg_gen_mov_i64(ret, v1); } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i64(ret, v2); - } else if (TCG_TARGET_REG_BITS == 32) { + } else if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op6i_i64(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); + } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0, TCGV_LOW(c1), TCGV_HIGH(c1), TCGV_LOW(c2), TCGV_HIGH(c2), cond); + TCGv_i32 zero = tcg_constant_i32(0); - if (TCG_TARGET_HAS_movcond_i32) { - tcg_gen_movi_i32(t1, 0); - tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1, TCGV_LOW(v1), TCGV_LOW(v2)); - tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1, TCGV_HIGH(v1), TCGV_HIGH(v2)); - } else { - tcg_gen_neg_i32(t0, t0); + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0, TCGV_LOW(c1), TCGV_HIGH(c1), TCGV_LOW(c2), TCGV_HIGH(c2), cond); - tcg_gen_and_i32(t1, TCGV_LOW(v1), t0); - tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0); - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1); + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero, TCGV_LOW(v1), TCGV_LOW(v2)); + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero, TCGV_HIGH(v1), TCGV_HIGH(v2)); - tcg_gen_and_i32(t1, 
TCGV_HIGH(v1), t0); - tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1); - } tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); - } else if (TCG_TARGET_HAS_movcond_i64) { - tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); - } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_setcond_i64(cond, t0, c1, c2); - tcg_gen_neg_i64(t0, t0); - tcg_gen_and_i64(t1, v1, t0); - tcg_gen_andc_i64(ret, v2, t0); - tcg_gen_or_i64(ret, ret, t1); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); } } void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_add2_i64) { - tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_addco, TCGV_LOW(t0), TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(t0), TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(rh), TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_HIGH(rh), TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_addci, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2357,9 +2570,86 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i } } +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci) { + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) { + TCGv_i64 discard = tcg_temp_ebb_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 mone = tcg_constant_i64(-1); + + tcg_gen_op3_i64(INDEX_op_addco, discard, ci, mone); + tcg_gen_op3_i64(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i64(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i64(discard); + } else { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); + + tcg_gen_add_i64(t0, a, b); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i64(r, t0, ci); + tcg_gen_setcond_i64(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i64(co, t0, t1); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } + } else { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 discard = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, discard, TCGV_LOW(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, discard, TCGV_HIGH(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(r), TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(r), TCGV_HIGH(a), TCGV_HIGH(b)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_LOW(co), zero, zero); + tcg_temp_free_i32(discard); + } else { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c1 = tcg_temp_ebb_new_i32(); + + tcg_gen_or_i32(c1, TCGV_LOW(ci), TCGV_HIGH(ci)); + tcg_gen_setcondi_i32(TCG_COND_NE, c1, c1, 0); + + tcg_gen_add_i32(t0, TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_LOW(a)); + tcg_gen_add_i32(TCGV_LOW(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_LOW(r), c1); + tcg_gen_or_i32(c1, c1, c0); + + tcg_gen_add_i32(t0, TCGV_HIGH(a), TCGV_HIGH(b)); + 
tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_HIGH(a)); + tcg_gen_add_i32(TCGV_HIGH(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_HIGH(r), c1); + tcg_gen_or_i32(TCGV_LOW(co), c0, c1); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(c0); + tcg_temp_free_i32(c1); + } + tcg_gen_movi_i32(TCGV_HIGH(co), 0); + } +} + void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_sub2_i64) { - tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_subbo, TCGV_LOW(t0), TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_HIGH(t0), TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_LOW(rh), TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_subbi, TCGV_HIGH(rh), TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_subbi, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2374,12 +2664,12 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i } void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_mulu2_i64) { - tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i64) { + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_mulu2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); } else { @@ -2392,15 +2682,15 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { } void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_muls2_i64) { - tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i64) { + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_muls2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) { + } else if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0) || tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); @@ -2472,23 +2762,16 @@ void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a) { void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else if (TCG_TARGET_HAS_extrl_i64_i32) { - tcg_gen_op2(INDEX_op_extrl_i64_i32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } else { - tcg_gen_mov_i32(ret, (TCGv_i32) arg); + tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32, 
tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } } void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_extrh_i64_i32) { - tcg_gen_op2(INDEX_op_extrh_i64_i32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } else { - TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_shri_i64(t, arg, 32); - tcg_gen_mov_i32(ret, (TCGv_i32) t); - tcg_temp_free_i64(t); + tcg_gen_op2(INDEX_op_extrh_i64_i32, TCG_TYPE_I32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); } } @@ -2497,7 +2780,7 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) { tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - tcg_gen_op2(INDEX_op_extu_i32_i64, tcgv_i64_arg(ret), tcgv_i32_arg(arg)); + tcg_gen_op2(INDEX_op_extu_i32_i64, TCG_TYPE_I64, tcgv_i64_arg(ret), tcgv_i32_arg(arg)); } } @@ -2506,7 +2789,7 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) { tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } else { - tcg_gen_op2(INDEX_op_ext_i32_i64, tcgv_i64_arg(ret), tcgv_i32_arg(arg)); + tcg_gen_op2(INDEX_op_ext_i32_i64, TCG_TYPE_I64, tcgv_i64_arg(ret), tcgv_i32_arg(arg)); } } @@ -2526,7 +2809,7 @@ void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high) { tcg_gen_extu_i32_i64(dest, low); /* If deposit is available, use it. Otherwise use the extra knowledge that we have of the zero-extensions above. */ - if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) { + if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, 32, 32)) { tcg_gen_deposit_i64(dest, dest, tmp, 32, 32); } else { tcg_gen_shli_i64(tmp, tmp, 32); @@ -2550,6 +2833,10 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) { tcg_gen_shri_i64(hi, arg, 32); } +void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) { + tcg_gen_deposit_i64(ret, lo, hi, 32, 32); +} + void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg) { tcg_gen_mov_i64(lo, TCGV128_LOW(arg)); tcg_gen_mov_i64(hi, TCGV128_HIGH(arg)); @@ -2567,6 +2854,26 @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src) { } } +void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset) { + if (HOST_BIG_ENDIAN) { + tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset); + tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset + 8); + } else { + tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset); + tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset + 8); + } +} + +void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset) { + if (HOST_BIG_ENDIAN) { + tcg_gen_st_i64(TCGV128_HIGH(val), base, offset); + tcg_gen_st_i64(TCGV128_LOW(val), base, offset + 8); + } else { + tcg_gen_st_i64(TCGV128_LOW(val), base, offset); + tcg_gen_st_i64(TCGV128_HIGH(val), base, offset + 8); + } +} + /* QEMU specific operations. 
*/ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) { @@ -2596,7 +2903,7 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) { tcg_debug_assert(idx == TB_EXIT_REQUESTED); } - tcg_gen_op1i(INDEX_op_exit_tb, val); + tcg_gen_op1i(INDEX_op_exit_tb, 0, val); } void tcg_gen_goto_tb(unsigned idx) { @@ -2610,7 +2917,7 @@ void tcg_gen_goto_tb(unsigned idx) { tcg_ctx->goto_tb_issue_mask |= 1 << idx; #endif plugin_gen_disable_mem_helpers(); - tcg_gen_op1i(INDEX_op_goto_tb, idx); + tcg_gen_op1i(INDEX_op_goto_tb, 0, idx); } void tcg_gen_lookup_and_goto_ptr(void) { @@ -2623,7 +2930,7 @@ void tcg_gen_lookup_and_goto_ptr(void) { plugin_gen_disable_mem_helpers(); ptr = tcg_temp_ebb_new_ptr(); - gen_helper_lookup_tb_ptr(ptr, cpu_env); - tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr)); + gen_helper_lookup_tb_ptr(ptr, tcg_env); + tcg_gen_op1i(INDEX_op_goto_ptr, TCG_TYPE_PTR, tcgv_ptr_arg(ptr)); tcg_temp_free_ptr(ptr); } diff --git a/libtcg/src/tcg-rt/tcg-runtime-gvec.c b/libtcg/src/tcg-rt/tcg-runtime-gvec.c index a7db1afda..d785e77b0 100644 --- a/libtcg/src/tcg-rt/tcg-runtime-gvec.c +++ b/libtcg/src/tcg-rt/tcg-runtime-gvec.c @@ -959,6 +959,31 @@ DO_CMP2(64) #undef DO_CMP1 #undef DO_CMP2 +#define DO_CMP1(NAME, TYPE, OP) \ + void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) { \ + intptr_t oprsz = simd_oprsz(desc); \ + TYPE inv = simd_data(desc), b = b64; \ + for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \ + *(TYPE *) (d + i) = -((*(TYPE *) (a + i) OP b) ^ inv); \ + } \ + clear_high(d, oprsz, desc); \ + } + +#define DO_CMP2(SZ) \ + DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \ + DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \ + DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \ + DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \ + DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=) + +DO_CMP2(8) +DO_CMP2(16) +DO_CMP2(32) +DO_CMP2(64) + +#undef DO_CMP1 +#undef DO_CMP2 + void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); intptr_t i; diff --git a/libtcg/src/tcg.c b/libtcg/src/tcg.c index 9cba5826f..fff850c62 100644 --- a/libtcg/src/tcg.c +++ b/libtcg/src/tcg.c @@ -22,12 +22,6 @@ * THE SOFTWARE. */ -/* define it to use liveness analysis (better code) */ -#define USE_TCG_OPTIMIZATIONS - -/* Define to jump the ELF file used to communicate with GDB. */ -#undef DEBUG_JIT - #include #include // #include @@ -42,6 +36,10 @@ #include #include +/* Define to dump the ELF file used to communicate with GDB. */ +#undef DEBUG_JIT + +#include #include #include #include @@ -50,10 +48,9 @@ #include -/* Note: the long term plan is to reduce the dependencies on the QEMU - CPU definitions. Currently they are used for qemu_ld/st - instructions */ -#define NO_CPU_IO_DEFS +#include + +#include #include @@ -72,15 +69,22 @@ #include #include -// #include "accel/tcg/perf.h" #include "elf.h" -// #include "exec/log.h" + +#ifdef CONFIG_USER_ONLY +#include "user/guest-base.h" +#endif /* Forward declarations for functions declared in tcg-target.c.inc and used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend); +static void tcg_out_nop_fill(tcg_insn_unit *p, int count); + +typedef struct TCGLabelQemuLdst TCGLabelQemuLdst; +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); /* The CIE and FDE header definitions will be common to all hosts.
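+ (These are the DWARF call-frame records consumed through the GDB JIT interface: one shared CIE, then an FDE describing each generated code range.)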
*/ typedef struct { @@ -105,23 +109,23 @@ typedef struct QEMU_PACKED { DebugFrameFDEHeader fde; } DebugFrameHeader; -typedef struct TCGLabelQemuLdst { +struct TCGLabelQemuLdst { bool is_ld; /* qemu_ld: true, qemu_st: false */ MemOpIdx oi; TCGType type; /* result type of a load */ - TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */ - TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */ + TCGReg addr_reg; /* reg index for guest virtual addr */ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; -} TCGLabelQemuLdst; +}; static void tcg_register_jit_int(const void *buf, size_t size, const void *debug_frame, size_t debug_frame_size) __attribute__((unused)); /* Forward declarations for functions declared and used in tcg-target.c.inc. */ +static void tcg_out_tb_start(TCGContext *s); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); @@ -138,8 +142,11 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]); +static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest); +static void tcg_out_mb(TCGContext *s, unsigned bar); +static void tcg_out_br(TCGContext *s, TCGLabel *l); +static void tcg_out_set_carry(TCGContext *s); +static void tcg_out_set_borrow(TCGContext *s); #if TCG_TARGET_MAYBE_vec static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src); static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg base, intptr_t offset); @@ -161,14 +168,22 @@ static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, u const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { g_assert_not_reached(); } +int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) { + return 0; +} #endif static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, TCGReg base, intptr_t ofs); static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, const TCGHelperInfo *info); static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); -static bool tcg_target_const_match(int64_t val, TCGType type, int ct); -#ifdef TCG_TARGET_NEED_LDST_LABELS -static int tcg_out_ldst_finalize(TCGContext *s); +static bool tcg_target_const_match(int64_t val, int ct, TCGType type, TCGCond cond, int vece); + +#ifndef CONFIG_USER_ONLY +#define guest_base \ + ({ \ + qemu_build_not_reached(); \ + (uintptr_t) 0; \ + }) #endif typedef struct TCGLdstHelperParam { @@ -184,7 +199,6 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l, bool static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l, const TCGLdstHelperParam *p) 
__attribute__((unused)); -#if 0 static void *const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = { [MO_UB] = helper_ldub_mmu, [MO_SB] = helper_ldsb_mmu, [MO_UW] = helper_lduw_mmu, [MO_SW] = helper_ldsw_mmu, [MO_UL] = helper_ldul_mmu, [MO_UQ] = helper_ldq_mmu, @@ -199,7 +213,6 @@ static void *const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = { [MO_128] = helper_st16_mmu, #endif }; -#endif typedef struct { MemOp atom; /* lg2 bits of atomicity required */ @@ -209,13 +222,17 @@ typedef struct { static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, MemOp host_atom, bool allow_two_ops) __attribute__((unused)); +#ifdef CONFIG_USER_ONLY +bool tcg_use_softmmu; +#endif + TCGContext tcg_init_ctx; __thread TCGContext *tcg_ctx; TCGContext **tcg_ctxs; unsigned int tcg_cur_ctxs; unsigned int tcg_max_ctxs; -TCGv_env cpu_env = 0; +TCGv_env tcg_env; const void *tcg_code_gen_epilogue; uintptr_t tcg_splitwx_diff; @@ -367,14 +384,12 @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) { return (uintptr_t) tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); } -#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER) -static int tlb_mask_table_ofs(TCGContext *s, int which) { +static int __attribute__((unused)) tlb_mask_table_ofs(TCGContext *s, int which) { return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast); } -#endif /* Signal overflow, starting over with fewer guest insns. */ -static NORETURN void tcg_raise_tb_overflow(TCGContext *s) { +static G_NORETURN void tcg_raise_tb_overflow(TCGContext *s) { siglongjmp(s->jmp_trans, -2); } @@ -576,6 +591,174 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, const TCGMovE } } +/* + * Allocate a new TCGLabelQemuLdst entry. + */ + +__attribute__((unused)) static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) { + TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + + memset(l, 0, sizeof(*l)); + QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); + + return l; +} + +/* + * Allocate new constant pool entries. + */ + +typedef struct TCGLabelPoolData { + struct TCGLabelPoolData *next; + tcg_insn_unit *label; + intptr_t addend; + int rtype; + unsigned nlong; + tcg_target_ulong data[]; +} TCGLabelPoolData; + +static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, tcg_insn_unit *label, intptr_t addend) { + TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) + sizeof(tcg_target_ulong) * nlong); + + n->label = label; + n->addend = addend; + n->rtype = rtype; + n->nlong = nlong; + return n; +} + +static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) { + TCGLabelPoolData *i, **pp; + int nlong = n->nlong; + + /* Insertion sort on the pool. */ + for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { + if (nlong > i->nlong) { + break; + } + if (nlong < i->nlong) { + continue; + } + if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { + break; + } + } + n->next = *pp; + *pp = n; +} + +/* The "usual" for generic integer code. */ +__attribute__((unused)) static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, tcg_insn_unit *label, + intptr_t addend) { + TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); + n->data[0] = d; + new_pool_insert(s, n); +} + +/* For v64 or v128, depending on the host. 
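+ (two tcg_target_ulong slots: 8 bytes on a 32-bit host, 16 bytes on a 64-bit one.)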
*/ +__attribute__((unused)) static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, intptr_t addend, + tcg_target_ulong d0, tcg_target_ulong d1) { + TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + new_pool_insert(s, n); +} + +/* For v128 or v256, depending on the host. */ +__attribute__((unused)) static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, intptr_t addend, + tcg_target_ulong d0, tcg_target_ulong d1, tcg_target_ulong d2, + tcg_target_ulong d3) { + TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + n->data[2] = d2; + n->data[3] = d3; + new_pool_insert(s, n); +} + +/* For v256, for 32-bit host. */ +__attribute__((unused)) static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, intptr_t addend, + tcg_target_ulong d0, tcg_target_ulong d1, tcg_target_ulong d2, + tcg_target_ulong d3, tcg_target_ulong d4, tcg_target_ulong d5, + tcg_target_ulong d6, tcg_target_ulong d7) { + TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + n->data[2] = d2; + n->data[3] = d3; + n->data[4] = d4; + n->data[5] = d5; + n->data[6] = d6; + n->data[7] = d7; + new_pool_insert(s, n); +} + +/* + * Generate TB finalization at the end of block + */ + +static int tcg_out_ldst_finalize(TCGContext *s) { + TCGLabelQemuLdst *lb; + + /* qemu_ld/st slow paths */ + QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { + if (lb->is_ld ? !tcg_out_qemu_ld_slow_path(s, lb) : !tcg_out_qemu_st_slow_path(s, lb)) { + return -2; + } + + /* + * Test for (pending) buffer overflow. The assumption is that any + * one operation beginning below the high water mark cannot overrun + * the buffer completely. Thus we can test for overflow after + * generating code without having to check during generation. + */ + if (unlikely((void *) s->code_ptr > s->code_gen_highwater)) { + return -1; + } + } + return 0; +} + +static int tcg_out_pool_finalize(TCGContext *s) { + TCGLabelPoolData *p = s->pool_labels; + TCGLabelPoolData *l = NULL; + void *a; + + if (p == NULL) { + return 0; + } + + /* + * ??? Round up to g_icache_linesize, but then do not round + * again when allocating the next TranslationBlock structure. 
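+ * In the meantime we align only to the pool's own element size, + * sizeof(tcg_target_ulong) * nlong.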
+ */ + a = (void *) ROUND_UP((uintptr_t) s->code_ptr, sizeof(tcg_target_ulong) * p->nlong); + tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *) a - s->code_ptr); + s->data_gen_ptr = a; + + for (; p != NULL; p = p->next) { + size_t size = sizeof(tcg_target_ulong) * p->nlong; + uintptr_t value; + + if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { + if (unlikely(a > s->code_gen_highwater)) { + return -1; + } + memcpy(a, p->data, size); + a += size; + l = p; + } + + value = (uintptr_t) tcg_splitwx_to_rx(a) - size; + if (!patch_reloc(p->label, p->rtype, value, p->addend)) { + return -2; + } + } + + s->code_ptr = a; + return 0; +} + #define C_PFX1(P, A) P##A #define C_PFX2(P, A, B) P##A##_##B #define C_PFX3(P, A, B, C) P##A##_##B##_##C @@ -595,7 +778,9 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, const TCGMovE #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), -#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), +#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), +#define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1), +#define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), @@ -604,10 +789,12 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, const TCGMovE #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { + C_Dynamic = -2, + C_NotImplemented = -1, #include "i386/tcg-target-con-set.h" } TCGConstraintSetIndex; -static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); +static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned); #undef C_O0_I1 #undef C_O0_I2 @@ -618,6 +805,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #undef C_O1_I3 #undef C_O1_I4 #undef C_N1_I2 +#undef C_N1O1_I1 +#undef C_N2_I1 #undef C_O2_I1 #undef C_O2_I2 #undef C_O2_I3 @@ -626,25 +815,32 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); /* Put all of the constraint sets into an array, indexed by the enum. 
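+ (tcg-target-con-set.h is expanded twice: once above to build the enum, and again here, with the C_* macros redefined, to build the table.)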
*/ -#define C_O0_I1(I1) {.args_ct_str = {#I1}}, -#define C_O0_I2(I1, I2) {.args_ct_str = {#I1, #I2}}, -#define C_O0_I3(I1, I2, I3) {.args_ct_str = {#I1, #I2, #I3}}, -#define C_O0_I4(I1, I2, I3, I4) {.args_ct_str = {#I1, #I2, #I3, #I4}}, +typedef struct TCGConstraintSet { + uint8_t nb_oargs, nb_iargs; + const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGConstraintSet; -#define C_O1_I1(O1, I1) {.args_ct_str = {#O1, #I1}}, -#define C_O1_I2(O1, I1, I2) {.args_ct_str = {#O1, #I1, #I2}}, -#define C_O1_I3(O1, I1, I2, I3) {.args_ct_str = {#O1, #I1, #I2, #I3}}, -#define C_O1_I4(O1, I1, I2, I3, I4) {.args_ct_str = {#O1, #I1, #I2, #I3, #I4}}, +#define C_O0_I1(I1) {0, 1, {#I1}}, +#define C_O0_I2(I1, I2) {0, 2, {#I1, #I2}}, +#define C_O0_I3(I1, I2, I3) {0, 3, {#I1, #I2, #I3}}, +#define C_O0_I4(I1, I2, I3, I4) {0, 4, {#I1, #I2, #I3, #I4}}, -#define C_N1_I2(O1, I1, I2) {.args_ct_str = {"&" #O1, #I1, #I2}}, +#define C_O1_I1(O1, I1) {1, 1, {#O1, #I1}}, +#define C_O1_I2(O1, I1, I2) {1, 2, {#O1, #I1, #I2}}, +#define C_O1_I3(O1, I1, I2, I3) {1, 3, {#O1, #I1, #I2, #I3}}, +#define C_O1_I4(O1, I1, I2, I3, I4) {1, 4, {#O1, #I1, #I2, #I3, #I4}}, -#define C_O2_I1(O1, O2, I1) {.args_ct_str = {#O1, #O2, #I1}}, -#define C_O2_I2(O1, O2, I1, I2) {.args_ct_str = {#O1, #O2, #I1, #I2}}, -#define C_O2_I3(O1, O2, I1, I2, I3) {.args_ct_str = {#O1, #O2, #I1, #I2, #I3}}, -#define C_O2_I4(O1, O2, I1, I2, I3, I4) {.args_ct_str = {#O1, #O2, #I1, #I2, #I3, #I4}}, -#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) {.args_ct_str = {"&" #O1, #O2, #I1, #I2, #I3, #I4}}, +#define C_N1_I2(O1, I1, I2) {1, 2, {"&" #O1, #I1, #I2}}, +#define C_N1O1_I1(O1, O2, I1) {2, 1, {"&" #O1, #O2, #I1}}, +#define C_N2_I1(O1, O2, I1) {2, 1, {"&" #O1, "&" #O2, #I1}}, -static const TCGTargetOpDef constraint_sets[] = { +#define C_O2_I1(O1, O2, I1) {2, 1, {#O1, #O2, #I1}}, +#define C_O2_I2(O1, O2, I1, I2) {2, 2, {#O1, #O2, #I1, #I2}}, +#define C_O2_I3(O1, O2, I1, I2, I3) {2, 3, {#O1, #O2, #I1, #I2, #I3}}, +#define C_O2_I4(O1, O2, I1, I2, I3, I4) {2, 4, {#O1, #O2, #I1, #I2, #I3, #I4}}, +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) {2, 4, {"&" #O1, #O2, #I1, #I2, #I3, #I4}}, + +static const TCGConstraintSet constraint_sets[] = { #include "i386/tcg-target-con-set.h" }; @@ -657,6 +853,8 @@ static const TCGTargetOpDef constraint_sets[] = { #undef C_O1_I3 #undef C_O1_I4 #undef C_N1_I2 +#undef C_N1O1_I1 +#undef C_N2_I1 #undef C_O2_I1 #undef C_O2_I2 #undef C_O2_I3 @@ -675,7 +873,9 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3) #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) -#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) +#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) +#define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1) +#define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1) #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) @@ -683,15 +883,280 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) +/* + * TCGOutOp is the base class for a set of structures that describe how + * to generate code for a given TCGOpcode. + * + * @static_constraint: + * C_NotImplemented: The TCGOpcode is not supported by the backend. 
+ * C_Dynamic: Use @dynamic_constraint to select a constraint set + * based on any of @type, @flags, or host isa. + * Otherwise: The register allocation constraints for the TCGOpcode. + * + * Subclasses of TCGOutOp will define a set of output routines that may + * be used. Such routines will often be selected by the set of registers + * and constants that come out of register allocation. The set of + * routines that are provided will guide the set of constraints that are + * legal. In particular, assume that tcg_optimize() has done its job in + * swapping commutative operands and folding operations for which all + * operands are constant. + */ +typedef struct TCGOutOp { + TCGConstraintSetIndex static_constraint; + TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); +} TCGOutOp; + +typedef struct TCGOutOpAddSubCarry { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2); + void (*out_rir)(TCGContext *s, TCGType type, TCGReg a0, tcg_target_long a1, TCGReg a2); + void (*out_rii)(TCGContext *s, TCGType type, TCGReg a0, tcg_target_long a1, tcg_target_long a2); +} TCGOutOpAddSubCarry; + +typedef struct TCGOutOpBinary { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2); +} TCGOutOpBinary; + +typedef struct TCGOutOpBrcond { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond, TCGReg a1, TCGReg a2, TCGLabel *label); + void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond, TCGReg a1, tcg_target_long a2, TCGLabel *label); +} TCGOutOpBrcond; + +typedef struct TCGOutOpBrcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGArg bl, bool const_bl, TCGArg bh, bool const_bh, + TCGLabel *l); +} TCGOutOpBrcond2; + +typedef struct TCGOutOpBswap { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned flags); +} TCGOutOpBswap; + +typedef struct TCGOutOpDeposit { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned ofs, unsigned len); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2, unsigned ofs, unsigned len); + void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, unsigned ofs, unsigned len); +} TCGOutOpDeposit; + +typedef struct TCGOutOpDivRem { + TCGOutOp base; + void (*out_rr01r)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a4); +} TCGOutOpDivRem; + +typedef struct TCGOutOpExtract { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len); +} TCGOutOpExtract; + +typedef struct TCGOutOpExtract2 { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned shr); +} TCGOutOpExtract2; + +typedef struct TCGOutOpLoad { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, intptr_t offset); +} TCGOutOpLoad; + +typedef struct TCGOutOpMovcond { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGCond cond, TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, TCGArg vt, + bool const_vt, TCGArg vf, bool const_vf); +} TCGOutOpMovcond; + +typedef struct TCGOutOpMul2 { + TCGOutOp base; + void (*out_rrrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, TCGReg
a3); +} TCGOutOpMul2; + +typedef struct TCGOutOpQemuLdSt { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt; + +typedef struct TCGOutOpQemuLdSt2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi, TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt2; + +typedef struct TCGOutOpUnary { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); +} TCGOutOpUnary; + +typedef struct TCGOutOpSetcond { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond, TCGReg ret, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond, TCGReg ret, TCGReg a1, tcg_target_long a2); +} TCGOutOpSetcond; + +typedef struct TCGOutOpSetcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGArg bl, bool const_bl, TCGArg bh, + bool const_bh); +} TCGOutOpSetcond2; + +typedef struct TCGOutOpStore { + TCGOutOp base; + void (*out_r)(TCGContext *s, TCGType type, TCGReg data, TCGReg base, intptr_t offset); + void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data, TCGReg base, intptr_t offset); +} TCGOutOpStore; + +typedef struct TCGOutOpSubtract { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rir)(TCGContext *s, TCGType type, TCGReg a0, tcg_target_long a1, TCGReg a2); +} TCGOutOpSubtract; + #include "i386/tcg-target.c.inc" -static void alloc_tcg_plugin_context(TCGContext *s) { -#ifdef CONFIG_PLUGIN - s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); - s->plugin_tb->insns = g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); +#ifndef CONFIG_TCG_INTERPRETER +/* Validate CPUTLBDescFast placement. */ +// QEMU_BUILD_BUG_ON((int) (offsetof(CPUNegativeOffsetState, tlb.f[0]) - sizeof(CPUNegativeOffsetState)) < +// MIN_TLB_MASK_TABLE_OFS); #endif + +#if TCG_TARGET_REG_BITS == 64 +/* + * We require these functions for slow-path function calls. + * Adapt them generically for opcode output. + */ + +static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) { + tcg_out_exts_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_exts_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_exts_i32_i64, +}; + +static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) { + tcg_out_extu_i32_i64(s, a0, a1); } +static const TCGOutOpUnary outop_extu_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extu_i32_i64, +}; + +static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) { + tcg_out_extrl_i64_i32(s, a0, a1); +} + +static const TCGOutOpUnary outop_extrl_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL, +}; +#endif + +static const TCGOutOp outop_goto_ptr = { + .static_constraint = C_O0_I1(r), +}; + +static const TCGOutOpLoad outop_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tcg_out_ld, +}; + +/* + * Register V as the TCGOutOp for O. + * This verifies that V is of type T, otherwise give a nice compiler error. + * This prevents trivial mistakes within each arch/tcg-target.c.inc. + */ +#define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) + +/* Register allocation descriptions for every TCGOpcode. 
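+ Opcodes without an explicit OUTOP entry below remain NULL (designated initializers zero the rest of the array).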
*/ +static const TCGOutOp *const all_outop[NB_OPS] = { + OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci), + OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco), + /* addc1o is implemented with set_carry + addcio */ + OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32), + OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), + OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit), + OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), + OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld), + OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), + OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), + OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2), + OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), + OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond), + OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), + OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld), + OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2), + OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st), + OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2), + OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), + OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), + OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_st, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16), + OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), + OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo), + /* subb1o is implemented with set_borrow + subbio */ + OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), + + [INDEX_op_goto_ptr] = &outop_goto_ptr, + +#if TCG_TARGET_REG_BITS == 32 + 
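/* On a 32-bit host, 64-bit comparisons are built from these word-pair opcodes. */ +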
OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), + OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), +#else + OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), + OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), + OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), + OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), + OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), + OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), + OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), + OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st), +#endif +}; + +#undef OUTOP + /* * All TCG threads except the parent (i.e. the one that called tcg_context_init * and registered the target's TCG globals) must register with this function @@ -700,12 +1165,13 @@ static void alloc_tcg_plugin_context(TCGContext *s) { * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation * of tcg_region_init() for the reasoning behind this. * - * In softmmu each caller registers its context in tcg_ctxs[]. Note that in - * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context + * In system-mode each caller registers its context in tcg_ctxs[]. Note that in + * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context * is not used anymore for translation once this function is called. * - * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates - * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. + * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that + * iterates over the array (e.g. tcg_code_size() the same for both system/user + * modes. */ #ifdef CONFIG_USER_ONLY void tcg_register_thread(void) { @@ -733,7 +1199,6 @@ void tcg_register_thread(void) { qatomic_set(&tcg_ctxs[n], s); if (n > 0) { - alloc_tcg_plugin_context(s); tcg_region_initial_alloc(s); } @@ -757,8 +1222,9 @@ void *tcg_malloc_internal(TCGContext *s, int size) { p = s->pool_current; if (!p) { p = s->pool_first; - if (!p) + if (!p) { goto new_pool; + } } else { if (!p->next) { new_pool: @@ -777,8 +1243,8 @@ void *tcg_malloc_internal(TCGContext *s, int size) { } } s->pool_current = p; - s->pool_cur = p->data + size; - s->pool_end = p->data + p->size; + s->pool_cur = (uintptr_t) p->data + size; + s->pool_end = (uintptr_t) p->data + p->size; return p->data; } @@ -789,7 +1255,7 @@ void tcg_pool_reset(TCGContext *s) { g_free(p); } s->pool_first_large = NULL; - s->pool_cur = s->pool_end = NULL; + s->pool_cur = s->pool_end = 0; s->pool_current = NULL; } @@ -1195,37 +1661,17 @@ static void init_call_layout(TCGHelperInfo *info) { } static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; -static void process_op_defs(TCGContext *s); +static void process_constraint_sets(void); static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, TCGReg reg, const char *name); -static void tcg_context_init(unsigned max_cpus) { +static void tcg_context_init(unsigned max_threads) { TCGContext *s = &tcg_init_ctx; - int op, total_args, n, i; - TCGOpDef *def; - TCGArgConstraint *args_ct; + int n, i; TCGTemp *ts; memset(s, 0, sizeof(*s)); s->nb_globals = 0; - /* Count total number of arguments and allocate the corresponding - space */ - total_args = 0; - for (op = 0; op < NB_OPS; op++) { - def = &tcg_op_defs[op]; - n = def->nb_iargs + def->nb_oargs; - total_args += n; - } - - args_ct = g_new0(TCGArgConstraint, total_args); - - for (op = 0; op < NB_OPS; op++) { - 
def = &tcg_op_defs[op]; - def->args_ct = args_ct; - n = def->nb_iargs + def->nb_oargs; - args_ct += n; - } - init_call_layout(&info_helper_ld32_mmu); init_call_layout(&info_helper_ld64_mmu); init_call_layout(&info_helper_ld128_mmu); @@ -1234,7 +1680,7 @@ static void tcg_context_init(unsigned max_cpus) { init_call_layout(&info_helper_st128_mmu); tcg_target_init(s); - process_op_defs(s); + process_constraint_sets(); /* Reverse the order of the saved registers, assuming they're all at the start of tcg_target_reg_alloc_order. */ @@ -1251,32 +1697,30 @@ static void tcg_context_init(unsigned max_cpus) { indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; } - alloc_tcg_plugin_context(s); - tcg_ctx = s; /* * In user-mode we simply share the init context among threads, since we * use a single region. See the documentation tcg_region_init() for the * reasoning behind this. - * In softmmu we will have at most max_cpus TCG threads. + * In system-mode we will have at most max_threads TCG threads. */ #ifdef CONFIG_USER_ONLY tcg_ctxs = &tcg_ctx; tcg_cur_ctxs = 1; tcg_max_ctxs = 1; #else - tcg_max_ctxs = max_cpus; - tcg_ctxs = g_new0(TCGContext *, max_cpus); + tcg_max_ctxs = max_threads; + tcg_ctxs = g_new0(TCGContext *, max_threads); #endif tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); - cpu_env = temp_tcgv_ptr(ts); + tcg_env = temp_tcgv_ptr(ts); } -void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) { - tcg_context_init(max_cpus); - tcg_region_init(tb_size, splitwx, max_cpus); +void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) { + tcg_context_init(max_threads); + tcg_region_init(tb_size, splitwx, max_threads); } /* @@ -1299,7 +1743,6 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s) { goto retry; } qatomic_set(&s->code_gen_ptr, next); - s->data_gen_ptr = NULL; return tb; } @@ -1314,24 +1757,20 @@ void tcg_prologue_init(TCGContext *s) { tcg_qemu_tb_exec = (tcg_prologue_fn *) tcg_splitwx_to_rx(s->code_ptr); #endif -#ifdef TCG_TARGET_NEED_POOL_LABELS s->pool_labels = NULL; -#endif qemu_thread_jit_write(); /* Generate the prologue. */ tcg_target_qemu_prologue(s); -#ifdef TCG_TARGET_NEED_POOL_LABELS /* Allow the prologue to put e.g. guest_base into a pool entry. */ { int result = tcg_out_pool_finalize(s); tcg_debug_assert(result == 0); } -#endif prologue_size = tcg_current_code_size(s); - // perf_report_prologue(s->code_gen_ptr, prologue_size); + perf_report_prologue(s->code_gen_ptr, prologue_size); #ifndef CONFIG_TCG_INTERPRETER flush_idcache_range((uintptr_t) tcg_splitwx_to_rx(s->code_buf), (uintptr_t) s->code_buf, prologue_size); @@ -1384,7 +1823,7 @@ void tcg_func_start(TCGContext *s) { s->nb_temps = s->nb_globals; /* No temps have been previously allocated for size or locality. */ - memset(s->free_temps, 0, sizeof(s->free_temps)); + tcg_temp_ebb_reset_freed(s); /* No constant temps have been previously allocated. 
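   The loop below resets each type's const_table, so values produced by
   tcg_constant_internal() are re-interned for the next translation.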
*/ for (int i = 0; i < TCG_TYPE_COUNT; ++i) { @@ -1403,16 +1842,10 @@ void tcg_func_start(TCGContext *s) { QTAILQ_INIT(&s->ops); QTAILQ_INIT(&s->free_ops); + s->emit_before_op = NULL; QSIMPLEQ_INIT(&s->labels); - tcg_debug_assert(s->addr_type == TCG_TYPE_I32 || s->addr_type == TCG_TYPE_I64); - -#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER) - tcg_debug_assert(s->tlb_fast_offset < 0); - tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS); -#endif - - tcg_debug_assert(s->insn_start_words > 0); + tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); } static TCGTemp *tcg_temp_alloc(TCGContext *s) { @@ -1458,7 +1891,7 @@ void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) { s->frame_temp = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); } -TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, intptr_t offset, const char *name) { +static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, const char *name, TCGType type) { TCGContext *s = tcg_ctx; TCGTemp *base_ts = tcgv_ptr_temp(base); TCGTemp *ts = tcg_global_alloc(s); @@ -1515,6 +1948,21 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, intptr_t offse return ts; } +TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) { + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); + return temp_tcgv_i32(ts); +} + +TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) { + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); + return temp_tcgv_i64(ts); +} + +TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) { + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); + return temp_tcgv_ptr(ts); +} + TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) { TCGContext *s = tcg_ctx; TCGTemp *ts; @@ -1578,6 +2026,38 @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) { return ts; } +TCGv_i32 tcg_temp_new_i32(void) { + return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); +} + +TCGv_i32 tcg_temp_ebb_new_i32(void) { + return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); +} + +TCGv_i64 tcg_temp_new_i64(void) { + return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); +} + +TCGv_i64 tcg_temp_ebb_new_i64(void) { + return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); +} + +TCGv_ptr tcg_temp_new_ptr(void) { + return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); +} + +TCGv_ptr tcg_temp_ebb_new_ptr(void) { + return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); +} + +TCGv_i128 tcg_temp_new_i128(void) { + return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); +} + +TCGv_i128 tcg_temp_ebb_new_i128(void) { + return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); +} + TCGv_vec tcg_temp_new_vec(TCGType type) { TCGTemp *t; @@ -1630,6 +2110,26 @@ void tcg_temp_free_internal(TCGTemp *ts) { } } +void tcg_temp_free_i32(TCGv_i32 arg) { + tcg_temp_free_internal(tcgv_i32_temp(arg)); +} + +void tcg_temp_free_i64(TCGv_i64 arg) { + tcg_temp_free_internal(tcgv_i64_temp(arg)); +} + +void tcg_temp_free_i128(TCGv_i128 arg) { + tcg_temp_free_internal(tcgv_i128_temp(arg)); +} + +void tcg_temp_free_ptr(TCGv_ptr arg) { + tcg_temp_free_internal(tcgv_ptr_temp(arg)); +} + +void tcg_temp_free_vec(TCGv_vec arg) { + tcg_temp_free_internal(tcgv_vec_temp(arg)); +} + TCGTemp *tcg_constant_internal(TCGType type, int64_t 
val) { TCGContext *s = tcg_ctx; GHashTable *h = s->const_table[type]; @@ -1684,6 +2184,22 @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) { return ts; } +TCGv_i32 tcg_constant_i32(int32_t val) { + return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); +} + +TCGv_i64 tcg_constant_i64(int64_t val) { + return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); +} + +TCGv_vaddr tcg_constant_vaddr(uintptr_t val) { + return temp_tcgv_vaddr(tcg_constant_internal(TCG_TYPE_PTR, val)); +} + +TCGv_ptr tcg_constant_ptr_int(intptr_t val) { + return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); +} + TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) { val = dup_const(vece, val); return temp_tcgv_vec(tcg_constant_internal(type, val)); @@ -1713,10 +2229,33 @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v) { } #endif /* CONFIG_DEBUG_TCG */ -/* Return true if OP may appear in the opcode stream. - Test the runtime variable that controls each opcode. */ -bool tcg_op_supported(TCGOpcode op) { - const bool have_vec = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; +/* + * Return true if OP may appear in the opcode stream with TYPE. + * Test the runtime variable that controls each opcode. + */ +bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) { + bool has_type; + + switch (type) { + case TCG_TYPE_I32: + has_type = true; + break; + case TCG_TYPE_I64: + has_type = TCG_TARGET_REG_BITS == 64; + break; + case TCG_TYPE_V64: + has_type = TCG_TARGET_HAS_v64; + break; + case TCG_TYPE_V128: + has_type = TCG_TARGET_HAS_v128; + break; + case TCG_TYPE_V256: + has_type = TCG_TARGET_HAS_v256; + break; + default: + has_type = false; + break; + } switch (op) { case INDEX_op_discard: @@ -1728,222 +2267,56 @@ bool tcg_op_supported(TCGOpcode op) { case INDEX_op_exit_tb: case INDEX_op_goto_tb: case INDEX_op_goto_ptr: - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: return true; - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - return TCG_TARGET_HAS_qemu_st8_i32; - - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: - return TCG_TARGET_HAS_qemu_ldst_i128; - - case INDEX_op_mov_i32: - case INDEX_op_setcond_i32: - case INDEX_op_brcond_i32: - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + tcg_debug_assert(type <= TCG_TYPE_REG); return true; - case INDEX_op_movcond_i32: - return TCG_TARGET_HAS_movcond_i32; - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - return TCG_TARGET_HAS_div_i32; - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - return TCG_TARGET_HAS_rem_i32; - case INDEX_op_div2_i32: - case INDEX_op_divu2_i32: - return TCG_TARGET_HAS_div2_i32; - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return TCG_TARGET_HAS_rot_i32; - case INDEX_op_deposit_i32: - return TCG_TARGET_HAS_deposit_i32; - case 
INDEX_op_extract_i32: - return TCG_TARGET_HAS_extract_i32; - case INDEX_op_sextract_i32: - return TCG_TARGET_HAS_sextract_i32; - case INDEX_op_extract2_i32: - return TCG_TARGET_HAS_extract2_i32; - case INDEX_op_add2_i32: - return TCG_TARGET_HAS_add2_i32; - case INDEX_op_sub2_i32: - return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_mulu2_i32: - return TCG_TARGET_HAS_mulu2_i32; - case INDEX_op_muls2_i32: - return TCG_TARGET_HAS_muls2_i32; - case INDEX_op_muluh_i32: - return TCG_TARGET_HAS_muluh_i32; - case INDEX_op_mulsh_i32: - return TCG_TARGET_HAS_mulsh_i32; - case INDEX_op_ext8s_i32: - return TCG_TARGET_HAS_ext8s_i32; - case INDEX_op_ext16s_i32: - return TCG_TARGET_HAS_ext16s_i32; - case INDEX_op_ext8u_i32: - return TCG_TARGET_HAS_ext8u_i32; - case INDEX_op_ext16u_i32: - return TCG_TARGET_HAS_ext16u_i32; - case INDEX_op_bswap16_i32: - return TCG_TARGET_HAS_bswap16_i32; - case INDEX_op_bswap32_i32: - return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_not_i32: - return TCG_TARGET_HAS_not_i32; - case INDEX_op_neg_i32: - return TCG_TARGET_HAS_neg_i32; - case INDEX_op_andc_i32: - return TCG_TARGET_HAS_andc_i32; - case INDEX_op_orc_i32: - return TCG_TARGET_HAS_orc_i32; - case INDEX_op_eqv_i32: - return TCG_TARGET_HAS_eqv_i32; - case INDEX_op_nand_i32: - return TCG_TARGET_HAS_nand_i32; - case INDEX_op_nor_i32: - return TCG_TARGET_HAS_nor_i32; - case INDEX_op_clz_i32: - return TCG_TARGET_HAS_clz_i32; - case INDEX_op_ctz_i32: - return TCG_TARGET_HAS_ctz_i32; - case INDEX_op_ctpop_i32: - return TCG_TARGET_HAS_ctpop_i32; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + if (TCG_TARGET_REG_BITS == 32) { + tcg_debug_assert(type == TCG_TYPE_I64); + return true; + } + tcg_debug_assert(type == TCG_TYPE_I128); + goto do_lookup; + + case INDEX_op_add: + case INDEX_op_and: + case INDEX_op_brcond: + case INDEX_op_deposit: + case INDEX_op_extract: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld: + case INDEX_op_mov: + case INDEX_op_movcond: + case INDEX_op_negsetcond: + case INDEX_op_or: + case INDEX_op_setcond: + case INDEX_op_sextract: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st: + case INDEX_op_xor: + return has_type; case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_mov_i64: - case INDEX_op_setcond_i64: - case INDEX_op_brcond_i64: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_st32: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - return TCG_TARGET_REG_BITS == 64; - - case INDEX_op_movcond_i64: - return TCG_TARGET_HAS_movcond_i64; - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - return TCG_TARGET_HAS_div_i64; - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - return TCG_TARGET_HAS_rem_i64; - case INDEX_op_div2_i64: - case INDEX_op_divu2_i64: - return TCG_TARGET_HAS_div2_i64; - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return TCG_TARGET_HAS_rot_i64; - case INDEX_op_deposit_i64: - return TCG_TARGET_HAS_deposit_i64; - case 
INDEX_op_extract_i64: - return TCG_TARGET_HAS_extract_i64; - case INDEX_op_sextract_i64: - return TCG_TARGET_HAS_sextract_i64; - case INDEX_op_extract2_i64: - return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: - return TCG_TARGET_HAS_extrl_i64_i32; case INDEX_op_extrh_i64_i32: - return TCG_TARGET_HAS_extrh_i64_i32; - case INDEX_op_ext8s_i64: - return TCG_TARGET_HAS_ext8s_i64; - case INDEX_op_ext16s_i64: - return TCG_TARGET_HAS_ext16s_i64; - case INDEX_op_ext32s_i64: - return TCG_TARGET_HAS_ext32s_i64; - case INDEX_op_ext8u_i64: - return TCG_TARGET_HAS_ext8u_i64; - case INDEX_op_ext16u_i64: - return TCG_TARGET_HAS_ext16u_i64; - case INDEX_op_ext32u_i64: - return TCG_TARGET_HAS_ext32u_i64; - case INDEX_op_bswap16_i64: - return TCG_TARGET_HAS_bswap16_i64; - case INDEX_op_bswap32_i64: - return TCG_TARGET_HAS_bswap32_i64; - case INDEX_op_bswap64_i64: - return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_not_i64: - return TCG_TARGET_HAS_not_i64; - case INDEX_op_neg_i64: - return TCG_TARGET_HAS_neg_i64; - case INDEX_op_andc_i64: - return TCG_TARGET_HAS_andc_i64; - case INDEX_op_orc_i64: - return TCG_TARGET_HAS_orc_i64; - case INDEX_op_eqv_i64: - return TCG_TARGET_HAS_eqv_i64; - case INDEX_op_nand_i64: - return TCG_TARGET_HAS_nand_i64; - case INDEX_op_nor_i64: - return TCG_TARGET_HAS_nor_i64; - case INDEX_op_clz_i64: - return TCG_TARGET_HAS_clz_i64; - case INDEX_op_ctz_i64: - return TCG_TARGET_HAS_ctz_i64; - case INDEX_op_ctpop_i64: - return TCG_TARGET_HAS_ctpop_i64; - case INDEX_op_add2_i64: - return TCG_TARGET_HAS_add2_i64; - case INDEX_op_sub2_i64: - return TCG_TARGET_HAS_sub2_i64; - case INDEX_op_mulu2_i64: - return TCG_TARGET_HAS_mulu2_i64; - case INDEX_op_muls2_i64: - return TCG_TARGET_HAS_muls2_i64; - case INDEX_op_muluh_i64: - return TCG_TARGET_HAS_muluh_i64; - case INDEX_op_mulsh_i64: - return TCG_TARGET_HAS_mulsh_i64; + return TCG_TARGET_REG_BITS == 64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -1956,70 +2329,108 @@ bool tcg_op_supported(TCGOpcode op) { case INDEX_op_or_vec: case INDEX_op_xor_vec: case INDEX_op_cmp_vec: - return have_vec; + return has_type; case INDEX_op_dup2_vec: - return have_vec && TCG_TARGET_REG_BITS == 32; + return has_type && TCG_TARGET_REG_BITS == 32; case INDEX_op_not_vec: - return have_vec && TCG_TARGET_HAS_not_vec; + return has_type && TCG_TARGET_HAS_not_vec; case INDEX_op_neg_vec: - return have_vec && TCG_TARGET_HAS_neg_vec; + return has_type && TCG_TARGET_HAS_neg_vec; case INDEX_op_abs_vec: - return have_vec && TCG_TARGET_HAS_abs_vec; + return has_type && TCG_TARGET_HAS_abs_vec; case INDEX_op_andc_vec: - return have_vec && TCG_TARGET_HAS_andc_vec; + return has_type && TCG_TARGET_HAS_andc_vec; case INDEX_op_orc_vec: - return have_vec && TCG_TARGET_HAS_orc_vec; + return has_type && TCG_TARGET_HAS_orc_vec; case INDEX_op_nand_vec: - return have_vec && TCG_TARGET_HAS_nand_vec; + return has_type && TCG_TARGET_HAS_nand_vec; case INDEX_op_nor_vec: - return have_vec && TCG_TARGET_HAS_nor_vec; + return has_type && TCG_TARGET_HAS_nor_vec; case INDEX_op_eqv_vec: - return have_vec && TCG_TARGET_HAS_eqv_vec; + return has_type && TCG_TARGET_HAS_eqv_vec; case INDEX_op_mul_vec: - return have_vec && TCG_TARGET_HAS_mul_vec; + return has_type && TCG_TARGET_HAS_mul_vec; case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: - return have_vec && TCG_TARGET_HAS_shi_vec; + return has_type && TCG_TARGET_HAS_shi_vec; case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: - return have_vec && TCG_TARGET_HAS_shs_vec; + return 
has_type && TCG_TARGET_HAS_shs_vec; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: - return have_vec && TCG_TARGET_HAS_shv_vec; + return has_type && TCG_TARGET_HAS_shv_vec; case INDEX_op_rotli_vec: - return have_vec && TCG_TARGET_HAS_roti_vec; + return has_type && TCG_TARGET_HAS_roti_vec; case INDEX_op_rotls_vec: - return have_vec && TCG_TARGET_HAS_rots_vec; + return has_type && TCG_TARGET_HAS_rots_vec; case INDEX_op_rotlv_vec: case INDEX_op_rotrv_vec: - return have_vec && TCG_TARGET_HAS_rotv_vec; + return has_type && TCG_TARGET_HAS_rotv_vec; case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: case INDEX_op_ussub_vec: - return have_vec && TCG_TARGET_HAS_sat_vec; + return has_type && TCG_TARGET_HAS_sat_vec; case INDEX_op_smin_vec: case INDEX_op_umin_vec: case INDEX_op_smax_vec: case INDEX_op_umax_vec: - return have_vec && TCG_TARGET_HAS_minmax_vec; + return has_type && TCG_TARGET_HAS_minmax_vec; case INDEX_op_bitsel_vec: - return have_vec && TCG_TARGET_HAS_bitsel_vec; + return has_type && TCG_TARGET_HAS_bitsel_vec; case INDEX_op_cmpsel_vec: - return have_vec && TCG_TARGET_HAS_cmpsel_vec; + return has_type && TCG_TARGET_HAS_cmpsel_vec; default: - tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); + if (op < INDEX_op_last_generic) { + const TCGOutOp *outop; + TCGConstraintSetIndex con_set; + + if (!has_type) { + return false; + } + + do_lookup: + outop = all_outop[op]; + tcg_debug_assert(outop != NULL); + + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + if (con_set >= 0) { + return true; + } + tcg_debug_assert(con_set == C_NotImplemented); + return false; + } + tcg_debug_assert(op < NB_OPS); return true; + + case INDEX_op_last_generic: + g_assert_not_reached(); } } +bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) { + unsigned width; + + tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); + width = (type == TCG_TYPE_I32 ? 
32 : 64); + + tcg_debug_assert(ofs < width); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= width - ofs); + + return TCG_TARGET_deposit_valid(type, ofs, len); +} + static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); -static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args) { +static void tcg_gen_callN(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args) { TCGv_i64 extend_free[MAX_CALL_IARGS]; int n_extend = 0; TCGOp *op; @@ -2035,7 +2446,7 @@ static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args) { #ifdef CONFIG_PLUGIN /* Flag helpers that may affect guest state */ - if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_PLUGIN) && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { + if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { tcg_ctx->plugin_insn->calls_helpers = true; } #endif @@ -2092,11 +2503,15 @@ static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args) { g_assert_not_reached(); } } - op->args[pi++] = (uintptr_t) info->func; + op->args[pi++] = (uintptr_t) func; op->args[pi++] = (uintptr_t) info; tcg_debug_assert(pi == total_args); - QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); + if (tcg_ctx->emit_before_op) { + QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); + } else { + QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); + } tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); for (i = 0; i < n_extend; ++i) { @@ -2104,44 +2519,45 @@ static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args) { } } -void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret) { - tcg_gen_callN(info, ret, NULL); +void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) { + tcg_gen_callN(func, info, ret, NULL); } -void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) { - tcg_gen_callN(info, ret, &t1); +void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) { + tcg_gen_callN(func, info, ret, &t1); } -void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2) { +void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2) { TCGTemp *args[2] = {t1, t2}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } -void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) { +void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) { TCGTemp *args[3] = {t1, t2, t3}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } -void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) { +void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) { TCGTemp *args[4] = {t1, t2, t3, t4}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } -void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) { +void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, + TCGTemp *t5) { TCGTemp *args[5] = {t1, t2, t3, t4, t5}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } -void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, - TCGTemp *t6) { +void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, + 
TCGTemp *t5, TCGTemp *t6) { TCGTemp *args[6] = {t1, t2, t3, t4, t5, t6}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } -void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, - TCGTemp *t6, TCGTemp *t7) { +void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, + TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) { TCGTemp *args[7] = {t1, t2, t3, t4, t5, t6, t7}; - tcg_gen_callN(info, ret, args); + tcg_gen_callN(func, info, ret, args); } static void tcg_reg_alloc_start(TCGContext *s) { @@ -2219,7 +2635,9 @@ static char *tcg_get_arg_str(TCGContext *s, char *buf, int buf_size, TCGArg arg) static const char *const cond_name[] = { [TCG_COND_NEVER] = "never", [TCG_COND_ALWAYS] = "always", [TCG_COND_EQ] = "eq", [TCG_COND_NE] = "ne", [TCG_COND_LT] = "lt", [TCG_COND_GE] = "ge", [TCG_COND_LE] = "le", [TCG_COND_GT] = "gt", - [TCG_COND_LTU] = "ltu", [TCG_COND_GEU] = "geu", [TCG_COND_LEU] = "leu", [TCG_COND_GTU] = "gtu"}; + [TCG_COND_LTU] = "ltu", [TCG_COND_GEU] = "geu", [TCG_COND_LEU] = "leu", [TCG_COND_GTU] = "gtu", + [TCG_COND_TSTEQ] = "tsteq", [TCG_COND_TSTNE] = "tstne", +}; static const char *const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = { [MO_UB] = "ub", [MO_SB] = "sb", [MO_LEUW] = "leuw", [MO_LESW] = "lesw", [MO_LEUL] = "leul", @@ -2247,6 +2665,15 @@ static const char bswap_flag_name[][6] = { [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", }; +#ifdef CONFIG_PLUGIN +static const char *const plugin_from_name[] = { + "from-tb", + "from-insn", + "after-insn", + "after-tb", +}; +#endif + static inline bool tcg_regset_single(TCGRegSet d) { return (d & (d - 1)) == 0; } @@ -2270,16 +2697,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { char buf[128]; TCGOp *op; - int icount = 0; - QTAILQ_FOREACH (op, &s->ops, link) { int i, k, nb_oargs, nb_iargs, nb_cargs; const TCGOpDef *def; TCGOpcode c; int col = 0; - fprintf(f, "%03d ", icount++); - c = op->opc; def = &tcg_op_defs[c]; @@ -2287,7 +2710,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { nb_oargs = 0; col += ne_fprintf(f, "\n ----"); - for (i = 0, k = s->insn_start_words; i < k; ++i) { + for (i = 0, k = INSN_START_WORDS; i < k; ++i) { col += ne_fprintf(f, " %016" PRIx64, tcg_get_insn_start_param(op, i)); } } else if (c == INDEX_op_call) { @@ -2322,16 +2745,18 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { col += ne_fprintf(f, ",%s", t); } } else { - col += ne_fprintf(f, " %s ", def->name); + if (def->flags & TCG_OPF_INT) { + col += ne_fprintf(f, " %s_i%d ", def->name, 8 * tcg_type_size(TCGOP_TYPE(op))); + } else if (def->flags & TCG_OPF_VECTOR) { + col += ne_fprintf(f, "%s v%d,e%d,", def->name, 8 * tcg_type_size(TCGOP_TYPE(op)), 8 << TCGOP_VECE(op)); + } else { + col += ne_fprintf(f, " %s ", def->name); + } nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; - if (def->flags & TCG_OPF_VECTOR) { - col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 8 << TCGOP_VECE(op)); - } - k = 0; for (i = 0; i < nb_oargs; i++) { const char *sep = k ? 
"," : ""; @@ -2342,14 +2767,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { col += ne_fprintf(f, "%s%s", sep, tcg_get_arg_str(s, buf, sizeof(buf), op->args[k++])); } switch (c) { - case INDEX_op_brcond_i32: - case INDEX_op_setcond_i32: - case INDEX_op_movcond_i32: + case INDEX_op_brcond: + case INDEX_op_setcond: + case INDEX_op_negsetcond: + case INDEX_op_movcond: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: - case INDEX_op_brcond_i64: - case INDEX_op_setcond_i64: - case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) && cond_name[op->args[k]]) { @@ -2359,44 +2782,33 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { } i = 1; break; - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: { - const char *s_al, *s_op, *s_at; + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { + const char *s_al, *s_tlb, *s_op, *s_at; MemOpIdx oi = op->args[k++]; - MemOp op = get_memop(oi); + MemOp mop = get_memop(oi); unsigned ix = get_mmuidx(oi); - s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; - s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; - s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; - op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); + s_tlb = mop & MO_ALIGN_TLB_ONLY ? "tlb+" : ""; + s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; + s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; + s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; + mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK | MO_ALIGN_TLB_ONLY); /* If all fields are accounted for, print symbolically. */ - if (!op && s_al && s_op && s_at) { - col += ne_fprintf(f, ",%s%s%s,%u", s_at, s_al, s_op, ix); + if (!mop && s_al && s_op && s_at) { + col += ne_fprintf(f, ",%s%s%s%s,%u", s_at, s_al, s_tlb, s_op, ix); } else { - op = get_memop(oi); - col += ne_fprintf(f, ",$0x%x,%u", op, ix); + mop = get_memop(oi); + col += ne_fprintf(f, ",$0x%x,%u", mop, ix); } i = 1; } break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: { + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: { TCGArg flags = op->args[k]; const char *name = NULL; @@ -2410,6 +2822,22 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { } i = k = 1; } break; +#ifdef CONFIG_PLUGIN + case INDEX_op_plugin_cb: { + TCGArg from = op->args[k++]; + const char *name = NULL; + + if (from < ARRAY_SIZE(plugin_from_name)) { + name = plugin_from_name[from]; + } + if (name) { + col += ne_fprintf(f, "%s", name); + } else { + col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); + } + i = 1; + } break; +#endif default: i = 0; break; @@ -2417,8 +2845,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { switch (c) { case INDEX_op_set_label: case INDEX_op_br: - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: case INDEX_op_brcond2_i32: col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", arg_label(op->args[k])->id); i++, k++; @@ -2567,9 +2994,11 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) { } /* we give more priority to constraints with less registers */ -static int get_constraint_priority(const TCGOpDef *def, int k) { - const TCGArgConstraint *arg_ct = &def->args_ct[k]; - int n = ctpop64(arg_ct->regs); +static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) { + int n; + + arg_ct += k; + n = ctpop64(arg_ct->regs); /* * Sort constraints of a single register first, which includes output @@ -2598,9 +3027,8 @@ static int get_constraint_priority(const TCGOpDef *def, int k) { } /* sort from highest priority to lowest */ -static void sort_constraints(TCGOpDef *def, int start, int n) { +static void sort_constraints(TCGArgConstraint *a, int start, int n) { int i, j; - TCGArgConstraint *a = def->args_ct; for (i = 0; i < n; i++) { a[start + i].sort_index = start + i; @@ -2610,8 +3038,8 @@ static void sort_constraints(TCGOpDef *def, int start, int n) { } for (i = 0; i < n - 1; i++) { for (j = i + 1; j < n; j++) { - int p1 = get_constraint_priority(def, a[start + i].sort_index); - int p2 = get_constraint_priority(def, a[start + j].sort_index); + int p1 = get_constraint_priority(a, a[start + i].sort_index); + int p2 = get_constraint_priority(a, a[start + j].sort_index); if (p1 < p2) { int tmp = a[start + i].sort_index; a[start + i].sort_index = a[start + j].sort_index; @@ -2621,55 +3049,38 @@ static void sort_constraints(TCGOpDef *def, int start, int n) { } } -static void process_op_defs(TCGContext *s) { - TCGOpcode op; +static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; +static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; - for (op = 0; op < NB_OPS; op++) { - TCGOpDef *def = &tcg_op_defs[op]; - const TCGTargetOpDef *tdefs; +static void process_constraint_sets(void) { + for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { + const TCGConstraintSet *tdefs = &constraint_sets[c]; + TCGArgConstraint *args_ct = all_cts[c]; + int nb_oargs = tdefs->nb_oargs; + int nb_iargs = tdefs->nb_iargs; + int nb_args = nb_oargs + nb_iargs; bool saw_alias_pair = false; - int i, o, i2, o2, nb_args; - if (def->flags & TCG_OPF_NOT_PRESENT) { - continue; - } - - nb_args = def->nb_iargs + def->nb_oargs; - if (nb_args == 0) { - continue; - } - - /* - * Macro magic should make it impossible, but double-check that - * the array index is in range. Since the signness of an enum - * is implementation defined, force the result to unsigned. - */ - unsigned con_set = tcg_target_op_def(op); - tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); - tdefs = &constraint_sets[con_set]; - - for (i = 0; i < nb_args; i++) { + for (int i = 0; i < nb_args; i++) { const char *ct_str = tdefs->args_ct_str[i]; - bool input_p = i >= def->nb_oargs; - - /* Incomplete TCGTargetOpDef entry. */ - tcg_debug_assert(ct_str != NULL); + bool input_p = i >= nb_oargs; + int o; switch (*ct_str) { case '0' ... '9': o = *ct_str - '0'; tcg_debug_assert(input_p); - tcg_debug_assert(o < def->nb_oargs); - tcg_debug_assert(def->args_ct[o].regs != 0); - tcg_debug_assert(!def->args_ct[o].oalias); - def->args_ct[i] = def->args_ct[o]; + tcg_debug_assert(o < nb_oargs); + tcg_debug_assert(args_ct[o].regs != 0); + tcg_debug_assert(!args_ct[o].oalias); + args_ct[i] = args_ct[o]; /* The output sets oalias. */ - def->args_ct[o].oalias = 1; - def->args_ct[o].alias_index = i; + args_ct[o].oalias = 1; + args_ct[o].alias_index = i; /* The input sets ialias. 
*/ - def->args_ct[i].ialias = 1; - def->args_ct[i].alias_index = o; - if (def->args_ct[i].pair) { + args_ct[i].ialias = 1; + args_ct[i].alias_index = o; + if (args_ct[i].pair) { saw_alias_pair = true; } tcg_debug_assert(ct_str[1] == '\0'); @@ -2677,39 +3088,41 @@ static void process_op_defs(TCGContext *s) { case '&': tcg_debug_assert(!input_p); - def->args_ct[i].newreg = true; + args_ct[i].newreg = true; ct_str++; break; case 'p': /* plus */ /* Allocate to the register after the previous. */ - tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + tcg_debug_assert(i > (input_p ? nb_oargs : 0)); o = i - 1; - tcg_debug_assert(!def->args_ct[o].pair); - tcg_debug_assert(!def->args_ct[o].ct); - def->args_ct[i] = (TCGArgConstraint) { + tcg_debug_assert(!args_ct[o].pair); + tcg_debug_assert(!args_ct[o].ct); + args_ct[i] = (TCGArgConstraint) { .pair = 2, .pair_index = o, - .regs = def->args_ct[o].regs << 1, + .regs = args_ct[o].regs << 1, + .newreg = args_ct[o].newreg, }; - def->args_ct[o].pair = 1; - def->args_ct[o].pair_index = i; + args_ct[o].pair = 1; + args_ct[o].pair_index = i; tcg_debug_assert(ct_str[1] == '\0'); continue; case 'm': /* minus */ /* Allocate to the register before the previous. */ - tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + tcg_debug_assert(i > (input_p ? nb_oargs : 0)); o = i - 1; - tcg_debug_assert(!def->args_ct[o].pair); - tcg_debug_assert(!def->args_ct[o].ct); - def->args_ct[i] = (TCGArgConstraint) { + tcg_debug_assert(!args_ct[o].pair); + tcg_debug_assert(!args_ct[o].ct); + args_ct[i] = (TCGArgConstraint) { .pair = 1, .pair_index = o, - .regs = def->args_ct[o].regs >> 1, + .regs = args_ct[o].regs >> 1, + .newreg = args_ct[o].newreg, }; - def->args_ct[o].pair = 2; - def->args_ct[o].pair_index = i; + args_ct[o].pair = 2; + args_ct[o].pair_index = i; tcg_debug_assert(ct_str[1] == '\0'); continue; } @@ -2717,19 +3130,24 @@ static void process_op_defs(TCGContext *s) { do { switch (*ct_str) { case 'i': - def->args_ct[i].ct |= TCG_CT_CONST; + args_ct[i].ct |= TCG_CT_CONST; break; +#ifdef TCG_REG_ZERO + case 'z': + args_ct[i].ct |= TCG_CT_REG_ZERO; + break; +#endif /* Include all of the target-specific constraints. */ #undef CONST -#define CONST(CASE, MASK) \ - case CASE: \ - def->args_ct[i].ct |= MASK; \ +#define CONST(CASE, MASK) \ + case CASE: \ + args_ct[i].ct |= MASK; \ break; -#define REGS(CASE, MASK) \ - case CASE: \ - def->args_ct[i].regs |= MASK; \ +#define REGS(CASE, MASK) \ + case CASE: \ + args_ct[i].regs |= MASK; \ break; #include "i386/tcg-target-con-str.h" @@ -2741,15 +3159,12 @@ static void process_op_defs(TCGContext *s) { case '&': case 'p': case 'm': - /* Typo in TCGTargetOpDef constraint. */ + /* Typo in TCGConstraintSet constraint. */ g_assert_not_reached(); } } while (*++ct_str != '\0'); } - /* TCGTargetOpDef entry with too much information? */ - tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); - /* * Fix up output pairs that are aliased with inputs. * When we created the alias, we copied pair from the output. @@ -2770,51 +3185,53 @@ static void process_op_defs(TCGContext *s) { * first output to pair=3, and the pair_index'es to match. */ if (saw_alias_pair) { - for (i = def->nb_oargs; i < nb_args; i++) { + for (int i = nb_oargs; i < nb_args; i++) { + int o, o2, i2; + /* * Since [0-9pm] must be alone in the constraint string, * the only way they can both be set is if the pair comes * from the output alias. 
*/ - if (!def->args_ct[i].ialias) { + if (!args_ct[i].ialias) { continue; } - switch (def->args_ct[i].pair) { + switch (args_ct[i].pair) { case 0: break; case 1: - o = def->args_ct[i].alias_index; - o2 = def->args_ct[o].pair_index; - tcg_debug_assert(def->args_ct[o].pair == 1); - tcg_debug_assert(def->args_ct[o2].pair == 2); - if (def->args_ct[o2].oalias) { + o = args_ct[i].alias_index; + o2 = args_ct[o].pair_index; + tcg_debug_assert(args_ct[o].pair == 1); + tcg_debug_assert(args_ct[o2].pair == 2); + if (args_ct[o2].oalias) { /* Case 1a */ - i2 = def->args_ct[o2].alias_index; - tcg_debug_assert(def->args_ct[i2].pair == 2); - def->args_ct[i2].pair_index = i; - def->args_ct[i].pair_index = i2; + i2 = args_ct[o2].alias_index; + tcg_debug_assert(args_ct[i2].pair == 2); + args_ct[i2].pair_index = i; + args_ct[i].pair_index = i2; } else { /* Case 1b */ - def->args_ct[i].pair_index = i; + args_ct[i].pair_index = i; } break; case 2: - o = def->args_ct[i].alias_index; - o2 = def->args_ct[o].pair_index; - tcg_debug_assert(def->args_ct[o].pair == 2); - tcg_debug_assert(def->args_ct[o2].pair == 1); - if (def->args_ct[o2].oalias) { + o = args_ct[i].alias_index; + o2 = args_ct[o].pair_index; + tcg_debug_assert(args_ct[o].pair == 2); + tcg_debug_assert(args_ct[o2].pair == 1); + if (args_ct[o2].oalias) { /* Case 1a */ - i2 = def->args_ct[o2].alias_index; - tcg_debug_assert(def->args_ct[i2].pair == 1); - def->args_ct[i2].pair_index = i; - def->args_ct[i].pair_index = i2; + i2 = args_ct[o2].alias_index; + tcg_debug_assert(args_ct[i2].pair == 1); + args_ct[i2].pair_index = i; + args_ct[i].pair_index = i2; } else { /* Case 2 */ - def->args_ct[i].pair = 3; - def->args_ct[o2].pair = 3; - def->args_ct[i].pair_index = o2; - def->args_ct[o2].pair_index = i; + args_ct[i].pair = 3; + args_ct[o2].pair = 3; + args_ct[i].pair_index = o2; + args_ct[o2].pair_index = i; } break; default: @@ -2824,9 +3241,39 @@ static void process_op_defs(TCGContext *s) { } /* sort the constraints (XXX: this is just an heuristic) */ - sort_constraints(def, 0, def->nb_oargs); - sort_constraints(def, def->nb_oargs, def->nb_iargs); + sort_constraints(args_ct, 0, nb_oargs); + sort_constraints(args_ct, nb_oargs, nb_iargs); + } +} + +static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) { + TCGOpcode opc = op->opc; + TCGType type = TCGOP_TYPE(op); + unsigned flags = TCGOP_FLAGS(op); + const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOutOp *outop = all_outop[opc]; + TCGConstraintSetIndex con_set; + + if (def->flags & TCG_OPF_NOT_PRESENT) { + return empty_cts; + } + + if (outop) { + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + } else { + con_set = tcg_target_op_def(opc, type, flags); } + tcg_debug_assert(con_set >= 0); + tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); + + /* The constraint arguments must match TCGOpcode arguments. 
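 *
 * E.g. (sketch): INDEX_op_add is registered above with a TCGOutOpBinary,
 * whose set has nb_oargs == 1 and nb_iargs == 2; if that ever disagreed
 * with tcg_op_defs[INDEX_op_add], the asserts below would fire on the
 * first lookup instead of letting register allocation run on bad data.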
*/ + tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); + tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); + + return all_cts[con_set]; } static void remove_label_use(TCGOp *op, int idx) { @@ -2847,8 +3294,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) { case INDEX_op_br: remove_label_use(op, 0); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: remove_label_use(op, 3); break; case INDEX_op_brcond2_i32: @@ -2907,18 +3353,27 @@ static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) { TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) { TCGOp *op = tcg_op_alloc(opc, nargs); - QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); + + if (tcg_ctx->emit_before_op) { + QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); + } else { + QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); + } return op; } -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc, unsigned nargs) { +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); + + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc, unsigned nargs) { +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); + + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } @@ -2932,8 +3387,7 @@ static void move_label_uses(TCGLabel *to, TCGLabel *from) { case INDEX_op_br: op->args[0] = label_arg(to); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op->args[3] = label_arg(to); break; case INDEX_op_brcond2_i32: @@ -3238,6 +3692,16 @@ static void __attribute__((noinline)) liveness_pass_0(TCGContext *s) { } } +static void assert_carry_dead(TCGContext *s) { + /* + * Carry operations can be separated by a few insns like mov, + * load or store, but they should always be "close", and + * carry-out operations should always be paired with carry-in. + * At various boundaries, carry must have been consumed. + */ + tcg_debug_assert(!s->carry_live); +} + /* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ @@ -3246,133 +3710,137 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { int nb_temps = s->nb_temps; TCGOp *op, *op_prev; TCGRegSet *prefs; - int i; prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); - for (i = 0; i < nb_temps; ++i) { + for (int i = 0; i < nb_temps; ++i) { s->temps[i].state_ptr = prefs + i; } /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ la_func_end(s, nb_globals, nb_temps); + s->carry_live = false; QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; - bool have_opc_new2; TCGLifeData arg_life = 0; TCGTemp *ts; TCGOpcode opc = op->opc; - const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOpDef *def; + const TCGArgConstraint *args_ct; switch (opc) { - case INDEX_op_call: { - const TCGHelperInfo *info = tcg_call_info(op); - int call_flags = tcg_call_flags(op); + case INDEX_op_call: + assert_carry_dead(s); + { + const TCGHelperInfo *info = tcg_call_info(op); + int call_flags = tcg_call_flags(op); - nb_oargs = TCGOP_CALLO(op); - nb_iargs = TCGOP_CALLI(op); + nb_oargs = TCGOP_CALLO(op); + nb_iargs = TCGOP_CALLI(op); - /* pure functions can be removed if their result is unused */ - if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { - for (i = 0; i < nb_oargs; i++) { - ts = arg_temp(op->args[i]); - if (ts->state != TS_DEAD) { - goto do_not_remove_call; + /* pure functions can be removed if their result is unused */ + if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { + for (int i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state != TS_DEAD) { + goto do_not_remove_call; + } } + goto do_remove; } - goto do_remove; - } - do_not_remove_call: + do_not_remove_call: - /* Output args are dead. */ - for (i = 0; i < nb_oargs; i++) { - ts = arg_temp(op->args[i]); - if (ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } - if (ts->state & TS_MEM) { - arg_life |= SYNC_ARG << i; + /* Output args are dead. */ + for (int i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } + if (ts->state & TS_MEM) { + arg_life |= SYNC_ARG << i; + } + ts->state = TS_DEAD; + la_reset_pref(ts); } - ts->state = TS_DEAD; - la_reset_pref(ts); - } - /* Not used -- it will be tcg_target_call_oarg_reg(). */ - memset(op->output_pref, 0, sizeof(op->output_pref)); + /* Not used -- it will be tcg_target_call_oarg_reg(). */ + memset(op->output_pref, 0, sizeof(op->output_pref)); - if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | TCG_CALL_NO_READ_GLOBALS))) { - la_global_kill(s, nb_globals); - } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { - la_global_sync(s, nb_globals); - } + if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | TCG_CALL_NO_READ_GLOBALS))) { + la_global_kill(s, nb_globals); + } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { + la_global_sync(s, nb_globals); + } - /* Record arguments that die in this helper. */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - ts = arg_temp(op->args[i]); - if (ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; + /* Record arguments that die in this helper. */ + for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } } - } - /* For all live registers, remove call-clobbered prefs. */ - la_cross_call(s, nb_temps); + /* For all live registers, remove call-clobbered prefs. */ + la_cross_call(s, nb_temps); - /* - * Input arguments are live for preceding opcodes. - * - * For those arguments that die, and will be allocated in - * registers, clear the register set for that arg, to be - * filled in below. For args that will be on the stack, - * reset to any available reg. Process arguments in reverse - * order so that if a temp is used more than once, the stack - * reset to max happens before the register reset to 0. 
- */ - for (i = nb_iargs - 1; i >= 0; i--) { - const TCGCallArgumentLoc *loc = &info->in[i]; - ts = arg_temp(op->args[nb_oargs + i]); + /* + * Input arguments are live for preceding opcodes. + * + * For those arguments that die, and will be allocated in + * registers, clear the register set for that arg, to be + * filled in below. For args that will be on the stack, + * reset to any available reg. Process arguments in reverse + * order so that if a temp is used more than once, the stack + * reset to max happens before the register reset to 0. + */ + for (int i = nb_iargs - 1; i >= 0; i--) { + const TCGCallArgumentLoc *loc = &info->in[i]; + ts = arg_temp(op->args[nb_oargs + i]); + + if (ts->state & TS_DEAD) { + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + if (arg_slot_reg_p(loc->arg_slot)) { + *la_temp_pref(ts) = 0; + break; + } + /* fall through */ + default: + *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; + break; + } + ts->state &= ~TS_DEAD; + } + } + + /* + * For each input argument, add its input register to prefs. + * If a temp is used once, this produces a single set bit; + * if a temp is used multiple times, this produces a set. + */ + for (int i = 0; i < nb_iargs; i++) { + const TCGCallArgumentLoc *loc = &info->in[i]; + ts = arg_temp(op->args[nb_oargs + i]); - if (ts->state & TS_DEAD) { switch (loc->kind) { case TCG_CALL_ARG_NORMAL: case TCG_CALL_ARG_EXTEND_U: case TCG_CALL_ARG_EXTEND_S: if (arg_slot_reg_p(loc->arg_slot)) { - *la_temp_pref(ts) = 0; - break; + tcg_regset_set_reg(*la_temp_pref(ts), tcg_target_call_iarg_regs[loc->arg_slot]); } - /* fall through */ + break; default: - *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; break; } - ts->state &= ~TS_DEAD; - } - } - - /* - * For each input argument, add its input register to prefs. - * If a temp is used once, this produces a single set bit; - * if a temp is used multiple times, this produces a set. - */ - for (i = 0; i < nb_iargs; i++) { - const TCGCallArgumentLoc *loc = &info->in[i]; - ts = arg_temp(op->args[nb_oargs + i]); - - switch (loc->kind) { - case TCG_CALL_ARG_NORMAL: - case TCG_CALL_ARG_EXTEND_U: - case TCG_CALL_ARG_EXTEND_S: - if (arg_slot_reg_p(loc->arg_slot)) { - tcg_regset_set_reg(*la_temp_pref(ts), tcg_target_call_iarg_regs[loc->arg_slot]); - } - break; - default: - break; } } - } break; + break; case INDEX_op_insn_start: + assert_carry_dead(s); break; case INDEX_op_discard: /* mark the temporary as dead */ @@ -3381,62 +3849,15 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { la_reset_pref(ts); break; - case INDEX_op_add2_i32: - opc_new = INDEX_op_add_i32; - goto do_addsub2; - case INDEX_op_sub2_i32: - opc_new = INDEX_op_sub_i32; - goto do_addsub2; - case INDEX_op_add2_i64: - opc_new = INDEX_op_add_i64; - goto do_addsub2; - case INDEX_op_sub2_i64: - opc_new = INDEX_op_sub_i64; - do_addsub2: - nb_iargs = 4; - nb_oargs = 2; - /* Test if the high part of the operation is dead, but not - the low part. The result can be optimized to a simple - add or sub. This happens often for x86_64 guest when the - cpu mode is set to 32 bit. */ - if (arg_temp(op->args[1])->state == TS_DEAD) { - if (arg_temp(op->args[0])->state == TS_DEAD) { - goto do_remove; - } - /* Replace the opcode and adjust the args in place, - leaving 3 unused args at the end. */ - op->opc = opc = opc_new; - op->args[1] = op->args[2]; - op->args[2] = op->args[4]; - /* Fall through and mark the single-word operation live. 
*/ - nb_iargs = 2; - nb_oargs = 1; - } - goto do_not_remove; - - case INDEX_op_mulu2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_muluh_i32; - have_opc_new2 = TCG_TARGET_HAS_muluh_i32; - goto do_mul2; - case INDEX_op_muls2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_mulsh_i32; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; - goto do_mul2; - case INDEX_op_mulu2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_muluh_i64; - have_opc_new2 = TCG_TARGET_HAS_muluh_i64; - goto do_mul2; - case INDEX_op_muls2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_mulsh_i64; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; + case INDEX_op_muls2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_mulsh; goto do_mul2; + case INDEX_op_mulu2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_muluh; do_mul2: - nb_iargs = 2; - nb_oargs = 2; + assert_carry_dead(s); if (arg_temp(op->args[1])->state == TS_DEAD) { if (arg_temp(op->args[0])->state == TS_DEAD) { /* Both parts of the operation are dead. */ @@ -3446,7 +3867,7 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { op->opc = opc = opc_new; op->args[1] = op->args[2]; op->args[2] = op->args[3]; - } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { + } else if (arg_temp(op->args[0])->state == TS_DEAD && tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { /* The low part of the operation is dead; generate the high. */ op->opc = opc = opc_new2; op->args[0] = op->args[1]; @@ -3456,19 +3877,92 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { goto do_not_remove; } /* Mark the single-word operation live. */ - nb_oargs = 1; goto do_not_remove; - default: - /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ - nb_iargs = def->nb_iargs; - nb_oargs = def->nb_oargs; + case INDEX_op_addco: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_add; + goto do_default; + + case INDEX_op_addcio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_addci; + goto do_default; + + case INDEX_op_subbo: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, but this may also require canonicalization. */ + op->opc = opc = INDEX_op_sub; + ts = arg_temp(op->args[2]); + if (ts->kind == TEMP_CONST) { + ts = tcg_constant_internal(ts->type, -ts->val); + if (ts->state_ptr == NULL) { + tcg_debug_assert(temp_idx(ts) == nb_temps); + nb_temps++; + ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); + ts->state = TS_DEAD; + la_reset_pref(ts); + } + op->args[2] = temp_arg(ts); + op->opc = opc = INDEX_op_add; + } + goto do_default; + + case INDEX_op_subbio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_subbi; + goto do_default; + + case INDEX_op_addc1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to add, add +1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_add, TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, 1); + op->args[2] = temp_arg(ts); + goto do_default; - /* Test if the operation can be removed because all - its outputs are dead. We assume that nb_oargs == 0 - implies side effects */ - if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for (i = 0; i < nb_oargs; i++) { + case INDEX_op_subb1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, add -1. 
*/ + op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, -1); + op->args[2] = temp_arg(ts); + goto do_default; + + default: + do_default: + /* + * Test if the operation can be removed because all + * its outputs are dead. We assume that nb_oargs == 0 + * implies side effects. + */ + def = &tcg_op_defs[opc]; + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { + for (int i = def->nb_oargs - 1; i >= 0; i--) { if (arg_temp(op->args[i])->state != TS_DEAD) { goto do_not_remove; } @@ -3482,7 +3976,11 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { break; do_not_remove: - for (i = 0; i < nb_oargs; i++) { + def = &tcg_op_defs[opc]; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); /* Remember the preference of the uses that followed. */ @@ -3503,12 +4001,16 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { /* If end of basic block, update. */ if (def->flags & TCG_OPF_BB_EXIT) { + assert_carry_dead(s); la_func_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_COND_BRANCH) { + assert_carry_dead(s); la_bb_sync(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_BB_END) { + assert_carry_dead(s); la_bb_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + assert_carry_dead(s); la_global_sync(s, nb_globals); if (def->flags & TCG_OPF_CALL_CLOBBER) { la_cross_call(s, nb_temps); @@ -3516,15 +4018,18 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { } /* Record arguments that die in this opcode. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; } } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = false; + } /* Input arguments are live for preceding opcodes. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { /* For operands that were dead, initially allow @@ -3533,11 +4038,13 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { ts->state &= ~TS_DEAD; } } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = true; + } /* Incorporate constraints for this operand. */ switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: /* Note that these are TCG_OPF_NOT_PRESENT and do not have proper constraints. That said, special case moves to propagate preferences backward. */ @@ -3547,8 +4054,9 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { break; default: - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - const TCGArgConstraint *ct = &def->args_ct[i]; + args_ct = opcode_args_ct(op); + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + const TCGArgConstraint *ct = &args_ct[i]; TCGRegSet set, *pset; ts = arg_temp(op->args[i]); @@ -3571,6 +4079,7 @@ static void __attribute__((noinline)) liveness_pass_1(TCGContext *s) { } op->life = arg_life; } + assert_carry_dead(s); } /* Liveness analysis: Convert indirect regs to direct temporaries. 
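 *
 * Sketch of the rewrite (field names as used in the pass below): when an
 * indirect global g, shadowed by a direct temp d, is read while d is
 * dead, a load
 *
 *     ld d, g.mem_base, g.mem_offset
 *
 * is inserted before the op via tcg_op_insert_before(INDEX_op_ld), and
 * when g still needs syncing after its final write, a matching
 *
 *     st d, g.mem_base, g.mem_offset
 *
 * is inserted after it via tcg_op_insert_after(INDEX_op_st).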
*/ @@ -3638,8 +4147,7 @@ static bool __attribute__((noinline)) liveness_pass_2(TCGContext *s) { arg_ts = arg_temp(op->args[i]); dir_ts = arg_ts->state_ptr; if (dir_ts && arg_ts->state == TS_DEAD) { - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_ld_i32 : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld, arg_ts->type, 3); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -3686,7 +4194,7 @@ static bool __attribute__((noinline)) liveness_pass_2(TCGContext *s) { } /* Outputs become available. */ - if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + if (opc == INDEX_op_mov) { arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; if (dir_ts) { @@ -3697,8 +4205,7 @@ static bool __attribute__((noinline)) liveness_pass_2(TCGContext *s) { arg_ts->state = 0; if (NEED_SYNC_ARG(0)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, arg_ts->type, 3); TCGTemp *out_ts = dir_ts; if (IS_DEAD_ARG(0)) { @@ -3731,8 +4238,7 @@ static bool __attribute__((noinline)) liveness_pass_2(TCGContext *s) { /* Sync outputs upon their last write. */ if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, arg_ts->type, 3); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); @@ -3907,7 +4413,7 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, TCGR case TEMP_VAL_DEAD: default: - tcg_abort(); + g_assert_not_reached(); } ts->mem_coherent = 1; } @@ -3990,7 +4496,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, TCGRegSet al } } - tcg_abort(); + g_assert_not_reached(); } static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, TCGRegSet allocated_regs, @@ -4034,7 +4540,7 @@ static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, TCGRegS } } } - tcg_abort(); + g_assert_not_reached(); } /* Make sure the temporary is in a register. If needed, allocate the register @@ -4072,13 +4578,16 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, TCGReg ts->mem_coherent = 0; break; case TEMP_VAL_MEM: + if (!ts->mem_allocated) { + temp_allocate_frame(s, ts); + } reg = tcg_reg_alloc(s, desired_regs, allocated_regs, preferred_regs, ts->indirect_base); tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); ts->mem_coherent = 1; break; case TEMP_VAL_DEAD: default: - tcg_abort(); + g_assert_not_reached(); } set_temp_val_reg(s, ts, reg); } @@ -4117,9 +4626,8 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) { /* at the end of a basic block, we assume all temporaries are dead and all globals are stored at their canonical location. */ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) { - int i; - - for (i = s->nb_globals; i < s->nb_temps; i++) { + assert_carry_dead(s); + for (int i = s->nb_globals; i < s->nb_temps; i++) { TCGTemp *ts = &s->temps[i]; switch (ts->kind) { @@ -4149,6 +4657,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) { * temps are synced to their location. 
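
Note how both spill/fill insertions in this pass now use the single INDEX_op_ld / INDEX_op_st opcode and carry the access width in the op's type argument, where the old code chose between _i32 and _i64 opcode variants. A sketch of width-by-type dispatch in that style (toy names, assumed shape, not the real TCG types):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef enum { TOY_I32, TOY_I64 } ToyType;

/* One load "opcode"; the type operand decides the access width. */
static uint64_t toy_ld(ToyType type, const void *base, long off) {
    if (type == TOY_I32) {
        uint32_t v;
        memcpy(&v, (const char *)base + off, sizeof(v));
        return v;
    } else {
        uint64_t v;
        memcpy(&v, (const char *)base + off, sizeof(v));
        return v;
    }
}

int main(void) {
    uint64_t mem = 0x1122334455667788ull;
    /* Same opcode, two widths; the 32-bit read is host-endian. */
    printf("%llx %llx\n", (unsigned long long)toy_ld(TOY_I64, &mem, 0),
           (unsigned long long)toy_ld(TOY_I32, &mem, 0));
    return 0;
}
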
*/ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) { + assert_carry_dead(s); sync_globals(s, allocated_regs); for (int i = s->nb_globals; i < s->nb_temps; i++) { @@ -4292,6 +4801,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) { static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; TCGRegSet dup_out_regs, dup_in_regs; + const TCGArgConstraint *dup_args_ct; TCGTemp *its, *ots; TCGType itype, vtype; unsigned vece; @@ -4306,11 +4816,11 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) { itype = its->type; vece = TCGOP_VECE(op); - vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + vtype = TCGOP_TYPE(op); if (its->val_type == TEMP_VAL_CONST) { /* Propagate constant via movi -> dupi. */ - tcg_target_ulong val = its->val; + tcg_target_ulong val = dup_const(vece, its->val); if (IS_DEAD_ARG(1)) { temp_dead(s, its); } @@ -4318,8 +4828,9 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) { return; } - dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; - dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; + dup_args_ct = opcode_args_ct(op); + dup_out_regs = dup_args_ct[0].regs; + dup_in_regs = dup_args_ct[1].regs; /* Allocate the output register now. */ if (ots->val_type != TEMP_VAL_REG) { @@ -4402,10 +4913,16 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { int i, k, nb_iargs, nb_oargs; TCGReg reg; TCGArg arg; + const TCGArgConstraint *args_ct; const TCGArgConstraint *arg_ct; TCGTemp *ts; TCGArg new_args[TCG_MAX_OP_ARGS]; int const_args[TCG_MAX_OP_ARGS]; + TCGCond op_cond; + + if (def->flags & TCG_OPF_CARRY_IN) { + tcg_debug_assert(s->carry_live); + } nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -4416,6 +4933,31 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { i_allocated_regs = s->reserved_regs; o_allocated_regs = s->reserved_regs; + switch (op->opc) { + case INDEX_op_brcond: + op_cond = op->args[2]; + break; + case INDEX_op_setcond: + case INDEX_op_negsetcond: + case INDEX_op_cmp_vec: + op_cond = op->args[3]; + break; + case INDEX_op_brcond2_i32: + op_cond = op->args[4]; + break; + case INDEX_op_movcond: + case INDEX_op_setcond2_i32: + case INDEX_op_cmpsel_vec: + op_cond = op->args[5]; + break; + default: + /* No condition within opcode. */ + op_cond = TCG_COND_ALWAYS; + break; + } + + args_ct = opcode_args_ct(op); + /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { TCGRegSet i_preferred_regs, i_required_regs; @@ -4423,16 +4965,27 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { TCGTemp *ts2; int i1, i2; - i = def->args_ct[nb_oargs + k].sort_index; + i = args_ct[nb_oargs + k].sort_index; arg = op->args[i]; - arg_ct = &def->args_ct[i]; + arg_ct = &args_ct[i]; ts = arg_temp(arg); - if (ts->val_type == TEMP_VAL_CONST && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { - /* constant is OK for instruction */ - const_args[i] = 1; - new_args[i] = ts->val; - continue; + if (ts->val_type == TEMP_VAL_CONST) { +#ifdef TCG_REG_ZERO + if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { + /* Hardware zero register: indicate register via non-const. 
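
The constant propagation above now runs the scalar through dup_const before handing it to tcg_out_dupi_vec. A sketch of what dup_const(vece, val) plausibly computes, judging from this call site: the value replicated across a 64-bit word in lanes of 2^vece bytes (assumed semantics, toy name):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t toy_dup_const(unsigned vece, uint64_t val) {
    switch (vece) {
    case 0: /* 8-bit lanes */
        return 0x0101010101010101ull * (uint8_t)val;
    case 1: /* 16-bit lanes */
        return 0x0001000100010001ull * (uint16_t)val;
    case 2: /* 32-bit lanes */
        return 0x0000000100000001ull * (uint32_t)val;
    case 3: /* one 64-bit lane */
        return val;
    default:
        abort();
    }
}

int main(void) {
    assert(toy_dup_const(0, 0xab) == 0xababababababababull);
    assert(toy_dup_const(2, 0xdeadbeef) == 0xdeadbeefdeadbeefull);
    return 0;
}
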
*/ + const_args[i] = 0; + new_args[i] = TCG_REG_ZERO; + continue; + } +#endif + + if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, op_cond, TCGOP_VECE(op))) { + /* constant is OK for instruction */ + const_args[i] = 1; + new_args[i] = ts->val; + continue; + } } reg = ts->reg; @@ -4452,7 +5005,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { * dead after the instruction, we must allocate a new * register and move it. */ - if (temp_readonly(ts) || !IS_DEAD_ARG(i) || def->args_ct[arg_ct->alias_index].newreg) { + if (temp_readonly(ts) || !IS_DEAD_ARG(i) || args_ct[arg_ct->alias_index].newreg) { allocate_new_reg = true; } else if (ts->val_type == TEMP_VAL_REG) { /* @@ -4599,6 +5152,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { + assert_carry_dead(s); /* XXX: permit generic clobber register list ? */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { @@ -4614,9 +5168,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { /* satisfy the output constraints */ for (k = 0; k < nb_oargs; k++) { - i = def->args_ct[k].sort_index; + i = args_ct[k].sort_index; arg = op->args[i]; - arg_ct = &def->args_ct[i]; + arg_ct = &args_ct[i]; ts = arg_temp(arg); /* ENV should not be modified. */ @@ -4635,16 +5189,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { break; case 1: /* first of pair */ - tcg_debug_assert(!arg_ct->newreg); if (arg_ct->oalias) { reg = new_args[arg_ct->alias_index]; - break; + } else if (arg_ct->newreg) { + reg = tcg_reg_alloc_pair(s, arg_ct->regs, i_allocated_regs | o_allocated_regs, + output_pref(op, k), ts->indirect_base); + } else { + reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, output_pref(op, k), + ts->indirect_base); } - reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, output_pref(op, k), ts->indirect_base); break; case 2: /* second of pair */ - tcg_debug_assert(!arg_ct->newreg); if (arg_ct->oalias) { reg = new_args[arg_ct->alias_index]; } else { @@ -4668,51 +5224,281 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { } /* emit instruction */ + TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); - break; - case INDEX_op_ext8s_i64: - tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); - break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext8u(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); - break; - case INDEX_op_ext16s_i64: - tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); - break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext16u(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, new_args[0], new_args[1]); - break; + case INDEX_op_addc1o: + tcg_out_set_carry(s); + /* fall through */ + case INDEX_op_add: + case INDEX_op_addcio: + case INDEX_op_addco: + case INDEX_op_and: + case INDEX_op_andc: + case INDEX_op_clz: + case INDEX_op_ctz: + case INDEX_op_divs: + case INDEX_op_divu: + case INDEX_op_eqv: + case INDEX_op_mul: + case INDEX_op_mulsh: + case INDEX_op_muluh: + case INDEX_op_nand: + case INDEX_op_nor: + case INDEX_op_or: + case INDEX_op_orc: + case INDEX_op_rems: + case 
INDEX_op_remu: + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: + case INDEX_op_xor: { + const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); + + /* Constants should never appear in the first source operand. */ + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; + + case INDEX_op_sub: { + const TCGOutOpSubtract *out = &outop_sub; + + /* + * Constants should never appear in the second source operand. + * These are folded to add with negative constant. + */ + tcg_debug_assert(!const_args[2]); + if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; + + case INDEX_op_subb1o: + tcg_out_set_borrow(s); + /* fall through */ + case INDEX_op_addci: + case INDEX_op_subbi: + case INDEX_op_subbio: + case INDEX_op_subbo: { + const TCGOutOpAddSubCarry *out = container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); + + if (const_args[2]) { + if (const_args[1]) { + out->out_rii(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); + } + } else if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; + + case INDEX_op_bswap64: case INDEX_op_ext_i32_i64: - tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); - break; case INDEX_op_extu_i32_i64: - tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); - break; case INDEX_op_extrl_i64_i32: - tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); - break; - default: - if (def->flags & TCG_OPF_VECTOR) { - tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), new_args, const_args); + case INDEX_op_extrh_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* fall through */ + case INDEX_op_ctpop: + case INDEX_op_neg: + case INDEX_op_not: { + const TCGOutOpUnary *out = container_of(all_outop[op->opc], TCGOutOpUnary, base); + + /* Constants should have been folded. */ + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1]); + } break; + + case INDEX_op_bswap16: + case INDEX_op_bswap32: { + const TCGOutOpBswap *out = container_of(all_outop[op->opc], TCGOutOpBswap, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); + } break; + + case INDEX_op_deposit: { + const TCGOutOpDeposit *out = &outop_deposit; + + if (const_args[2]) { + tcg_debug_assert(!const_args[1]); + out->out_rri(s, type, new_args[0], new_args[1], new_args[2], new_args[3], new_args[4]); + } else if (const_args[1]) { + tcg_debug_assert(new_args[1] == 0); + tcg_debug_assert(!const_args[2]); + out->out_rzr(s, type, new_args[0], new_args[2], new_args[3], new_args[4]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2], new_args[3], new_args[4]); + } + } break; + + case INDEX_op_divs2: + case INDEX_op_divu2: { + const TCGOutOpDivRem *out = container_of(all_outop[op->opc], TCGOutOpDivRem, base); + + /* Only used by x86 and s390x, which use matching constraints. 
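
Each arm of this emission switch fetches its emitter the same way: a per-opcode table holds pointers to an embedded base struct, container_of recovers the enclosing emitter struct, and the hook matching the operands' constness (out_rrr vs. out_rri and friends) is invoked. The pattern in miniature, with toy names throughout:

#include <stddef.h>
#include <stdio.h>

/* Downcast from an embedded base member to its enclosing struct. */
#define toy_container_of(ptr, type, member) \
    ((const type *)(const void *)((const char *)(ptr) - offsetof(type, member)))

typedef struct { int nargs; } ToyOutOp; /* common base */

typedef struct {
    ToyOutOp base;
    void (*out_rrr)(int d, int a, int b); /* register-register form */
} ToyOutOpBinary;

static void emit_add_rrr(int d, int a, int b) {
    printf("add r%d, r%d, r%d\n", d, a, b);
}

static const ToyOutOpBinary toy_outop_add = {
    .base = { .nargs = 3 },
    .out_rrr = emit_add_rrr,
};

/* One table slot per opcode, holding only base pointers. */
static const ToyOutOp *toy_all_outop[] = { &toy_outop_add.base };

int main(void) {
    const ToyOutOpBinary *out =
        toy_container_of(toy_all_outop[0], ToyOutOpBinary, base);
    out->out_rrr(0, 1, 2);
    return 0;
}
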
*/ + tcg_debug_assert(new_args[0] == new_args[2]); + tcg_debug_assert(new_args[1] == new_args[3]); + tcg_debug_assert(!const_args[4]); + out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); + } break; + + case INDEX_op_extract: + case INDEX_op_sextract: { + const TCGOutOpExtract *out = container_of(all_outop[op->opc], TCGOutOpExtract, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], new_args[2], new_args[3]); + } break; + + case INDEX_op_extract2: { + const TCGOutOpExtract2 *out = &outop_extract2; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2], new_args[3]); + } break; + + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_ld: { + const TCGOutOpLoad *out = container_of(all_outop[op->opc], TCGOutOpLoad, base); + + tcg_debug_assert(!const_args[1]); + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } break; + + case INDEX_op_muls2: + case INDEX_op_mulu2: { + const TCGOutOpMul2 *out = container_of(all_outop[op->opc], TCGOutOpMul2, base); + + tcg_debug_assert(!const_args[2]); + tcg_debug_assert(!const_args[3]); + out->out_rrrr(s, type, new_args[0], new_args[1], new_args[2], new_args[3]); + } break; + + case INDEX_op_st32: + /* Use tcg_op_st w/ I32. */ + type = TCG_TYPE_I32; + /* fall through */ + case INDEX_op_st: + case INDEX_op_st8: + case INDEX_op_st16: { + const TCGOutOpStore *out = container_of(all_outop[op->opc], TCGOutOpStore, base); + + if (const_args[0]) { + out->out_i(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_r(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; + + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: { + const TCGOutOpQemuLdSt *out = container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base); + + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } break; + + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { + const TCGOutOpQemuLdSt2 *out = container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base); + + out->out(s, type, new_args[0], new_args[1], new_args[2], new_args[3]); + } break; + + case INDEX_op_brcond: { + const TCGOutOpBrcond *out = &outop_brcond; + TCGCond cond = new_args[2]; + TCGLabel *label = arg_label(new_args[3]); + + tcg_debug_assert(!const_args[0]); + if (const_args[1]) { + out->out_ri(s, type, cond, new_args[0], new_args[1], label); } else { - tcg_out_op(s, op->opc, new_args, const_args); + out->out_rr(s, type, cond, new_args[0], new_args[1], label); } + } break; + + case INDEX_op_movcond: { + const TCGOutOpMovcond *out = &outop_movcond; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + out->out(s, type, cond, new_args[0], new_args[1], new_args[2], const_args[2], new_args[3], const_args[3], + new_args[4], const_args[4]); + } break; + + case INDEX_op_setcond: + case INDEX_op_negsetcond: { + const TCGOutOpSetcond *out = container_of(all_outop[op->opc], TCGOutOpSetcond, base); + TCGCond cond = new_args[3]; + + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, cond, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, cond, new_args[0], new_args[1], new_args[2]); + } + } break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: { + const TCGOutOpBrcond2 *out = &outop_brcond2; + TCGCond cond = new_args[4]; + TCGLabel *label = arg_label(new_args[5]); + + 
tcg_debug_assert(!const_args[0]); + tcg_debug_assert(!const_args[1]); + out->out(s, cond, new_args[0], new_args[1], new_args[2], const_args[2], new_args[3], const_args[3], label); + } break; + case INDEX_op_setcond2_i32: { + const TCGOutOpSetcond2 *out = &outop_setcond2; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out(s, cond, new_args[0], new_args[1], new_args[2], new_args[3], const_args[3], new_args[4], + const_args[4]); + } break; +#else + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + g_assert_not_reached(); +#endif + + case INDEX_op_goto_ptr: + tcg_debug_assert(!const_args[0]); + tcg_out_goto_ptr(s, new_args[0]); + break; + + default: + tcg_debug_assert(def->flags & TCG_OPF_VECTOR); + tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, TCGOP_VECE(op), new_args, const_args); break; } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = false; + } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = true; + } + /* move the outputs in the correct register if needed */ for (i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); @@ -4731,7 +5517,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; TCGTemp *ots, *itsl, *itsh; - TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + TCGType vtype = TCGOP_TYPE(op); /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ tcg_debug_assert(TCG_TARGET_REG_BITS == 32); @@ -4747,7 +5533,7 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) { /* Allocate the output register now. */ if (ots->val_type != TEMP_VAL_REG) { TCGRegSet allocated_regs = s->reserved_regs; - TCGRegSet dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; + TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; TCGReg oreg; /* Make sure to not spill the input registers. */ @@ -5011,18 +5797,11 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) { * and issue two loads or stores for subalignment. */ static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, MemOp host_atom, bool allow_two_ops) { - MemOp align = get_alignment_bits(opc); + MemOp align = memop_alignment_bits(opc); MemOp size = opc & MO_SIZE; MemOp half = size ? size - 1 : 0; + MemOp atom = opc & MO_ATOM_MASK; MemOp atmax; - MemOp atom; - - /* When serialized, no further atomicity required. */ - if (s->gen_tb->cflags & CF_PARALLEL) { - atom = opc & MO_ATOM_MASK; - } else { - atom = MO_ATOM_NONE; - } switch (atom) { case MO_ATOM_NONE: @@ -5339,13 +6118,13 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, * to 64-bits for the helper by storing the low part, then * load a zero for the high part. */ - tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, ldst->addrlo_reg, -1); + tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, ldst->addr_reg, -1); tcg_out_helper_load_slots(s, 1, mov, parm); tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, TCG_TYPE_I32, 0, parm); next_arg += 2; } else { - nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, ldst->addrlo_reg, ldst->addrhi_reg); + nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, ldst->addr_reg, -1); tcg_out_helper_load_slots(s, nmov, mov, parm); next_arg += nmov; } @@ -5490,18 +6269,19 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, /* Handle addr argument. 
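
Both helper-argument paths zero-extend a 32-bit guest address the same way: the address goes into the endian-appropriate 32-bit slot of the 64-bit argument and a literal zero into the other. The slot arithmetic in miniature (a sketch; the endianness flag is an ordinary parameter here rather than the HOST_BIG_ENDIAN constant):

#include <assert.h>
#include <stdint.h>

/* Build the 64-bit argument from two 32-bit slots: the low half holds
   the address, the high half holds the explicit zero. */
static uint64_t toy_addr_arg(uint32_t addr, int host_big_endian) {
    uint32_t slots[2];

    slots[host_big_endian] = addr; /* low half of the i64 */
    slots[!host_big_endian] = 0;   /* zero for the high half */
    return host_big_endian ? ((uint64_t)slots[0] << 32) | slots[1]
                           : ((uint64_t)slots[1] << 32) | slots[0];
}

int main(void) {
    assert(toy_addr_arg(0xdeadbeef, 0) == 0xdeadbeefull);
    assert(toy_addr_arg(0xdeadbeef, 1) == 0xdeadbeefull);
    return 0;
}
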
*/ loc = &info->in[next_arg]; - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { + tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); + if (TCG_TARGET_REG_BITS == 32) { /* - * 32-bit host with 32-bit guest: zero-extend the guest address + * 32-bit host (and thus 32-bit guest): zero-extend the guest address * to 64-bits for the helper by storing the low part. Later, * after we have processed the register inputs, we will load a * zero for the high part. */ - tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, ldst->addrlo_reg, -1); + tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, ldst->addr_reg, -1); next_arg += 2; nmov += 1; } else { - n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, ldst->addrlo_reg, ldst->addrhi_reg); + n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, ldst->addr_reg, -1); next_arg += n; nmov += n; } @@ -5543,7 +6323,7 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, g_assert_not_reached(); } - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { + if (TCG_TARGET_REG_BITS == 32) { /* Zero extend the address by loading a zero for the high part. */ loc = &info->in[1 + !HOST_BIG_ENDIAN]; tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); @@ -5552,12 +6332,8 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); } -void tcg_dump_op_count(GString *buf) { - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { - int i, start_words, num_insns; + int i, num_insns; TCGOp *op; if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) && qemu_log_in_addr_range(pc_start))) { @@ -5586,6 +6362,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { } #endif + /* Do not reuse any EBB that may be allocated within the TB. */ + tcg_temp_ebb_reset_freed(s); + tcg_optimize(s); reachable_code_pass(s); @@ -5635,24 +6414,38 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { */ s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); s->code_ptr = s->code_buf; + s->data_gen_ptr = NULL; -#ifdef TCG_TARGET_NEED_LDST_LABELS QSIMPLEQ_INIT(&s->ldst_labels); -#endif -#ifdef TCG_TARGET_NEED_POOL_LABELS s->pool_labels = NULL; -#endif - start_words = s->insn_start_words; - s->gen_insn_data = tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); + s->gen_insn_data = tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * INSN_START_WORDS); + + tcg_out_tb_start(s); num_insns = -1; + s->carry_live = false; QTAILQ_FOREACH (op, &s->ops, link) { TCGOpcode opc = op->opc; switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_extrl_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* + * If TCG_TYPE_I32 is represented in some canonical form, + * e.g. zero or sign-extended, then emit as a unary op. + * Otherwise we can treat this as a plain move. + * If the output dies, treat this as a plain move, because + * this will be implemented with a store. 
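
For reference, the extract opcodes weighed here have simple scalar semantics: extrl takes the low 32 bits of an i64 and extrh the high 32. Whether extrl needs a real instruction or degenerates to a plain move depends only on how the backend canonicalizes its 32-bit values, which is what the comment above is deciding. Plain-C equivalents (toy names):

#include <stdint.h>

static uint32_t toy_extrl_i64_i32(uint64_t v) {
    return (uint32_t)v; /* low half */
}

static uint32_t toy_extrh_i64_i32(uint64_t v) {
    return (uint32_t)(v >> 32); /* high half */
}

int main(void) {
    uint64_t v = 0x1122334455667788ull;
    return toy_extrl_i64_i32(v) == 0x55667788u &&
           toy_extrh_i64_i32(v) == 0x11223344u ? 0 : 1;
}
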
+ */ + if (TCG_TARGET_HAS_extr_i64_i32) { + TCGLifeData arg_life = op->life; + if (!IS_DEAD_ARG(0)) { + goto do_default; + } + } + /* fall through */ + case INDEX_op_mov: case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); break; @@ -5660,6 +6453,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { tcg_reg_alloc_dup(s, op); break; case INDEX_op_insn_start: + assert_carry_dead(s); if (num_insns >= 0) { size_t off = tcg_current_code_size(s); s->gen_insn_end_off[num_insns] = off; @@ -5667,8 +6461,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { assert(s->gen_insn_end_off[num_insns] == off); } num_insns++; - for (i = 0; i < start_words; ++i) { - s->gen_insn_data[num_insns * start_words + i] = tcg_get_insn_start_param(op, i); + for (i = 0; i < INSN_START_WORDS; ++i) { + s->gen_insn_data[num_insns * INSN_START_WORDS + i] = tcg_get_insn_start_param(op, i); } break; case INDEX_op_discard: @@ -5679,6 +6473,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { tcg_out_label(s, arg_label(op->args[0])); break; case INDEX_op_call: + assert_carry_dead(s); tcg_reg_alloc_call(s, op); break; case INDEX_op_exit_tb: @@ -5687,14 +6482,21 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { case INDEX_op_goto_tb: tcg_out_goto_tb(s, op->args[0]); break; + case INDEX_op_br: + tcg_out_br(s, arg_label(op->args[0])); + break; + case INDEX_op_mb: + tcg_out_mb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; } /* fall through */ default: + do_default: /* Sanity check that we've not introduced any unhandled opcodes. */ - tcg_debug_assert(tcg_op_supported(opc)); + tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); /* Note: in order to speed up the code, it would be much faster to have specialized register allocator functions for some common argument patterns */ @@ -5713,22 +6515,20 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { return -2; } } + assert_carry_dead(s); + tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); /* Generate TB finalization at the end of block */ -#ifdef TCG_TARGET_NEED_LDST_LABELS i = tcg_out_ldst_finalize(s); if (i < 0) { return i; } -#endif -#ifdef TCG_TARGET_NEED_POOL_LABELS i = tcg_out_pool_finalize(s); if (i < 0) { return i; } -#endif if (!tcg_resolve_relocs(s)) { return -2; } @@ -5742,10 +6542,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { return tcg_current_code_size(s); } -void tcg_dump_info(GString *buf) { - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - #ifdef ELF_HOST_MACHINE /* In order to use this feature, the backend needs to do three things: diff --git a/libtcg/src/utils/interval-tree.c b/libtcg/src/utils/interval-tree.c new file mode 100644 index 000000000..b76c288ef --- /dev/null +++ b/libtcg/src/utils/interval-tree.c @@ -0,0 +1,853 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "tcg/utils/interval-tree.h" +#include "tcg/utils/atomic.h" +#include "tcg/utils/osdep.h" + +/* + * Red Black Trees. + * + * For now, don't expose Linux Red-Black Trees separately, but retain the + * separate type definitions to keep the implementation sane, and allow + * the possibility of separating them later. + * + * Derived from include/linux/rbtree_augmented.h and its dependencies. 
+ */ + +/* + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + * + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause + * (temporary) loops in the tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + */ + +typedef enum RBColor { + RB_RED, + RB_BLACK, +} RBColor; + +typedef struct RBAugmentCallbacks { + void (*propagate)(RBNode *node, RBNode *stop); + void (*copy)(RBNode *old, RBNode *new); + void (*rotate)(RBNode *old, RBNode *new); +} RBAugmentCallbacks; + +static inline uintptr_t rb_pc(const RBNode *n) { + return qatomic_read(&n->rb_parent_color); +} + +static inline void rb_set_pc(RBNode *n, uintptr_t pc) { + qatomic_set(&n->rb_parent_color, pc); +} + +static inline RBNode *pc_parent(uintptr_t pc) { + return (RBNode *) (pc & ~1); +} + +static inline RBNode *rb_parent(const RBNode *n) { + return pc_parent(rb_pc(n)); +} + +static inline RBNode *rb_red_parent(const RBNode *n) { + return (RBNode *) rb_pc(n); +} + +static inline RBColor pc_color(uintptr_t pc) { + return (RBColor) (pc & 1); +} + +static inline bool pc_is_red(uintptr_t pc) { + return pc_color(pc) == RB_RED; +} + +static inline bool pc_is_black(uintptr_t pc) { + return !pc_is_red(pc); +} + +static inline RBColor rb_color(const RBNode *n) { + return pc_color(rb_pc(n)); +} + +static inline bool rb_is_red(const RBNode *n) { + return pc_is_red(rb_pc(n)); +} + +static inline bool rb_is_black(const RBNode *n) { + return pc_is_black(rb_pc(n)); +} + +static inline void rb_set_black(RBNode *n) { + rb_set_pc(n, rb_pc(n) | RB_BLACK); +} + +static inline void rb_set_parent_color(RBNode *n, RBNode *p, RBColor color) { + rb_set_pc(n, (uintptr_t) p | color); +} + +static inline void rb_set_parent(RBNode *n, RBNode *p) { + rb_set_parent_color(n, p, rb_color(n)); +} + +static inline void rb_link_node(RBNode *node, RBNode *parent, RBNode **rb_link) { + node->rb_parent_color = (uintptr_t) parent; + node->rb_left = node->rb_right = NULL; + + /* + * Ensure that node is initialized before insertion, + * as viewed by a concurrent search. + */ + qatomic_set_mb(rb_link, node); +} + +static RBNode *rb_next(RBNode *node) { + RBNode *parent; + + /* OMIT: if empty node, return null. 
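
The helpers above all lean on one encoding: rb_parent_color packs the parent pointer and the node's color into a single uintptr_t, using bit 0, which node alignment leaves free. A standalone sketch of that encoding (toy names):

#include <stdint.h>

enum { TOY_RED = 0, TOY_BLACK = 1 };

/* Node pointers are at least 2-byte aligned, so bit 0 is free. */
static uintptr_t toy_pack_pc(void *parent, unsigned color) {
    return (uintptr_t)parent | color;
}

static void *toy_pc_parent(uintptr_t pc) {
    return (void *)(pc & ~(uintptr_t)1);
}

static unsigned toy_pc_color(uintptr_t pc) {
    return pc & 1;
}

int main(void) {
    static uint64_t node; /* stands in for a sufficiently aligned RBNode */
    uintptr_t pc = toy_pack_pc(&node, TOY_BLACK);
    return toy_pc_parent(pc) == &node && toy_pc_color(pc) == TOY_BLACK ? 0 : 1;
}
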
*/ + + /* + * If we have a right-hand child, go down and then left as far as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) { + node = node->rb_left; + } + return node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) { + node = parent; + } + + return parent; +} + +static inline void rb_change_child(RBNode *old, RBNode *new, RBNode *parent, RBRoot *root) { + if (!parent) { + qatomic_set(&root->rb_node, new); + } else if (parent->rb_left == old) { + qatomic_set(&parent->rb_left, new); + } else { + qatomic_set(&parent->rb_right, new); + } +} + +static inline void rb_rotate_set_parents(RBNode *old, RBNode *new, RBRoot *root, RBColor color) { + uintptr_t pc = rb_pc(old); + RBNode *parent = pc_parent(pc); + + rb_set_pc(new, pc); + rb_set_parent_color(old, new, color); + rb_change_child(old, new, parent, root); +} + +static void rb_insert_augmented(RBNode *node, RBRoot *root, const RBAugmentCallbacks *augment) { + RBNode *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red. + */ + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the first node, or + * we recursed at Case 1 below and are no longer violating 4). + */ + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } + + /* + * If there is a black parent, we are done. Otherwise, take some + * corrective action as, per 4), we don't want a red root or two + * consecutive red nodes. + */ + if (rb_is_black(parent)) { + break; + } + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - node's uncle is red (color flips). + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and 4) does not + * allow this, we need to recurse at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + tmp = node->rb_left; + qatomic_set(&parent->rb_right, tmp); + qatomic_set(&node->rb_left, parent); + if (tmp) { + rb_set_parent_color(tmp, parent, RB_BLACK); + } + rb_set_parent_color(parent, node, RB_RED); + augment->rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). 
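
Cases 2 and 3 above are ordinary tree rotations with the color and augment maintenance folded in, and with every child-pointer store going through qatomic_set so a concurrent lockless reader never observes a torn pointer. Stripped to the bare pointer surgery, a left rotation looks like this (toy node type, no colors or atomics):

#include <assert.h>

typedef struct ToyNode {
    struct ToyNode *left, *right, *parent;
} ToyNode;

/* Rotate left around x; returns the new subtree root (x's old right child). */
static ToyNode *toy_rotate_left(ToyNode *x) {
    ToyNode *y = x->right;

    x->right = y->left;          /* y's left subtree moves under x */
    if (y->left) {
        y->left->parent = x;
    }
    y->parent = x->parent;       /* y takes x's place */
    y->left = x;
    x->parent = y;
    return y;
}

int main(void) {
    ToyNode a = { 0 }, b = { 0 };

    a.right = &b;
    b.parent = &a;
    assert(toy_rotate_left(&a) == &b && b.left == &a && a.parent == &b);
    return 0;
}
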
+ * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + qatomic_set(&gparent->rb_left, tmp); /* == parent->rb_right */ + qatomic_set(&parent->rb_right, gparent); + if (tmp) { + rb_set_parent_color(tmp, gparent, RB_BLACK); + } + rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment->rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + tmp = node->rb_right; + qatomic_set(&parent->rb_left, tmp); + qatomic_set(&node->rb_right, parent); + if (tmp) { + rb_set_parent_color(tmp, parent, RB_BLACK); + } + rb_set_parent_color(parent, node, RB_RED); + augment->rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + qatomic_set(&gparent->rb_right, tmp); /* == parent->rb_left */ + qatomic_set(&parent->rb_left, gparent); + if (tmp) { + rb_set_parent_color(tmp, gparent, RB_BLACK); + } + rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment->rotate(gparent, parent); + break; + } + } +} + +static void rb_insert_augmented_cached(RBNode *node, RBRootLeftCached *root, bool newleft, + const RBAugmentCallbacks *augment) { + if (newleft) { + root->rb_leftmost = node; + } + rb_insert_augmented(node, &root->rb_root, augment); +} + +static void rb_erase_color(RBNode *parent, RBRoot *root, const RBAugmentCallbacks *augment) { + RBNode *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + tmp1 = sibling->rb_left; + qatomic_set(&parent->rb_right, tmp1); + qatomic_set(&sibling->rb_left, parent); + rb_set_parent_color(tmp1, parent, RB_BLACK); + rb_rotate_set_parents(parent, sibling, root, RB_RED); + augment->rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, RB_RED); + if (rb_is_red(parent)) { + rb_set_black(parent); + } else { + node = parent; + parent = rb_parent(node); + if (parent) { + continue; + } + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N sl + * / \ \ + * sl Sr S + * \ + * Sr + * + * Note: p might be red, and then bot + * p and sl are red after rotation (which + * breaks property 4). 
This is fixed in + * Case 4 (in rb_rotate_set_parents() + * which set sl the color of p + * and set p RB_BLACK) + * + * (p) (sl) + * / \ / \ + * N sl --> P S + * \ / \ + * S N Sr + * \ + * Sr + */ + tmp1 = tmp2->rb_right; + qatomic_set(&sibling->rb_left, tmp1); + qatomic_set(&tmp2->rb_right, sibling); + qatomic_set(&parent->rb_right, tmp2); + if (tmp1) { + rb_set_parent_color(tmp1, sibling, RB_BLACK); + } + augment->rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + tmp2 = sibling->rb_left; + qatomic_set(&parent->rb_right, tmp2); + qatomic_set(&sibling->rb_left, parent); + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) { + rb_set_parent(tmp2, parent); + } + rb_rotate_set_parents(parent, sibling, root, RB_BLACK); + augment->rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + tmp1 = sibling->rb_right; + qatomic_set(&parent->rb_left, tmp1); + qatomic_set(&sibling->rb_right, parent); + rb_set_parent_color(tmp1, parent, RB_BLACK); + rb_rotate_set_parents(parent, sibling, root, RB_RED); + augment->rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, RB_RED); + if (rb_is_red(parent)) { + rb_set_black(parent); + } else { + node = parent; + parent = rb_parent(node); + if (parent) { + continue; + } + } + break; + } + /* Case 3 - left rotate at sibling */ + tmp1 = tmp2->rb_left; + qatomic_set(&sibling->rb_right, tmp1); + qatomic_set(&tmp2->rb_left, sibling); + qatomic_set(&parent->rb_left, tmp2); + if (tmp1) { + rb_set_parent_color(tmp1, sibling, RB_BLACK); + } + augment->rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - right rotate at parent + color flips */ + tmp2 = sibling->rb_right; + qatomic_set(&parent->rb_left, tmp2); + qatomic_set(&sibling->rb_right, parent); + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) { + rb_set_parent(tmp2, parent); + } + rb_rotate_set_parents(parent, sibling, root, RB_BLACK); + augment->rotate(parent, sibling); + break; + } + } +} + +static void rb_erase_augmented(RBNode *node, RBRoot *root, const RBAugmentCallbacks *augment) { + RBNode *child = node->rb_right; + RBNode *tmp = node->rb_left; + RBNode *parent, *rebalance; + uintptr_t pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass rb_erase_color() later on. + */ + pc = rb_pc(node); + parent = pc_parent(pc); + rb_change_child(node, child, parent, root); + if (child) { + rb_set_pc(child, pc); + rebalance = NULL; + } else { + rebalance = pc_is_black(pc) ? 
parent : NULL; + } + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + pc = rb_pc(node); + parent = pc_parent(pc); + rb_set_pc(tmp, pc); + rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + RBNode *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + child2 = successor->rb_right; + qatomic_set(&parent->rb_left, child2); + qatomic_set(&successor->rb_right, child); + rb_set_parent(child, successor); + + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + tmp = node->rb_left; + qatomic_set(&successor->rb_left, tmp); + rb_set_parent(tmp, successor); + + pc = rb_pc(node); + tmp = pc_parent(pc); + rb_change_child(node, successor, tmp, root); + + if (child2) { + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + rebalance = rb_is_black(successor) ? parent : NULL; + } + rb_set_pc(successor, pc); + tmp = successor; + } + + augment->propagate(tmp, NULL); + + if (rebalance) { + rb_erase_color(rebalance, root, augment); + } +} + +static void rb_erase_augmented_cached(RBNode *node, RBRootLeftCached *root, const RBAugmentCallbacks *augment) { + if (root->rb_leftmost == node) { + root->rb_leftmost = rb_next(node); + } + rb_erase_augmented(node, &root->rb_root, augment); +} + +/* + * Interval trees. + * + * Derived from lib/interval_tree.c and its dependencies, + * especially include/linux/interval_tree_generic.h. 
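
The code that follows maintains one augmentation: every node caches in subtree_last the maximum interval endpoint found anywhere in its subtree, which is what lets searches prune whole subtrees. A recursive checker makes the invariant explicit (a sketch with toy types, not the real IntervalTreeNode):

#include <stdbool.h>
#include <stdint.h>

typedef struct ToyItree {
    uint64_t start, last;   /* the interval [start, last] */
    uint64_t subtree_last;  /* cached max of 'last' in this subtree */
    struct ToyItree *left, *right;
} ToyItree;

static uint64_t toy_max3(uint64_t a, uint64_t b, uint64_t c) {
    uint64_t m = a > b ? a : b;
    return m > c ? m : c;
}

static bool toy_check_subtree_last(const ToyItree *n) {
    if (!n) {
        return true;
    }
    uint64_t want = toy_max3(n->last,
                             n->left ? n->left->subtree_last : 0,
                             n->right ? n->right->subtree_last : 0);
    return n->subtree_last == want &&
           toy_check_subtree_last(n->left) &&
           toy_check_subtree_last(n->right);
}

int main(void) {
    ToyItree leaf = { .start = 5, .last = 9, .subtree_last = 9 };
    ToyItree root = { .start = 1, .last = 3, .subtree_last = 9, .right = &leaf };

    return toy_check_subtree_last(&root) ? 0 : 1;
}
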
+ */ + +#define rb_to_itree(N) container_of(N, IntervalTreeNode, rb) + +static bool interval_tree_compute_max(IntervalTreeNode *node, bool exit) { + IntervalTreeNode *child; + uint64_t max = node->last; + + if (node->rb.rb_left) { + child = rb_to_itree(node->rb.rb_left); + if (child->subtree_last > max) { + max = child->subtree_last; + } + } + if (node->rb.rb_right) { + child = rb_to_itree(node->rb.rb_right); + if (child->subtree_last > max) { + max = child->subtree_last; + } + } + if (exit && node->subtree_last == max) { + return true; + } + node->subtree_last = max; + return false; +} + +static void interval_tree_propagate(RBNode *rb, RBNode *stop) { + while (rb != stop) { + IntervalTreeNode *node = rb_to_itree(rb); + if (interval_tree_compute_max(node, true)) { + break; + } + rb = rb_parent(&node->rb); + } +} + +static void interval_tree_copy(RBNode *rb_old, RBNode *rb_new) { + IntervalTreeNode *old = rb_to_itree(rb_old); + IntervalTreeNode *new = rb_to_itree(rb_new); + + new->subtree_last = old->subtree_last; +} + +static void interval_tree_rotate(RBNode *rb_old, RBNode *rb_new) { + IntervalTreeNode *old = rb_to_itree(rb_old); + IntervalTreeNode *new = rb_to_itree(rb_new); + + new->subtree_last = old->subtree_last; + interval_tree_compute_max(old, false); +} + +static const RBAugmentCallbacks interval_tree_augment = { + .propagate = interval_tree_propagate, + .copy = interval_tree_copy, + .rotate = interval_tree_rotate, +}; + +/* Insert / remove interval nodes from the tree */ +void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root) { + RBNode **link = &root->rb_root.rb_node, *rb_parent = NULL; + uint64_t start = node->start, last = node->last; + IntervalTreeNode *parent; + bool leftmost = true; + + while (*link) { + rb_parent = *link; + parent = rb_to_itree(rb_parent); + + if (parent->subtree_last < last) { + parent->subtree_last = last; + } + if (start < parent->start) { + link = &parent->rb.rb_left; + } else { + link = &parent->rb.rb_right; + leftmost = false; + } + } + + node->subtree_last = last; + rb_link_node(&node->rb, rb_parent, link); + rb_insert_augmented_cached(&node->rb, root, leftmost, &interval_tree_augment); +} + +void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root) { + rb_erase_augmented_cached(&node->rb, root, &interval_tree_augment); +} + +/* + * Iterate over intervals intersecting [start;last] + * + * Note that a node's interval intersects [start;last] iff: + * Cond1: node->start <= last + * and + * Cond2: start <= node->last + */ + +static IntervalTreeNode *interval_tree_subtree_search(IntervalTreeNode *node, uint64_t start, uint64_t last) { + while (true) { + /* + * Loop invariant: start <= node->subtree_last + * (Cond2 is satisfied by one of the subtree nodes) + */ + RBNode *tmp = qatomic_read(&node->rb.rb_left); + if (tmp) { + IntervalTreeNode *left = rb_to_itree(tmp); + + if (start <= left->subtree_last) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the + * match we are looking for. Otherwise, there + * is no matching interval as nodes to the + * right of N can't satisfy Cond1 either. 
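
The search above keys off the closed-interval overlap test spelled out in the comment (Cond1 and Cond2). For illustration, the predicate on its own, plus the iteration idiom the iter_first/iter_next pair below supports; the visitor wrapper is hypothetical, while the tree API is the one this file declares in tcg/utils/interval-tree.h:

#include <stdbool.h>
#include <stdint.h>
#include "tcg/utils/interval-tree.h"

/* A node [b0, b1] intersects the query [a0, a1] iff both conditions hold. */
static inline bool toy_intersects(uint64_t a0, uint64_t a1,
                                  uint64_t b0, uint64_t b1) {
    return b0 <= a1    /* Cond1: node->start <= last */
        && a0 <= b1;   /* Cond2: start <= node->last */
}

/* Hypothetical wrapper: visit every interval intersecting [start, last]. */
static void toy_visit_range(IntervalTreeRoot *root, uint64_t start, uint64_t last,
                            void (*visit)(IntervalTreeNode *)) {
    for (IntervalTreeNode *n = interval_tree_iter_first(root, start, last); n;
         n = interval_tree_iter_next(n, start, last)) {
        visit(n);
    }
}
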
+                 */
+                node = left;
+                continue;
+            }
+        }
+        if (node->start <= last) {          /* Cond1 */
+            if (start <= node->last) {      /* Cond2 */
+                return node;                /* node is leftmost match */
+            }
+            tmp = qatomic_read(&node->rb.rb_right);
+            if (tmp) {
+                node = rb_to_itree(tmp);
+                if (start <= node->subtree_last) {
+                    continue;
+                }
+            }
+        }
+        return NULL; /* no match */
+    }
+}
+
+IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root, uint64_t start, uint64_t last) {
+    IntervalTreeNode *node, *leftmost;
+
+    if (!root || !root->rb_root.rb_node) {
+        return NULL;
+    }
+
+    /*
+     * Fastpath range intersection/overlap between A: [a0, a1] and
+     * B: [b0, b1] is given by:
+     *
+     *         a0 <= b1 && b0 <= a1
+     *
+     * ... where A holds the lock range and B holds the smallest
+     * 'start' and largest 'last' in the tree. For the latter, we
+     * rely on the root node, which by augmented interval tree
+     * property, holds the largest value in its last-in-subtree.
+     * This allows mitigating some of the tree walk overhead
+     * for non-intersecting ranges, maintained and consulted in O(1).
+     */
+    node = rb_to_itree(root->rb_root.rb_node);
+    if (node->subtree_last < start) {
+        return NULL;
+    }
+
+    leftmost = rb_to_itree(root->rb_leftmost);
+    if (leftmost->start > last) {
+        return NULL;
+    }
+
+    return interval_tree_subtree_search(node, start, last);
+}
+
+IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node, uint64_t start, uint64_t last) {
+    RBNode *rb, *prev;
+
+    rb = qatomic_read(&node->rb.rb_right);
+    while (true) {
+        /*
+         * Loop invariants:
+         *   Cond1: node->start <= last
+         *   rb == node->rb.rb_right
+         *
+         * First, search right subtree if suitable
+         */
+        if (rb) {
+            IntervalTreeNode *right = rb_to_itree(rb);
+
+            if (start <= right->subtree_last) {
+                return interval_tree_subtree_search(right, start, last);
+            }
+        }
+
+        /* Move up the tree until we come from a node's left child */
+        do {
+            rb = rb_parent(&node->rb);
+            if (!rb) {
+                return NULL;
+            }
+            prev = &node->rb;
+            node = rb_to_itree(rb);
+            rb = qatomic_read(&node->rb.rb_right);
+        } while (prev == rb);
+
+        /* Check if the node intersects [start;last] */
+        if (last < node->start) { /* !Cond1 */
+            return NULL;
+        }
+        if (start <= node->last) { /* Cond2 */
+            return node;
+        }
+    }
+}
+
+/* Occasionally useful for calling from within the debugger. */
+#if 0
+static void debug_interval_tree_int(IntervalTreeNode *node,
+                                    const char *dir, int level)
+{
+    printf("%4d %*s %s [%" PRIu64 ",%" PRIu64 "] subtree_last:%" PRIu64 "\n",
+           level, level + 1, dir, rb_is_red(&node->rb) ? "r" : "b",
+           node->start, node->last, node->subtree_last);
+
+    if (node->rb.rb_left) {
+        debug_interval_tree_int(rb_to_itree(node->rb.rb_left), "<", level + 1);
+    }
+    if (node->rb.rb_right) {
+        debug_interval_tree_int(rb_to_itree(node->rb.rb_right), ">", level + 1);
+    }
+}
+
+void debug_interval_tree(IntervalTreeNode *node);
+void debug_interval_tree(IntervalTreeNode *node)
+{
+    if (node) {
+        debug_interval_tree_int(node, "*", 0);
+    } else {
+        printf("null\n");
+    }
+}
+#endif
diff --git a/testsuite/common-run.sh.tpl b/testsuite/common-run.sh.tpl
index 9339c8b65..a583e76db 100644
--- a/testsuite/common-run.sh.tpl
+++ b/testsuite/common-run.sh.tpl
@@ -60,4 +60,17 @@ check_coverage() {
     fi
 }
 
+wait_s2e() {
+    local TIMEOUT=30
+    local ELAPSED=0
+    while [ ! 
-d "$S2E_LAST" ]; do + if [ $ELAPSED -ge $TIMEOUT ]; then + echo "Timed out waiting for $S2E_LAST to appear after ${TIMEOUT}s" + exit 1 + fi + sleep 1 + ELAPSED=$((ELAPSED + 1)) + done +} + {% include 'helpers.sh' %} diff --git a/testsuite/pov-demo0/run-tests.tpl b/testsuite/pov-demo0/run-tests.tpl index 6641dbeaa..e9db5da1a 100755 --- a/testsuite/pov-demo0/run-tests.tpl +++ b/testsuite/pov-demo0/run-tests.tpl @@ -2,7 +2,11 @@ {% include 'common-run.sh.tpl' %} -s2e run -n {{ project_name }} +s2e run -n {{ project_name }} & +S2E_PID=$! +trap "rc=\$?; kill $S2E_PID 2>/dev/null || true; wait $S2E_PID 2>/dev/null || true; (exit \$rc); on_exit" EXIT + +wait_s2e TARGET="$(jq -r '.target.files[0]' $DIR_NAME/project.json)" @@ -23,7 +27,7 @@ if [ "$BITS" = "32" ]; then elif [ "$PLATFORM" = "windows" ]; then EXPECTED_GENERIC_REGS="ebp" else - echo Invaliid platform + echo Invalid platform exit 1 fi else @@ -99,21 +103,40 @@ check_expected_povs() { for REG in $EXPECTED_SHELLCODE_REGS; do if ! echo $RECIPES | grep -q "generic_shellcode_$REG"; then echo Did not find recipe "generic_shellcode_$REG" - exit 1 + return 1 fi done for REG in $EXPECTED_GENERIC_REGS; do if ! echo $RECIPES | grep -q "generic_reg_$REG"; then echo Did not find recipe "generic_reg_$REG" - exit 1 + return 1 fi done + + return 0 } +# Periodically check for expected POVs while S2E is running. +# If all expected POVs are found, stop S2E early. +while kill -0 $S2E_PID 2>/dev/null; do + RECIPES=$S2E_LAST/*input_0-recipe-type1_* + if check_expected_povs "$RECIPES" >/dev/null 2>&1; then + echo "All expected POVs found, stopping S2E" + kill $S2E_PID 2>/dev/null + wait $S2E_PID 2>/dev/null || true + break + fi + + sleep 2 +done + +# Wait for S2E to finish if it exited on its own +wait $S2E_PID 2>/dev/null || true + RECIPES=$S2E_LAST/*input_0-recipe-type1_* -check_expected_povs "$RECIPES" +check_expected_povs "$RECIPES" || exit 1 for RECIPE in $RECIPES; do echo Checking $RECIPE