-rw-r--r--  include/exec/def-helper.h  |   2
-rw-r--r--  tcg/README                 |   5
-rw-r--r--  tcg/aarch64/tcg-target.h   |   1
-rw-r--r--  tcg/i386/tcg-target.h      |   1
-rw-r--r--  tcg/ia64/tcg-target.h      |   1
-rw-r--r--  tcg/optimize.c             |  16
-rw-r--r--  tcg/ppc64/tcg-target.h     |   1
-rw-r--r--  tcg/s390/tcg-target.h      |   1
-rw-r--r--  tcg/sparc/tcg-target.c     | 842
-rw-r--r--  tcg/sparc/tcg-target.h     |  17
-rw-r--r--  tcg/tcg-op.h               |  54
-rw-r--r--  tcg/tcg-opc.h              |   4
-rw-r--r--  tcg/tcg.c                  |  80
-rw-r--r--  tcg/tcg.h                  |   1
-rw-r--r--  tcg/tci/tcg-target.h       |   1
15 files changed, 498 insertions(+), 529 deletions(-)
diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h index 73d51f9cf5..255b58bb03 100644 --- a/include/exec/def-helper.h +++ b/include/exec/def-helper.h @@ -84,7 +84,7 @@ #define dh_is_64bit_noreturn 0 #define dh_is_64bit_i32 0 #define dh_is_64bit_i64 1 -#define dh_is_64bit_ptr (TCG_TARGET_REG_BITS == 64) +#define dh_is_64bit_ptr (sizeof(void *) == 8) #define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) #define dh_is_signed_void 0 diff --git a/tcg/README b/tcg/README index 776e9259e3..a550ff176d 100644 --- a/tcg/README +++ b/tcg/README @@ -314,6 +314,11 @@ This operation would be equivalent to dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) +* trunc_shr_i32 t0, t1, pos + +For 64-bit hosts only, right shift the 64-bit input T1 by POS and +truncate to 32-bit output T0. Depending on the host, this may be +a simple mov/shift, or may require additional canonicalization. ********* Conditional moves diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index eff1d68fc6..a1d4322165 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -68,6 +68,7 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index ababca0569..dbeb16d3ac 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -99,6 +99,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 09c3ba8fe3..d834beb323 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -152,6 +152,7 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_new_ldst 1 diff --git a/tcg/optimize.c b/tcg/optimize.c index c447062ab1..0302f4f99a 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -228,6 +228,7 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); + case INDEX_op_trunc_shr_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -830,6 +831,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } break; + case INDEX_op_trunc_shr_i32: + mask = (uint64_t)temps[args[1]].mask >> args[2]; + break; + CASE_OP_32_64(shl): if (temps[args[2]].state == TCG_TEMP_CONST) { tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); @@ -1021,6 +1026,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } goto do_default; + case INDEX_op_trunc_shr_i32: + if (temps[args[1]].state == TCG_TEMP_CONST) { + s->gen_opc_buf[op_index] = op_to_movi(op); + tmp = do_constant_folding(op, temps[args[1]].val, args[2]); + tcg_opt_gen_movi(gen_args, args[0], tmp); + gen_args += 2; + args += 3; + break; + } + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 78bbf7a34a..3815b840a6 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -96,6 +96,7 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define 
TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index b3bfdcc22c..755c00237e 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -69,6 +69,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 35089b82c9..5468ff5c16 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -61,6 +61,24 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif +#ifdef __arch64__ +# define SPARC64 1 +#else +# define SPARC64 0 +#endif + +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o + registers. These are saved manually by the kernel in full 64-bit + slots. The %i and %l registers are saved by the register window + mechanism, which only allocates space for 32 bits. Given that this + window spill/fill can happen on any signal, we must consider the + high bits of the %i and %l registers garbage at all times. */ +#if SPARC64 +# define ALL_64 0xffffffffu +#else +# define ALL_64 0xffffu +#endif + /* Define some temporary registers. T2 is used for constant generation. */ #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 @@ -182,6 +200,7 @@ static const int tcg_target_call_oarg_regs[] = { #define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08)) #define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c)) #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) @@ -201,6 +220,7 @@ static const int tcg_target_call_oarg_regs[] = { #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) #define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) #define JMPL (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) #define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) #define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) #define SETHI (INSN_OP(0) | INSN_OP2(0x4)) @@ -242,17 +262,23 @@ static const int tcg_target_call_oarg_regs[] = { #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) -static inline int check_fit_tl(tcg_target_long val, unsigned int bits) +static inline int check_fit_i64(int64_t val, unsigned int bits) { - return (val << ((sizeof(tcg_target_long) * 8 - bits)) - >> (sizeof(tcg_target_long) * 8 - bits)) == val; + return val == sextract64(val, 0, bits); } -static inline int check_fit_i32(uint32_t val, unsigned int bits) +static inline int check_fit_i32(int32_t val, unsigned int bits) { - return ((val << (32 - bits)) >> (32 - bits)) == val; + return val == sextract32(val, 0, bits); } +#define check_fit_tl check_fit_i64 +#if SPARC64 +# define check_fit_ptr check_fit_i64 +#else +# define check_fit_ptr check_fit_i32 +#endif + static void patch_reloc(uint8_t *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -267,7 +293,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, break; case R_SPARC_WDISP16: value -= (intptr_t)code_ptr; - if (!check_fit_tl(value >> 2, 16)) { + if (!check_fit_ptr(value >> 2, 16)) { tcg_abort(); } insn = *(uint32_t *)code_ptr; @@ -277,7 +303,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, break; case R_SPARC_WDISP19: 
value -= (intptr_t)code_ptr; - if (!check_fit_tl(value >> 2, 19)) { + if (!check_fit_ptr(value >> 2, 19)) { tcg_abort(); } insn = *(uint32_t *)code_ptr; @@ -301,14 +327,27 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; - case 'L': /* qemu_ld/st constraint */ + case 'R': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - // Helper args + tcg_regset_set32(ct->u.regs, 0, ALL_64); + break; + case 'A': /* qemu_ld/st address constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, + TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); + reserve_helpers: tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); break; + case 's': /* qemu_st data 32-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + goto reserve_helpers; + case 'S': /* qemu_st data 64-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + goto reserve_helpers; case 'I': ct->ct |= TCG_CT_CONST_S11; break; @@ -351,22 +390,20 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, } } -static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2, - int op) +static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, + TCGReg rs2, int op) { - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | - INSN_RS2(rs2)); + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); } -static inline void tcg_out_arithi(TCGContext *s, int rd, int rs1, - uint32_t offset, int op) +static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t offset, int op) { - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | - INSN_IMM13(offset)); + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); } -static void tcg_out_arithc(TCGContext *s, int rd, int rs1, - int val2, int val2const, int op) +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int op) { tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); @@ -380,12 +417,12 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type, } } -static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg) +static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) { tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); } -static inline void tcg_out_movi_imm13(TCGContext *s, int ret, uint32_t arg) +static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) { tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); } @@ -393,7 +430,12 @@ static inline void tcg_out_movi_imm13(TCGContext *s, int ret, uint32_t arg) static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { - tcg_target_long hi, lo; + tcg_target_long hi, lo = (int32_t)arg; + + /* Make sure we test 32-bit constants for imm13 properly. */ + if (type == TCG_TYPE_I32) { + arg = lo; + } /* A 13-bit constant sign-extended to 64-bits. */ if (check_fit_tl(arg, 13)) { @@ -402,9 +444,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* A 32-bit constant, or 32-bit zero-extended to 64-bits. 
*/ - if (TCG_TARGET_REG_BITS == 32 - || type == TCG_TYPE_I32 - || (arg & ~0xffffffffu) == 0) { + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { tcg_out_sethi(s, ret, arg); if (arg & 0x3ff) { tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); @@ -413,21 +453,20 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } /* A 32-bit constant sign-extended to 64-bits. */ - if (check_fit_tl(arg, 32)) { + if (arg == lo) { tcg_out_sethi(s, ret, ~arg); tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); return; } /* A 64-bit constant decomposed into 2 32-bit pieces. */ - lo = (int32_t)arg; - if (check_fit_tl(lo, 13)) { - hi = (arg - lo) >> 31 >> 1; + if (check_fit_i32(lo, 13)) { + hi = (arg - lo) >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { - hi = arg >> 31 >> 1; + hi = arg >> 32; tcg_out_movi(s, TCG_TYPE_I32, ret, hi); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); @@ -435,16 +474,16 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1, - int a2, int op) +static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, + TCGReg a2, int op) { tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); } -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, - int offset, int op) +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + intptr_t offset, int op) { - if (check_fit_tl(offset, 13)) { + if (check_fit_ptr(offset, 13)) { tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | INSN_IMM13(offset)); } else { @@ -465,40 +504,24 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); } -static inline void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) { - TCGReg base = TCG_REG_G0; - if (!check_fit_tl(arg, 10)) { - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); - base = ret; - } - tcg_out_ld(s, TCG_TYPE_PTR, ret, base, arg & 0x3ff); + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); } -static inline void tcg_out_sety(TCGContext *s, int rs) +static inline void tcg_out_sety(TCGContext *s, TCGReg rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static inline void tcg_out_rdy(TCGContext *s, int rd) +static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) { tcg_out32(s, RDY | INSN_RD(rd)); } -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) -{ - if (val != 0) { - if (check_fit_tl(val, 13)) - tcg_out_arithi(s, reg, reg, val, ARITH_ADD); - else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val); - tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD); - } - } -} - -static void tcg_out_div32(TCGContext *s, int rd, int rs1, - int val2, int val2const, int uns) +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int uns) { /* Load Y with the sign/zero extension of RS1 to 64-bits. 
*/ if (uns) { @@ -559,38 +582,37 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label) tcg_out_bpcc0(s, scond, flags, off19); } -static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) { tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); } -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int const_arg2, int label) +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, int label) { tcg_out_cmp(s, arg1, arg2, const_arg2); tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label); tcg_out_nop(s); } -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret, - TCGArg v1, int v1const) +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, + int32_t v1, int v1const) { tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) | INSN_RS1(tcg_cond_to_bcond[cond]) | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); } -static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const, - TCGArg v1, int v1const) +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) { tcg_out_cmp(s, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int const_arg2, int label) +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, int label) { /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ if (arg2 == 0 && !is_unsigned_cond(cond)) { @@ -613,71 +635,32 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, tcg_out_nop(s); } -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, - TCGArg v1, int v1const) +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, + int32_t v1, int v1const) { tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) | (tcg_cond_to_rcond[cond] << 10) | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); } -static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const, - TCGArg v1, int v1const) +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) { /* For 64-bit signed comparisons vs zero, we can avoid the compare. Note that the immediate range is one bit smaller, so we must check for that as well. */ if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_tl(v1, 10))) { + && (!v1const || check_fit_i32(v1, 10))) { tcg_out_movr(s, cond, ret, c1, v1, v1const); } else { tcg_out_cmp(s, c1, c2, c2const); tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); } } -#else -static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst, int label_dest) -{ - int scond, label_next = gen_new_label(); - - tcg_out_cmp(s, ah, bh, bhconst); - - /* Note that we fill one of the delay slots with the second compare. 
*/ - switch (cond) { - case TCG_COND_EQ: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest); - break; - - case TCG_COND_NE: - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_cmp(s, al, bl, blconst); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest); - break; - - default: - scond = tcg_cond_to_bcond[tcg_high_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - tcg_out_nop(s); - tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next); - tcg_out_cmp(s, al, bl, blconst); - scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)]; - tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest); - break; - } - tcg_out_nop(s); - tcg_out_label(s, label_next, s->code_ptr); -} -#endif - -static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) { /* For 32-bit comparisons, we can play games with ADDX/SUBX. */ switch (cond) { @@ -702,7 +685,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, swap the operands on GTU/LEU. There's no benefit to loading the constant into a temporary register. */ if (!c2const || c2 == 0) { - TCGArg t = c1; + TCGReg t = c1; c1 = c2; c2 = t; c2const = 0; @@ -726,9 +709,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, } } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, int c2const) +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) { /* For 64-bit signed comparisons vs zero, we can avoid the compare if the input does not overlap the output. */ @@ -741,54 +723,12 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); } } -#else -static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg al, TCGArg ah, - TCGArg bl, int blconst, - TCGArg bh, int bhconst) -{ - int tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. 
*/ - if (ret != ah && (bhconst || ret != bh)) { - tmp = ret; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (bl == 0 && bh == 0) { - if (cond == TCG_COND_EQ) { - tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC); - tcg_out_movi(s, TCG_TYPE_I32, ret, 1); - } else { - tcg_out_arith(s, ret, al, ah, ARITH_ORCC); - } - } else { - tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - } - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1); - break; - default: - /* <= : ah < bh | (ah == bh && al <= bl) */ - tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst); - tcg_out_cmp(s, ah, bh, bhconst); - tcg_out_mov(s, TCG_TYPE_I32, ret, tmp); - tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1); - tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1); - break; - } -} -#endif - -static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, - TCGArg al, TCGArg ah, TCGArg bl, int blconst, - TCGArg bh, int bhconst, int opl, int oph) +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, int opl, int oph) { - TCGArg tmp = TCG_REG_T1; + TCGReg tmp = TCG_REG_T1; /* Note that the low parts are fully consumed before tmp is set. */ if (rl != ah && (bhconst || rl != bh)) { @@ -800,7 +740,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh, tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); } -static inline void tcg_out_calli(TCGContext *s, uintptr_t dest) +static void tcg_out_calli(TCGContext *s, uintptr_t dest) { intptr_t disp = dest - (uintptr_t)s->code_ptr; @@ -857,8 +797,13 @@ static void build_trampolines(TCGContext *s) } qemu_ld_trampoline[i] = tramp; - /* Find the retaddr argument register. */ - ra = TCG_REG_O3 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS); + if (SPARC64 || TARGET_LONG_BITS == 32) { + ra = TCG_REG_O3; + } else { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + ra = TCG_REG_O4; + } /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); @@ -882,12 +827,28 @@ static void build_trampolines(TCGContext *s) } qemu_st_trampoline[i] = tramp; - /* Find the retaddr argument. For 32-bit, this may be past the - last argument register, and need passing on the stack. */ - ra = (TCG_REG_O4 - + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) - + (TCG_TARGET_REG_BITS == 32 && (i & MO_SIZE) == MO_64)); - + if (SPARC64) { + ra = TCG_REG_O4; + } else { + ra = TCG_REG_O1; + if (TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + if ((i & MO_SIZE) == MO_64) { + /* Install the high part of the data. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + /* Skip the mem_index argument. */ + ra += 1; + } + /* Set the retaddr operand. */ if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, @@ -956,25 +917,16 @@ static void tcg_target_qemu_prologue(TCGContext *s) The result of the TLB comparison is in %[ix]cc. The sanitized address is in the returned register, maybe %o0. The TLB addend is in %o1. 
*/ -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, int which) +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + TCGMemOp s_bits, int which) { const TCGReg r0 = TCG_REG_O0; const TCGReg r1 = TCG_REG_O1; const TCGReg r2 = TCG_REG_O2; - TCGReg addr = addrlo; int tlb_ofs; - if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { - /* Assemble the 64-bit address in R0. */ - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); - tcg_out_arithi(s, r1, addrhi, 32, SHIFT_SLLX); - tcg_out_arith(s, r0, r0, r1, ARITH_OR); - addr = r0; - } - /* Shift the page number down. */ - tcg_out_arithi(s, r1, addrlo, TARGET_PAGE_BITS, SHIFT_SRL); + tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); /* Mask out the page offset, except for the required alignment. */ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, @@ -994,8 +946,11 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* Find a base address that can load both tlb comparator and addend. */ tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); - if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) { - tcg_out_addi(s, r1, tlb_ofs & ~0x3ff); + if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { + if (tlb_ofs & ~0x3ff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); + tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); + } tlb_ofs &= 0x3ff; } @@ -1007,11 +962,11 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_cmp(s, r0, r2, 0); /* If the guest address must be zero-extended, do so now. */ - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL); + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); return r0; } - return addrlo; + return addr; } #endif /* CONFIG_SOFTMMU */ @@ -1044,78 +999,37 @@ static const int qemu_st_opc[16] = { [MO_LEQ] = STX_LE, }; -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi, bool is_64) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; TCGReg addrz, param; uintptr_t func; - int memi; - uint32_t *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; + uint32_t *label_ptr; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_read)); - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - int reg64; - - /* bne,pn %[xi]cc, label0 */ - label_ptr[0] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_NE, BPCC_PN - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - tcg_out_nop(s); - - /* TLB Hit. */ - /* Load all 64-bits into an O/G register. */ - reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - tcg_out_ldst_rr(s, reg64, addrz, TCG_REG_O1, qemu_ld_opc[memop]); + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ - /* Move the two 32-bit pieces into the destination registers. 
*/ - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - - /* b,a,pt label1 */ - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0); - } else { - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr[0] = NULL; - label_ptr[1] = (uint32_t *)s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, datalo, addrz, TCG_REG_O1, qemu_ld_opc[memop]); - } + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = (uint32_t *)s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]); /* TLB Miss. */ - if (label_ptr[0]) { - *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[0]); - } - param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ @@ -1129,81 +1043,54 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) /* delay slot */ tcg_out_movi(s, TCG_TYPE_I32, param, memi); - switch (memop & ~MO_BSWAP) { - case MO_SL: - tcg_out_arithi(s, datalo, TCG_REG_O0, 0, SHIFT_SRA); - break; - case MO_Q: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_mov(s, TCG_TYPE_REG, datahi, TCG_REG_O0); - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O1); - break; + /* Recall that all of the helpers return 64-bit results. + Which complicates things for sparcv8plus. */ + if (SPARC64) { + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & ~MO_BSWAP) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + } else { + if (s_bits == MO_64) { + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); + tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); + } else if (is_64) { + /* Re-extend from 32-bit rather than reassembling when we + know the high register must be an extension. */ + tcg_out_arithi(s, data, TCG_REG_O1, 0, + memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); } - /* FALLTHRU */ - default: - /* mov */ - tcg_out_mov(s, TCG_TYPE_REG, datalo, TCG_REG_O0); - break; } - *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr[1]); + *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr); #else - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - int reg64 = (datalo < 16 ? datalo : TCG_REG_O0); - - tcg_out_ldst_rr(s, reg64, addrlo, - (GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); - - tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX); - if (reg64 != datalo) { - tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64); - } - } else { - tcg_out_ldst_rr(s, datalo, addrlo, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop]); + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } + tcg_out_ldst_rr(s, data, addr, + (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[memop]); #endif /* CONFIG_SOFTMMU */ } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOp memop, int memi) { - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOp memop, s_bits; -#if defined(CONFIG_SOFTMMU) - TCGReg addrz, datafull, param; +#ifdef CONFIG_SOFTMMU + TCGMemOp s_bits = memop & MO_SIZE; + TCGReg addrz, param; uintptr_t func; - int memi; uint32_t *label_ptr; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - memop = *args++; - s_bits = memop & MO_SIZE; -#if defined(CONFIG_SOFTMMU) - memi = *args++; - addrz = tcg_out_tlb_load(s, addrlo, addrhi, memi, s_bits, + addrz = tcg_out_tlb_load(s, addr, memi, s_bits, offsetof(CPUTLBEntry, addr_write)); - datafull = datalo; - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - /* Reconstruct the full 64-bit value. */ - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datafull = TCG_REG_O2; - } - /* The fast path is exactly one insn. Thus we can perform the entire TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ /* beq,a,pt %[xi]cc, label0 */ @@ -1211,19 +1098,21 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); /* delay slot */ - tcg_out_ldst_rr(s, datafull, addrz, TCG_REG_O1, qemu_st_opc[memop]); + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]); /* TLB Miss. */ param = TCG_REG_O1; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_mov(s, TCG_TYPE_REG, param++, addrhi); + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrlo); - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - tcg_out_mov(s, TCG_TYPE_REG, param++, datahi); + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + if (!SPARC64 && s_bits == MO_64) { + /* Skip the high-part; we'll perform the extract in the trampoline. 
*/ + param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, datalo); + tcg_out_mov(s, TCG_TYPE_REG, param++, data); func = qemu_st_trampoline[memop]; assert(func != 0); @@ -1231,106 +1120,100 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) /* delay slot */ tcg_out_movi(s, TCG_TYPE_REG, param, memi); - *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr - - (unsigned long)label_ptr); + *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr); #else - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addrlo, 0, SHIFT_SRL); - addrlo = TCG_REG_T1; - } - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL); - tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX); - tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR); - datalo = TCG_REG_O2; + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, datalo, addrlo, + tcg_out_ldst_rr(s, data, addr, (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop]); #endif /* CONFIG_SOFTMMU */ } -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) { - int c; + TCGArg a0, a1, a2; + int c, c2; + + /* Hoist the loads of the most common arguments. */ + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, args[0]); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, JMPL); - tcg_out32(s, RESTORE | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_G0) | - INSN_RS2(TCG_REG_G0)); + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ uint32_t old_insn = *(uint32_t *)s->code_ptr; - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf; /* Make sure to preserve links during retranslation. 
*/ tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1))); } else { /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + args[0])); + tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); } tcg_out_nop(s); - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + s->tb_next_offset[a0] = s->code_ptr - s->code_buf; break; case INDEX_op_call: if (const_args[0]) { - tcg_out_calli(s, args[0]); + tcg_out_calli(s, a0); } else { - tcg_out_arithi(s, TCG_REG_O7, args[0], 0, JMPL); + tcg_out_arithi(s, TCG_REG_O7, a0, 0, JMPL); } /* delay slot */ tcg_out_nop(s); break; case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]); + tcg_out_bpcc(s, COND_A, BPCC_PT, a0); tcg_out_nop(s); break; - case INDEX_op_movi_i32: - tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]); - break; -#if TCG_TARGET_REG_BITS == 64 #define OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -#else -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32) -#endif + OP_32_64(ld8u): - tcg_out_ldst(s, args[0], args[1], args[2], LDUB); + tcg_out_ldst(s, a0, a1, a2, LDUB); break; OP_32_64(ld8s): - tcg_out_ldst(s, args[0], args[1], args[2], LDSB); + tcg_out_ldst(s, a0, a1, a2, LDSB); break; OP_32_64(ld16u): - tcg_out_ldst(s, args[0], args[1], args[2], LDUH); + tcg_out_ldst(s, a0, a1, a2, LDUH); break; OP_32_64(ld16s): - tcg_out_ldst(s, args[0], args[1], args[2], LDSH); + tcg_out_ldst(s, a0, a1, a2, LDSH); break; case INDEX_op_ld_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_ld32u_i64: -#endif - tcg_out_ldst(s, args[0], args[1], args[2], LDUW); + tcg_out_ldst(s, a0, a1, a2, LDUW); break; OP_32_64(st8): - tcg_out_ldst(s, args[0], args[1], args[2], STB); + tcg_out_ldst(s, a0, a1, a2, STB); break; OP_32_64(st16): - tcg_out_ldst(s, args[0], args[1], args[2], STH); + tcg_out_ldst(s, a0, a1, a2, STH); break; case INDEX_op_st_i32: -#if TCG_TARGET_REG_BITS == 64 case INDEX_op_st32_i64: -#endif - tcg_out_ldst(s, args[0], args[1], args[2], STW); + tcg_out_ldst(s, a0, a1, a2, STW); break; OP_32_64(add): c = ARITH_ADD; @@ -1357,7 +1240,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, c = SHIFT_SLL; do_shift32: /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c); + tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); break; case INDEX_op_shr_i32: c = SHIFT_SRL; @@ -1377,85 +1260,71 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, goto gen_arith1; case INDEX_op_div_i32: - tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 0); + tcg_out_div32(s, a0, a1, a2, c2, 0); break; case INDEX_op_divu_i32: - tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 1); + tcg_out_div32(s, a0, a1, a2, c2, 1); break; case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, args[2], args[0], args[1], const_args[1], - args[3]); + tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], args[3]); break; case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], args[0], args[1], - args[2], const_args[2]); + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); break; case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); - break; - -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2_i32(s, args[4], args[0], args[1], - args[2], const_args[2], - args[3], const_args[3], args[5]); + tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], - args[4], const_args[4]); - break; -#endif case INDEX_op_add2_i32: - tcg_out_addsub2(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDX); + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4], + args[5], const_args[5], ARITH_ADDCC, ARITH_ADDX); break; case INDEX_op_sub2_i32: - tcg_out_addsub2(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBX); + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4], + args[5], const_args[5], ARITH_SUBCC, ARITH_SUBX); break; case INDEX_op_mulu2_i32: - tcg_out_arithc(s, args[0], args[2], args[3], const_args[3], - ARITH_UMUL); - tcg_out_rdy(s, args[1]); + c = ARITH_UMUL; + goto do_mul2; + case INDEX_op_muls2_i32: + c = ARITH_SMUL; + do_mul2: + /* The 32-bit multiply insns produce a full 64-bit result. If the + destination register can hold it, we can avoid the slower RDY. */ + tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); + if (SPARC64 || a0 <= TCG_REG_O7) { + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); + } else { + tcg_out_rdy(s, a1); + } break; case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, a0, a1, a2, args[3], false); break; case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, a0, a1, a2, args[3], true); break; case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, a0, a1, a2, args[3]); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_movi_i64: - tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); - break; case INDEX_op_ld32s_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LDSW); + tcg_out_ldst(s, a0, a1, a2, LDSW); break; case INDEX_op_ld_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LDX); + tcg_out_ldst(s, a0, a1, a2, LDX); break; case INDEX_op_st_i64: - tcg_out_ldst(s, args[0], args[1], args[2], STX); + tcg_out_ldst(s, a0, a1, a2, STX); break; case INDEX_op_shl_i64: c = SHIFT_SLLX; do_shift64: /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c); + tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); break; case INDEX_op_shr_i64: c = SHIFT_SRLX; @@ -1473,35 +1342,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, c = ARITH_UDIVX; goto gen_arith; case INDEX_op_ext32s_i64: - tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRA); + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); break; case INDEX_op_ext32u_i64: - tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRL); + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); + break; + case INDEX_op_trunc_shr_i32: + if (a2 == 0) { + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + } else { + tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); + } break; case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, args[2], args[0], args[1], const_args[1], - args[3]); + tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], args[3]); break; case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], args[0], args[1], - args[2], const_args[2]); + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); break; case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); + tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; -#endif + gen_arith: - tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c); + tcg_out_arithc(s, a0, a1, a2, c2, c); break; gen_arith1: - tcg_out_arithc(s, args[0], TCG_REG_G0, args[1], const_args[1], c); + tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); break; + case INDEX_op_mov_i64: + case INDEX_op_mov_i32: + case INDEX_op_movi_i64: + case INDEX_op_movi_i32: + /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */ default: - fprintf(stderr, "unknown opcode 0x%x\n", opc); + /* Opcode not implemented. 
*/ tcg_abort(); } } @@ -1545,72 +1423,55 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } }, -#endif - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_mov_i64, { "r", "r" } }, - { INDEX_op_movi_i64, { "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_div_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_divu_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sub_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_andc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_or_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_xor_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_shl_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_shr_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_sar_i64, { "r", "rZ", "rJ" } }, - - { INDEX_op_neg_i64, { "r", "rJ" } }, - { INDEX_op_not_i64, { "r", "rJ" } }, - - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_brcond_i64, { "rZ", "rJ" } }, - { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } }, - { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, -#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, -#endif + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, + + { INDEX_op_mov_i64, { "R", "R" } }, + { INDEX_op_movi_i64, { "R" } }, + { INDEX_op_ld8u_i64, { "R", "r" } }, + { INDEX_op_ld8s_i64, { "R", "r" } }, + { INDEX_op_ld16u_i64, { "R", "r" } }, + { INDEX_op_ld16s_i64, { "R", "r" } }, + { INDEX_op_ld32u_i64, { "R", "r" } }, + { INDEX_op_ld32s_i64, { "R", "r" } }, + { INDEX_op_ld_i64, { "R", "r" } }, + { INDEX_op_st8_i64, { "RZ", "r" } }, + { INDEX_op_st16_i64, { "RZ", "r" } }, + { INDEX_op_st32_i64, { "RZ", "r" } }, + { INDEX_op_st_i64, { "RZ", "r" } }, + + { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, + { 
INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_neg_i64, { "R", "RJ" } }, + { INDEX_op_not_i64, { "R", "RJ" } }, + + { INDEX_op_ext32s_i64, { "R", "r" } }, + { INDEX_op_ext32u_i64, { "R", "r" } }, + { INDEX_op_trunc_shr_i32, { "r", "R" } }, + + { INDEX_op_brcond_i64, { "RZ", "RJ" } }, + { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + + { INDEX_op_qemu_ld_i32, { "r", "A" } }, + { INDEX_op_qemu_ld_i64, { "R", "A" } }, + { INDEX_op_qemu_st_i32, { "sZ", "A" } }, + { INDEX_op_qemu_st_i64, { "SZ", "A" } }, { -1 }, }; @@ -1618,9 +1479,8 @@ static const TCGTargetOpDef sparc_op_defs[] = { static void tcg_target_init(TCGContext *s) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); -#if TCG_TARGET_REG_BITS == 64 - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); -#endif + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_G1) | (1 << TCG_REG_G2) | @@ -1650,7 +1510,7 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(sparc_op_defs); } -#if TCG_TARGET_REG_BITS == 64 +#if SPARC64 # define ELF_HOST_MACHINE EM_SPARCV9 #else # define ELF_HOST_MACHINE EM_SPARC32PLUS @@ -1660,7 +1520,7 @@ static void tcg_target_init(TCGContext *s) typedef struct { DebugFrameCIE cie; DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[TCG_TARGET_REG_BITS == 64 ? 4 : 2]; + uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; uint8_t fde_win_save; uint8_t fde_ret_save[3]; } DebugFrame; @@ -1677,7 +1537,7 @@ static DebugFrame debug_frame = { .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), .fde_def_cfa = { -#if TCG_TARGET_REG_BITS == 64 +#if SPARC64 12, 30, /* DW_CFA_def_cfa i6, 2047 */ (2047 & 0x7f) | 0x80, (2047 >> 7) #else diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 4519c64ae9..3a903dbfd0 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -24,13 +24,7 @@ #ifndef TCG_TARGET_SPARC #define TCG_TARGET_SPARC 1 -#if UINTPTR_MAX == UINT32_MAX -# define TCG_TARGET_REG_BITS 32 -#elif UINTPTR_MAX == UINT64_MAX -# define TCG_TARGET_REG_BITS 64 -#else -# error Unknown pointer size for tcg target -#endif +#define TCG_TARGET_REG_BITS 64 #define TCG_TARGET_NB_REGS 32 @@ -76,7 +70,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_O6 -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) @@ -86,7 +80,7 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) #endif -#if TCG_TARGET_REG_BITS == 64 +#ifdef __arch64__ #define TCG_TARGET_EXTEND_ARGS 1 #endif @@ -112,11 +106,11 @@ typedef enum { #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 @@ -144,7 +138,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 0 #define 
TCG_TARGET_HAS_mulsh_i64 0 -#endif #define TCG_TARGET_HAS_new_ldst 1 diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 8d4ff7da9b..6dbbb38838 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -1624,9 +1624,20 @@ static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +static inline void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, + unsigned int count) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + tcg_debug_assert(count < 64); + if (count >= 32) { + tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); + } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, arg, count); + tcg_gen_mov_i32(ret, TCGV_LOW(t)); + tcg_temp_free_i64(t); + } } static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) @@ -1727,11 +1738,21 @@ static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) } } -/* Note: we assume the target supports move between 32 and 64 bit - registers. This will probably break MIPS64 targets. */ -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +static inline void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, + unsigned int count) { - tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + tcg_debug_assert(count < 64); + if (TCG_TARGET_HAS_trunc_shr_i32) { + tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, + MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); + } else if (count == 0) { + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, arg, count); + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); + tcg_temp_free_i64(t); + } } /* Note: we assume the target supports move between 32 and 64 bit @@ -2275,18 +2296,15 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, tcg_gen_deposit_i64(dest, low, high, 32, 32); } +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_trunc_shr_i64_i32(ret, arg, 0); +} + static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) { -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_mov_i32(lo, TCGV_LOW(arg)); - tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); -#else - TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_trunc_i64_i32(lo, arg); - tcg_gen_shri_i64(t0, arg, 32); - tcg_gen_trunc_i64_i32(hi, t0); - tcg_temp_free_i64(t0); -#endif + tcg_gen_trunc_shr_i64_i32(lo, arg, 0); + tcg_gen_trunc_shr_i64_i32(hi, arg, 32); } static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) @@ -2861,7 +2879,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #endif -#if TCG_TARGET_REG_BITS == 32 +#if UINTPTR_MAX == UINT32_MAX # define tcg_gen_ld_ptr(R, A, O) \ tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O)) # define tcg_gen_discard_ptr(A) \ @@ -2883,4 +2901,4 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B)) # define tcg_gen_ext_i32_ptr(R, A) \ tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A)) -#endif /* TCG_TARGET_REG_BITS == 32 */ +#endif /* UINTPTR_MAX == UINT32_MAX */ diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index d71707d9bb..adb6ce1bb9 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -147,6 +147,10 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 
2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) +DEF(trunc_shr_i32, 1, 1, 1, + IMPL(TCG_TARGET_HAS_trunc_shr_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) + DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) @@ -664,7 +664,36 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, int nb_rets; TCGArg *nparam; -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* We have 64-bit values in one register, but need to pass as two + separate parameters. Split them. */ + int orig_sizemask = sizemask; + int orig_nargs = nargs; + TCGv_i64 retl, reth; + + TCGV_UNUSED_I64(retl); + TCGV_UNUSED_I64(reth); + if (sizemask != 0) { + TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); + for (i = real_args = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + TCGv_i32 h = tcg_temp_new_i32(); + TCGv_i32 l = tcg_temp_new_i32(); + tcg_gen_extr_i64_i32(l, h, orig); + split_args[real_args++] = GET_TCGV_I32(h); + split_args[real_args++] = GET_TCGV_I32(l); + } else { + split_args[real_args++] = args[i]; + } + } + nargs = real_args; + args = split_args; + sizemask = 0; + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); int is_signed = sizemask & (2 << (i+1)*2); @@ -684,8 +713,23 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opc_ptr++ = INDEX_op_call; nparam = s->gen_opparam_ptr++; if (ret != TCG_CALL_DUMMY_ARG) { -#if TCG_TARGET_REG_BITS < 64 - if (sizemask & 1) { +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + if (orig_sizemask & 1) { + /* The 32-bit ABI is going to return the 64-bit value in + the %o0/%o1 register pair. Prepare for this by using + two return temporaries, and reassemble below. */ + retl = tcg_temp_new_i64(); + reth = tcg_temp_new_i64(); + *s->gen_opparam_ptr++ = GET_TCGV_I64(reth); + *s->gen_opparam_ptr++ = GET_TCGV_I64(retl); + nb_rets = 2; + } else { + *s->gen_opparam_ptr++ = ret; + nb_rets = 1; + } +#else + if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { #ifdef HOST_WORDS_BIGENDIAN *s->gen_opparam_ptr++ = ret + 1; *s->gen_opparam_ptr++ = ret; @@ -694,12 +738,11 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *s->gen_opparam_ptr++ = ret + 1; #endif nb_rets = 2; - } else -#endif - { + } else { *s->gen_opparam_ptr++ = ret; nb_rets = 1; } +#endif } else { nb_rets = 0; } @@ -749,7 +792,29 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, /* total parameters, needed to go backward in the instruction stream */ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(__sparc__) && !defined(__arch64__) \ + && !defined(CONFIG_TCG_INTERPRETER) + /* Free all of the parts we allocated above. */ + for (i = real_args = 0; i < orig_nargs; ++i) { + int is_64bit = orig_sizemask & (1 << (i+1)*2); + if (is_64bit) { + TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); + TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); + tcg_temp_free_i32(h); + tcg_temp_free_i32(l); + } else { + real_args++; + } + } + if (orig_sizemask & 1) { + /* The 32-bit ABI returned two 32-bit pieces. 
Re-assemble them. + Note that describing these as TCGv_i64 eliminates an unnecessary + zero-extension that tcg_gen_concat_i32_i64 would create. */ + tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); + tcg_temp_free_i64(retl); + tcg_temp_free_i64(reth); + } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int is_64bit = sizemask & (1 << (i+1)*2); if (!is_64bit) { @@ -2411,6 +2476,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, ts = &s->temps[arg]; reg = tcg_target_call_oarg_regs[i]; assert(s->reg_to_temp[reg] == -1); + if (ts->fixed_reg) { if (ts->reg != reg) { tcg_out_mov(s, ts->type, ts->reg, reg); @@ -66,6 +66,7 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index f43492cc67..37719e875c 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -82,6 +82,7 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 |
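
As a quick reference for the new opcode documented in the tcg/README hunk above, here is a minimal, self-contained C model of the trunc_shr_i32 semantics: right-shift a 64-bit input by a constant POS and keep the low 32 bits, matching the constant-folding case added to tcg/optimize.c. This is an illustration only, not part of the patch; the function name trunc_shr_i32_model is invented for this sketch.

    #include <assert.h>
    #include <stdint.h>

    /* Reference model of trunc_shr_i32 t0, t1, pos:
       right-shift the 64-bit input T1 by POS, truncate to the 32-bit T0. */
    static uint32_t trunc_shr_i32_model(uint64_t t1, unsigned pos)
    {
        assert(pos < 64);   /* mirrors tcg_debug_assert(count < 64) in tcg-op.h */
        return (uint32_t)(t1 >> pos);
    }

    int main(void)
    {
        /* pos == 0 is a plain truncation (tcg_gen_trunc_i64_i32). */
        assert(trunc_shr_i32_model(0x123456789abcdef0ull, 0) == 0x9abcdef0u);
        /* pos == 32 extracts the high half (tcg_gen_extr_i64_i32's high part). */
        assert(trunc_shr_i32_model(0x123456789abcdef0ull, 32) == 0x12345678u);
        /* arbitrary shifts are also allowed */
        assert(trunc_shr_i32_model(0x123456789abcdef0ull, 8) == 0x789abcdeu);
        return 0;
    }

In the updated SPARC backend this lowers to a plain register move when POS is zero and to a single SRLX otherwise (the INDEX_op_trunc_shr_i32 case in tcg_out_op), and tcg_gen_extr_i64_i32 is now expressed as two such truncations with POS 0 and 32.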