From a85833933628384d74ec412024d55cf012640287 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 31 Jul 2017 22:02:31 -0700
Subject: tcg: Move USE_DIRECT_JUMP discriminator to tcg/cpu/tcg-target.h

Replace the USE_DIRECT_JUMP ifdef with a TCG_TARGET_HAS_direct_jump
boolean test.  Replace the tb_set_jmp_target1 ifdef with an
unconditional function tb_target_set_jmp_target.

While we're touching all backends, add a parameter for tb->tc_ptr;
we're going to need it shortly for some backends.

Move tb_set_jmp_target and tb_add_jump from exec-all.h to cpu-exec.c.

This opens the possibility for TCG_TARGET_HAS_direct_jump to be
a runtime decision -- based on host cpu capabilities, the size of
code_gen_buffer, or a future debugging switch.

Signed-off-by: Richard Henderson
---
 include/exec/exec-all.h | 95 ++-----------------------------------------------
 1 file changed, 3 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index ff8fbe423d..673fc066d0 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -301,15 +301,6 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
-#if defined(_ARCH_PPC) \
-    || defined(__x86_64__) || defined(__i386__) \
-    || defined(__sparc__) || defined(__aarch64__) \
-    || defined(__s390x__) || defined(__mips__) \
-    || defined(CONFIG_TCG_INTERPRETER)
-/* NOTE: Direct jump patching must be atomic to be thread-safe. */
-#define USE_DIRECT_JUMP
-#endif
-
 struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
@@ -347,11 +338,8 @@ struct TranslationBlock {
      */
     uint16_t jmp_reset_offset[2]; /* offset of original jump target */
 #define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
-#ifdef USE_DIRECT_JUMP
-    uint16_t jmp_insn_offset[2]; /* offset of native jump instruction */
-#else
-    uintptr_t jmp_target_addr[2]; /* target address for indirect jump */
-#endif
+    uintptr_t jmp_target_arg[2]; /* target address or offset */
+
     /* Each TB has an associated circular list of TBs jumping to this one.
      * jmp_list_first points to the first TB jumping to this one.
      * jmp_list_next is used to point to the next TB in a list.
@@ -373,84 +361,7 @@ void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags);
-
-#if defined(USE_DIRECT_JUMP)
-
-#if defined(CONFIG_TCG_INTERPRETER)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
-    /* no need to flush icache explicitly */
-}
-#elif defined(_ARCH_PPC)
-void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 ppc_tb_set_jmp_target
-#elif defined(__i386__) || defined(__x86_64__)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
-    /* no need to flush icache explicitly */
-}
-#elif defined(__s390x__)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    intptr_t disp = addr - (jmp_addr - 2);
-    atomic_set((int32_t *)jmp_addr, disp / 2);
-    /* no need to flush icache explicitly */
-}
-#elif defined(__aarch64__)
-void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 aarch64_tb_set_jmp_target
-#elif defined(__sparc__) || defined(__mips__)
-void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
-#else
-#error tb_set_jmp_target1 is missing
-#endif
-
-static inline void tb_set_jmp_target(TranslationBlock *tb,
-                                     int n, uintptr_t addr)
-{
-    uint16_t offset = tb->jmp_insn_offset[n];
-    tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr);
-}
-
-#else
-
-/* set the jump target */
-static inline void tb_set_jmp_target(TranslationBlock *tb,
-                                     int n, uintptr_t addr)
-{
-    tb->jmp_target_addr[n] = addr;
-}
-
-#endif
-
-/* Called with tb_lock held.  */
-static inline void tb_add_jump(TranslationBlock *tb, int n,
-                               TranslationBlock *tb_next)
-{
-    assert(n < ARRAY_SIZE(tb->jmp_list_next));
-    if (tb->jmp_list_next[n]) {
-        /* Another thread has already done this while we were
-         * outside of the lock; nothing to do in this case */
-        return;
-    }
-    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
-                           "Linking TBs %p [" TARGET_FMT_lx
-                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
-                           tb->tc_ptr, tb->pc, n,
-                           tb_next->tc_ptr, tb_next->pc);
-
-    /* patch the native jump address */
-    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
-
-    /* add in TB jmp circular list */
-    tb->jmp_list_next[n] = tb_next->jmp_list_first;
-    tb_next->jmp_list_first = (uintptr_t)tb | n;
-}
+void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
 #if defined(CONFIG_TCG_INTERPRETER)
-- 
cgit v1.2.3
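With the ifdef gone, direct and indirect jumps share one out-of-line entry
point. A minimal sketch of the shape this takes after the move, assuming only
what the commit message states (the body lands in cpu-exec.c, and
tb_target_set_jmp_target is each backend's patching hook, now handed tc_ptr
as well):

    /* Sketch: resolve a TB-to-TB jump either by patching the host jump
     * insn (direct) or by storing the target for an indirect branch.
     * jmp_target_arg[n] is "target address or offset", per the new field.
     */
    void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
    {
        if (TCG_TARGET_HAS_direct_jump) {
            uintptr_t offset = tb->jmp_target_arg[n];
            uintptr_t tc_ptr = (uintptr_t)tb->tc_ptr;
            tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr);
        } else {
            tb->jmp_target_arg[n] = addr;
        }
    }

Since TCG_TARGET_HAS_direct_jump is now an ordinary boolean expression rather
than a preprocessor symbol, a backend is free to compute it at run time, which
is exactly the flexibility the last paragraph of the message describes.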
From 28eef8aaece5e83df4568d9842ab9611ec130b2c Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 31 Jul 2017 19:16:02 -0700
Subject: tcg/s390: Use constant pool for movi

Split out maybe_out_small_movi for use with other operations
that want to add to the constant pool.

Signed-off-by: Richard Henderson
---
 include/elf.h             |   3 +-
 tcg/s390/tcg-target.h     |   1 +
 tcg/s390/tcg-target.inc.c | 130 +++++++++++++++++++++++++++-------------------
 3 files changed, 80 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/elf.h b/include/elf.h
index cd51434877..e8a515ce3d 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -942,8 +942,9 @@ typedef struct {
 #define R_390_TLS_DTPOFF 55     /* Offset in TLS block.  */
 #define R_390_TLS_TPOFF  56     /* Negated offset in static TLS
                                    block.  */
+#define R_390_20         57
 /* Keep this the last entry.  */
-#define R_390_NUM        57
+#define R_390_NUM        58
 
 /* x86-64 relocation types */
 #define R_X86_64_NONE    0      /* No reloc */
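R_390_20 is the s390 ELF relocation for the 20-bit signed displacement carried
by RXY-format instructions; 57 matches the ABI's assignment, which is why
R_390_NUM moves up to 58. The displacement is stored split: 12 low bits (DL)
and 8 high bits (DH). A sketch of the packing, mirroring the patch_reloc case
added further down (the helper name is illustrative, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    /* The 32 bits of an RXY insn starting at byte offset 2 hold, in
     * big-endian order: bits 31-28 B2 base reg, bits 27-16 DL (low 12
     * bits of the displacement), bits 15-8 DH (high 8 bits), bits 7-0
     * the second opcode byte.
     */
    static uint32_t pack_rxy_disp20(uint32_t word, int32_t disp)
    {
        assert(disp >= -0x80000 && disp < 0x80000);  /* signed 20 bits */
        word &= 0xf00000ff;                      /* keep B2 and opcode2 */
        word |= ((uint32_t)disp & 0xfff) << 16;  /* DL field */
        word |= ((uint32_t)disp & 0xff000) >> 4; /* DH field */
        return word;
    }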
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 9c9c8cd464..6f2b06a7d1 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -158,5 +158,6 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
+#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
 
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index 59c0da0922..29b77ff67f 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -29,6 +29,7 @@
 #error "unsupported code generation mode"
 #endif
 
+#include "tcg-pool.inc.c"
 #include "elf.h"
 
 /* ??? The translation blocks produced by TCG are generally small enough to
@@ -361,6 +362,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     intptr_t pcrel2;
+    uint32_t old;
 
     value += addend;
     pcrel2 = (tcg_insn_unit *)value - code_ptr;
@@ -374,6 +376,12 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
         assert(pcrel2 == (int32_t)pcrel2);
         tcg_patch32(code_ptr, pcrel2);
         break;
+    case R_390_20:
+        assert(value == sextract64(value, 0, 20));
+        old = *(uint32_t *)code_ptr & 0xf00000ff;
+        old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
+        tcg_patch32(code_ptr, old);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -562,14 +570,16 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
     }
 }
 
-/* load a register with an immediate value */
-static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
-                             tcg_target_long sval, bool in_prologue)
-{
-    static const S390Opcode lli_insns[4] = {
-        RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
-    };
+static const S390Opcode lli_insns[4] = {
+    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
+};
+static const S390Opcode ii_insns[4] = {
+    RI_IILL, RI_IILH, RI_IIHL, RI_IIHH
+};
 
+static bool maybe_out_small_movi(TCGContext *s, TCGType type,
+                                 TCGReg ret, tcg_target_long sval)
+{
     tcg_target_ulong uval = sval;
     int i;
 
@@ -581,17 +591,37 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     /* Try all 32-bit insns that can load it in one go.  */
     if (sval >= -0x8000 && sval < 0x8000) {
         tcg_out_insn(s, RI, LGHI, ret, sval);
-        return;
+        return true;
     }
 
     for (i = 0; i < 4; i++) {
         tcg_target_long mask = 0xffffull << i*16;
         if ((uval & mask) == uval) {
             tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
-            return;
+            return true;
         }
     }
 
+    return false;
+}
+
+/* load a register with an immediate value */
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+                             tcg_target_long sval, bool in_prologue)
+{
+    tcg_target_ulong uval;
+
+    /* Try all 32-bit insns that can load it in one go.  */
+    if (maybe_out_small_movi(s, type, ret, sval)) {
+        return;
+    }
+
+    uval = sval;
+    if (type == TCG_TYPE_I32) {
+        uval = (uint32_t)sval;
+        sval = (int32_t)sval;
+    }
+
     /* Try all 48-bit insns that can load it in one go.  */
     if (s390_facilities & FACILITY_EXT_IMM) {
         if (sval == (int32_t)sval) {
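Splitting maybe_out_small_movi out of tcg_out_movi_int gives other emitters a
cheap first try at the one-instruction immediate forms (LGHI for a signed
16-bit value, the LLI* family for a single 16-bit group) before paying for a
pool entry. A hypothetical caller, following the stated intent of the commit
message (the function name is a placeholder, and use of the backend scratch
register TCG_TMP0 is an assumption):

    /* Try the 1-insn immediate forms; otherwise fall back to the full
     * movi path, which may itself defer the value to the constant pool.
     */
    static void emit_val_to_scratch(TCGContext *s, tcg_target_long val)
    {
        if (!maybe_out_small_movi(s, TCG_TYPE_I64, TCG_TMP0, val)) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
        }
        /* ... TCG_TMP0 now holds val; use it as the operand ... */
    }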
@@ -603,7 +633,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
             return;
         }
         if ((uval & 0xffffffff) == 0) {
-            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1);
+            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
             return;
         }
     }
@@ -626,55 +656,44 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         }
     }
 
-    /* If extended immediates are not present, then we may have to issue
-       several instructions to load the low 32 bits.  */
-    if (!(s390_facilities & FACILITY_EXT_IMM)) {
-        /* A 32-bit unsigned value can be loaded in 2 insns.  And given
-           that the lli_insns loop above did not succeed, we know that
-           both insns are required.  */
-        if (uval <= 0xffffffff) {
-            tcg_out_insn(s, RI, LLILL, ret, uval);
-            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
-            return;
-        }
+    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
+       that LLILL, LLIHL, LLILF above did not succeed, we know that
+       both insns are required.  */
+    if (uval <= 0xffffffff) {
+        tcg_out_insn(s, RI, LLILL, ret, uval);
+        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+        return;
+    }
 
-        /* If all high bits are set, the value can be loaded in 2 or 3 insns.
-           We first want to make sure that all the high bits get set.  With
-           luck the low 16-bits can be considered negative to perform that for
-           free, otherwise we load an explicit -1.  */
-        if (sval >> 31 >> 1 == -1) {
-            if (uval & 0x8000) {
-                tcg_out_insn(s, RI, LGHI, ret, uval);
-            } else {
-                tcg_out_insn(s, RI, LGHI, ret, -1);
-                tcg_out_insn(s, RI, IILL, ret, uval);
-            }
-            tcg_out_insn(s, RI, IILH, ret, uval >> 16);
-            return;
+    /* When allowed, stuff it in the constant pool.  */
+    if (!in_prologue) {
+        if (USE_REG_TB) {
+            tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
+            new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
+                           -(intptr_t)s->code_gen_ptr);
+        } else {
+            tcg_out_insn(s, RIL, LGRL, ret, 0);
+            new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
         }
+        return;
     }
 
-    /* If we get here, both the high and low parts have non-zero bits.  */
-
-    /* Recurse to load the lower 32-bits.  */
-    tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
-
-    /* Insert data into the high 32-bits.  */
-    uval = uval >> 31 >> 1;
+    /* What's left is for the prologue, loading GUEST_BASE, and because
+       it failed to match above, is known to be a full 64-bit quantity.
+       We could try more than this, but it probably wouldn't pay off.  */
     if (s390_facilities & FACILITY_EXT_IMM) {
-        if (uval < 0x10000) {
-            tcg_out_insn(s, RI, IIHL, ret, uval);
-        } else if ((uval & 0xffff) == 0) {
-            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
-        } else {
-            tcg_out_insn(s, RIL, IIHF, ret, uval);
-        }
+        tcg_out_insn(s, RIL, LLILF, ret, uval);
+        tcg_out_insn(s, RIL, IIHF, ret, uval >> 32);
     } else {
-        if (uval & 0xffff) {
-            tcg_out_insn(s, RI, IIHL, ret, uval);
-        }
-        if (uval & 0xffff0000) {
-            tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
+        const S390Opcode *insns = lli_insns;
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            uint16_t part = uval >> (16 * i);
+            if (part) {
+                tcg_out_insn_RI(s, insns[i], ret, part);
+                insns = ii_insns;
+            }
         }
     }
 }
@@ -2573,6 +2592,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
 }
 
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+    memset(p, 0x07, count * sizeof(tcg_insn_unit));
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
-- 
cgit v1.2.3
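Two details of the pool path are easy to miss. First, the load is emitted with
a zero displacement and patched later: under USE_REG_TB the constant is
addressed off TCG_REG_TB through the new R_390_20 relocation, while otherwise
LGRL does a PC-relative 8-byte load whose halfword-counted offset is fixed up
via R_390_PC32DBL. In the latter case s->code_ptr - 2 points at the I2
immediate field, and the addend of 2 bytes rebases the computed offset onto
the start of the instruction. A sketch of the arithmetic patch_reloc performs
for it (the helper name is illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* LGRL's I2 field holds (target - insn_start) / 2, in halfwords.
     * field points at I2, i.e. insn_start + 2 bytes; addend = 2 undoes
     * that so the offset is relative to the instruction start.
     */
    static int32_t pc32dbl_offset(uintptr_t target, uintptr_t field,
                                  intptr_t addend)
    {
        intptr_t pcrel2 = ((intptr_t)(target + addend)
                           - (intptr_t)field) / 2;
        assert(pcrel2 == (int32_t)pcrel2);   /* must fit in 32 bits */
        return (int32_t)pcrel2;
    }

Second, tcg_out_nop_fill pads the gap before an aligned pool with 0x07 bytes:
any run of them decodes as BCR with a condition mask of zero (e.g. 0x0707), a
branch that is never taken, so the padding is harmless even if reached.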