diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-10-19 16:17:32 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-10-19 16:17:32 +0100 |
commit | 31e213e30617b986a6e8ab4d9a0646eb4e6a4227 (patch) | |
tree | 24369d1b7d7f420feaa3d5d73524f58571d374f4 /accel | |
parent | 784c2e4f232adf5ef47a84a262ec72a07d068d6a (diff) | |
parent | 403f290c0603f35f2d09c982bf5549b6d0803ec1 (diff) |
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20181018' into staging
Queued tcg patches.
# gpg: Signature made Fri 19 Oct 2018 07:03:20 BST
# gpg: using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth/tags/pull-tcg-20181018: (21 commits)
cputlb: read CPUTLBEntry.addr_write atomically
target/s390x: Check HAVE_ATOMIC128 and HAVE_CMPXCHG128 at translate
target/s390x: Skip wout, cout helpers if op helper does not return
target/s390x: Split do_cdsg, do_lpq, do_stpq
target/s390x: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
target/ppc: Convert to HAVE_CMPXCHG128 and HAVE_ATOMIC128
target/arm: Check HAVE_CMPXCHG128 at translate time
target/arm: Convert to HAVE_CMPXCHG128
target/i386: Convert to HAVE_CMPXCHG128
tcg: Split CONFIG_ATOMIC128
tcg: Add tlb_index and tlb_entry helpers
cputlb: serialize tlb updates with env->tlb_lock
cputlb: fix assert_cpu_is_self macro
exec: introduce tlb_init
target/unicore32: remove tlb_flush from uc32_init_fn
target/alpha: remove tlb_flush from alpha_cpu_initfn
tcg: distribute tcg_time into TCG contexts
tcg: plug holes in struct TCGProfile
tcg: fix use of uninitialized variable under CONFIG_PROFILER
tcg: access cpu->icount_decr.u16.high with atomics
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'accel')
-rw-r--r-- | accel/tcg/atomic_template.h | 20 | ||||
-rw-r--r-- | accel/tcg/cpu-exec.c | 2 | ||||
-rw-r--r-- | accel/tcg/cputlb.c | 235 | ||||
-rw-r--r-- | accel/tcg/softmmu_template.h | 64 | ||||
-rw-r--r-- | accel/tcg/tcg-all.c | 2 | ||||
-rw-r--r-- | accel/tcg/translate-all.c | 2 | ||||
-rw-r--r-- | accel/tcg/user-exec.c | 5 |
7 files changed, 183 insertions, 147 deletions
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index d751bcba48..efde12fdb2 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -100,19 +100,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, cmpv, newv); +#else ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv); +#endif ATOMIC_MMU_CLEANUP; return ret; } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return val; } @@ -124,9 +129,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_ST; - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) @@ -228,19 +234,24 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, DATA_TYPE ret; ATOMIC_TRACE_RMW; +#if DATA_SIZE == 16 + ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); +#else ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); +#endif ATOMIC_MMU_CLEANUP; return BSWAP(ret); } #if DATA_SIZE >= 16 +#if HAVE_ATOMIC128 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; ATOMIC_TRACE_LD; - __atomic_load(haddr, &val, __ATOMIC_RELAXED); + val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; return BSWAP(val); } @@ -253,9 +264,10 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ATOMIC_TRACE_ST; val = BSWAP(val); - __atomic_store(haddr, &val, __ATOMIC_RELAXED); + atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; } +#endif #else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 6bcb6d99bd..870027d435 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -416,7 +416,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, } #endif /* See if we can patch the calling TB. */ - if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + if (last_tb) { tb_add_jump(last_tb, tb_exit, tb); } return tb; diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f4702ce91f..af57aca5e4 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -32,6 +32,7 @@ #include "exec/log.h" #include "exec/helper-proto.h" #include "qemu/atomic.h" +#include "qemu/atomic128.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -58,9 +59,9 @@ } \ } while (0) -#define assert_cpu_is_self(this_cpu) do { \ +#define assert_cpu_is_self(cpu) do { \ if (DEBUG_TLB_GATE) { \ - g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ + g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ } \ } while (0) @@ -73,6 +74,13 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) +void tlb_init(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + + qemu_spin_init(&env->tlb_lock); +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -125,8 +133,17 @@ static void tlb_flush_nocheck(CPUState *cpu) atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1); tlb_debug("(count: %zu)\n", tlb_flush_count()); + /* + * tlb_table/tlb_v_table updates from any thread must hold tlb_lock. + * However, updates from the owner thread (as is the case here; see the + * above assert_cpu_is_self) do not need atomic_set because all reads + * that do not hold the lock are performed by the same owner thread. + */ + qemu_spin_lock(&env->tlb_lock); memset(env->tlb_table, -1, sizeof(env->tlb_table)); memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + qemu_spin_unlock(&env->tlb_lock); + cpu_tb_jmp_cache_clear(cpu); env->vtlb_index = 0; @@ -178,6 +195,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmask)) { @@ -187,6 +205,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); } } + qemu_spin_unlock(&env->tlb_lock); cpu_tb_jmp_cache_clear(cpu); @@ -239,23 +258,28 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_entry->addr_write, page) || + tlb_hit_page(tlb_addr_write(tlb_entry), page) || tlb_hit_page(tlb_entry->addr_code, page); } -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, + target_ulong page) { if (tlb_hit_page_anyprot(tlb_entry, page)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); } } -static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx, - target_ulong page) +/* Called with tlb_lock held */ +static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, + target_ulong page) { int k; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page); + tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page); } } @@ -263,7 +287,6 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; target_ulong addr = (target_ulong) data.target_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); @@ -281,11 +304,12 @@ static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) } addr &= TARGET_PAGE_MASK; - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -314,20 +338,21 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; - int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", - page, addr, mmu_idx_bitmap); + tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", + addr, mmu_idx_bitmap); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if (test_bit(mmu_idx, &mmu_idx_bitmap)) { - tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); - tlb_flush_vtlb_page(env, mmu_idx, addr); + tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr); } } + qemu_spin_unlock(&env->tlb_lock); tb_flush_jmp_cache(cpu, addr); } @@ -450,72 +475,44 @@ void tlb_unprotect_code(ram_addr_t ram_addr) * most usual is detecting writes to code regions which may invalidate * generated code. * - * Because we want other vCPUs to respond to changes straight away we - * update the te->addr_write field atomically. If the TLB entry has - * been changed by the vCPU in the mean time we skip the update. + * Other vCPUs might be reading their TLBs during guest execution, so we update + * te->addr_write with atomic_set. We don't need to worry about this for + * oversized guests as MTTCG is disabled for them. * - * As this function uses atomic accesses we also need to ensure - * updates to tlb_entries follow the same access rules. We don't need - * to worry about this for oversized guests as MTTCG is disabled for - * them. + * Called with tlb_lock held. */ - -static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, - uintptr_t length) +static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, + uintptr_t start, uintptr_t length) { -#if TCG_OVERSIZED_GUEST uintptr_t addr = tlb_entry->addr_write; if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { +#if TCG_OVERSIZED_GUEST tlb_entry->addr_write |= TLB_NOTDIRTY; - } - } #else - /* paired with atomic_mb_set in tlb_set_page_with_attrs */ - uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); - uintptr_t addr = orig_addr; - - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { - addr &= TARGET_PAGE_MASK; - addr += atomic_read(&tlb_entry->addend); - if ((addr - start) < length) { - uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; - atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); + atomic_set(&tlb_entry->addr_write, + tlb_entry->addr_write | TLB_NOTDIRTY); +#endif } } -#endif } -/* For atomic correctness when running MTTCG we need to use the right - * primitives when copying entries */ -static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, - bool atomic_set) +/* + * Called with tlb_lock held. + * Called only from the vCPU context, i.e. the TLB's owner thread. + */ +static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) { -#if TCG_OVERSIZED_GUEST *d = *s; -#else - if (atomic_set) { - d->addr_read = s->addr_read; - d->addr_code = s->addr_code; - atomic_set(&d->addend, atomic_read(&s->addend)); - /* Pairs with flag setting in tlb_reset_dirty_range */ - atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); - } else { - d->addr_read = s->addr_read; - d->addr_write = atomic_read(&s->addr_write); - d->addr_code = s->addr_code; - d->addend = atomic_read(&s->addend); - } -#endif } /* This is a cross vCPU call (i.e. another vCPU resetting the flags of - * the target vCPU). As such care needs to be taken that we don't - * dangerously race with another vCPU update. The only thing actually - * updated is the target TLB entry ->addr_write flags. + * the target vCPU). + * We must take tlb_lock to avoid racing with another vCPU update. The only + * thing actually updated is the target TLB entry ->addr_write flags. */ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) { @@ -524,22 +521,26 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) int mmu_idx; env = cpu->env_ptr; + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { unsigned int i; for (i = 0; i < CPU_TLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, + length); } for (i = 0; i < CPU_VTLB_SIZE; i++) { - tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i], - start1, length); + tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, + length); } } + qemu_spin_unlock(&env->tlb_lock); } -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +/* Called with tlb_lock held */ +static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, + target_ulong vaddr) { if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { tlb_entry->addr_write = vaddr; @@ -551,23 +552,23 @@ static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { CPUArchState *env = cpu->env_ptr; - int i; int mmu_idx; assert_cpu_is_self(cpu); vaddr &= TARGET_PAGE_MASK; - i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + qemu_spin_lock(&env->tlb_lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); + tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); } } + qemu_spin_unlock(&env->tlb_lock); } /* Our TLB does not support large pages, so remember the area covered by @@ -654,15 +655,24 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } - /* Make sure there's no cached translation for the new page. */ - tlb_flush_vtlb_page(env, mmu_idx, vaddr_page); - code_address = address; iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, paddr_page, xlat, prot, &address); - index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - te = &env->tlb_table[mmu_idx][index]; + index = tlb_index(env, mmu_idx, vaddr_page); + te = tlb_entry(env, mmu_idx, vaddr_page); + + /* + * Hold the TLB lock for the rest of the function. We could acquire/release + * the lock several times in the function, but it is faster to amortize the + * acquisition cost by acquiring it just once. Note that this leads to + * a longer critical section, but this is not a concern since the TLB lock + * is unlikely to be contended. + */ + qemu_spin_lock(&env->tlb_lock); + + /* Make sure there's no cached translation for the new page. */ + tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); /* * Only evict the old entry to the victim tlb if it's for a @@ -673,7 +683,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; /* Evict the old entry into the victim tlb. */ - copy_tlb_helper(tv, te, true); + copy_tlb_helper_locked(tv, te); env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; } @@ -725,9 +735,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } } - /* Pairs with flag setting in tlb_reset_dirty_range */ - copy_tlb_helper(te, &tn, true); - /* atomic_mb_set(&te->addr_write, write_address); */ + copy_tlb_helper_locked(te, &tn); + qemu_spin_unlock(&env->tlb_lock); } /* Add a new TLB entry, but without specifying the memory @@ -773,16 +782,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_read; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = entry->addr_read; if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; return ldn_p((void *)haddr, size); } @@ -840,16 +849,16 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, * repeat the MMU check here. This tlb_fill() call might * longjump out if this access should cause a guest exception. */ - int index; + CPUTLBEntry *entry; target_ulong tlb_addr; tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + entry = tlb_entry(env, mmu_idx, addr); + tlb_addr = tlb_addr_write(entry); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ - uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend; + uintptr_t haddr = addr + entry->addend; stn_p((void *)haddr, size, val); return; @@ -891,17 +900,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, size_t elt_ofs, target_ulong page) { size_t vidx; + + assert_cpu_is_self(ENV_GET_CPU(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); + target_ulong cmp; + + /* elt_ofs might correspond to .addr_write, so use atomic_read */ +#if TCG_OVERSIZED_GUEST + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); +#else + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); +#endif if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; - copy_tlb_helper(&tmptlb, tlb, false); - copy_tlb_helper(tlb, vtlb, true); - copy_tlb_helper(vtlb, &tmptlb, true); + qemu_spin_lock(&env->tlb_lock); + copy_tlb_helper_locked(&tmptlb, tlb); + copy_tlb_helper_locked(tlb, vtlb); + copy_tlb_helper_locked(vtlb, &tmptlb); + qemu_spin_unlock(&env->tlb_lock); CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; @@ -924,20 +944,19 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, */ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) { - int mmu_idx, index; + uintptr_t mmu_idx = cpu_mmu_index(env, true); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); void *p; - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - mmu_idx = cpu_mmu_index(env, true); - if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) { + if (unlikely(!tlb_hit(entry->addr_code, addr))) { if (!VICTIM_TLB_HIT(addr_code, addr)) { tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); } - assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr)); + assert(tlb_hit(entry->addr_code, addr)); } - if (unlikely(env->tlb_table[mmu_idx][index].addr_code & - (TLB_RECHECK | TLB_MMIO))) { + if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) { /* * Return -1 if we can't translate and execute from an entire * page of RAM here, which will cause us to execute by loading @@ -949,7 +968,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return -1; } - p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend); + p = (void *)((uintptr_t)addr + entry->addend); return qemu_ram_addr_from_host_nofail(p); } @@ -962,10 +981,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t retaddr) { - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - if (!tlb_hit(tlb_addr, addr)) { + if (!tlb_hit(tlb_addr_write(entry), addr)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write, addr)) { tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, @@ -981,9 +1000,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, NotDirtyInfo *ndi) { size_t mmu_idx = get_mmuidx(oi); - size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr = tlbe->addr_write; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(tlbe); TCGMemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); int s_bits = mop & MO_SIZE; @@ -1014,7 +1033,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1101,7 +1120,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -#ifdef CONFIG_ATOMIC128 +#if HAVE_CMPXCHG128 || HAVE_ATOMIC128 #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h index f060a693d4..b0adea045e 100644 --- a/accel/tcg/softmmu_template.h +++ b/accel/tcg/softmmu_template.h @@ -111,9 +111,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -129,7 +130,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. */ @@ -166,7 +167,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); #else @@ -179,9 +180,10 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = entry->ADDR_READ; unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; @@ -197,7 +199,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlb_addr = entry->ADDR_READ; } /* Handle an IO access. */ @@ -234,7 +236,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, return res; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); return res; } @@ -275,9 +277,10 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -292,7 +295,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -313,16 +316,16 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -340,7 +343,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; #if DATA_SIZE == 1 glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); #else @@ -352,9 +355,10 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) { - unsigned mmu_idx = get_mmuidx(oi); - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -369,7 +373,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -390,16 +394,16 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, if (DATA_SIZE > 1 && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 >= TARGET_PAGE_SIZE)) { - int i, index2; - target_ulong page2, tlb_addr2; + int i; + target_ulong page2; + CPUTLBEntry *entry2; do_unaligned_access: /* Ensure the second page is in the TLB. Note that the first page is already guaranteed to be filled, and that the second page cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write; - if (!tlb_hit_page(tlb_addr2, page2) + entry2 = tlb_entry(env, mmu_idx, page2); + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -417,7 +421,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, return; } - haddr = addr + env->tlb_table[mmu_idx][index].addend; + haddr = addr + entry->addend; glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } #endif /* DATA_SIZE > 1 */ diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 56dbb56a16..3d25bdcc17 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -51,7 +51,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); if (use_icount && !cpu->can_do_io && (mask & ~old_mask) != 0) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ad5c758246..356dcd0948 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2341,7 +2341,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - cpu->icount_decr.u16.high = -1; + atomic_set(&cpu->icount_decr.u16.high, -1); } /* diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 26a3ffbba1..cd75829cf2 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -25,6 +25,7 @@ #include "exec/cpu_ldst.h" #include "translate-all.h" #include "exec/helper-proto.h" +#include "qemu/atomic128.h" #undef EAX #undef ECX @@ -615,7 +616,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* The following is only callable from other helpers, and matches up with the softmmu version. */ -#ifdef CONFIG_ATOMIC128 +#if HAVE_ATOMIC128 || HAVE_CMPXCHG128 #undef EXTRA_ARGS #undef ATOMIC_NAME @@ -628,4 +629,4 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 16 #include "atomic_template.h" -#endif /* CONFIG_ATOMIC128 */ +#endif |