diff options
author | Emilio G. Cota <cota@braap.org> | 2018-10-16 11:38:40 -0400 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2018-10-18 19:46:53 -0700 |
commit | 403f290c0603f35f2d09c982bf5549b6d0803ec1 (patch) | |
tree | 36daee8dfc07be6e60c86fff83f128814e2aa280 /accel/tcg | |
parent | 830bf10c82b49c7e8e2e3e6ff0cc6e440cdcf8d4 (diff) |
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
| +++ | +++ tlb-lock-v3 (spinl|ck) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181016153840.25877-1-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'accel/tcg')
-rw-r--r-- | accel/tcg/cputlb.c | 19 | ||||
-rw-r--r-- | accel/tcg/softmmu_template.h | 12 |
2 files changed, 19 insertions, 12 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 28b770a404..af57aca5e4 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -258,7 +258,7 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, target_ulong page) { return tlb_hit_page(tlb_entry->addr_read, page) || - tlb_hit_page(tlb_entry->addr_write, page) || + tlb_hit_page(tlb_addr_write(tlb_entry), page) || tlb_hit_page(tlb_entry->addr_code, page); } @@ -855,7 +855,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = entry->addr_write; + tlb_addr = tlb_addr_write(entry); if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { /* RAM access */ uintptr_t haddr = addr + entry->addend; @@ -904,7 +904,14 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, assert_cpu_is_self(ENV_GET_CPU(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; - target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); + target_ulong cmp; + + /* elt_ofs might correspond to .addr_write, so use atomic_read */ +#if TCG_OVERSIZED_GUEST + cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); +#else + cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); +#endif if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ @@ -977,7 +984,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx, uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - if (!tlb_hit(entry->addr_write, addr)) { + if (!tlb_hit(tlb_addr_write(entry), addr)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write, addr)) { tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, @@ -995,7 +1002,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlbe->addr_write; + target_ulong tlb_addr = tlb_addr_write(tlbe); TCGMemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); int s_bits = mop & MO_SIZE; @@ -1026,7 +1033,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h index 09538b5349..b0adea045e 100644 --- a/accel/tcg/softmmu_template.h +++ b/accel/tcg/softmmu_template.h @@ -280,7 +280,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, uintptr_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->addr_write; + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -295,7 +295,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -325,7 +325,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(entry2->addr_write, page2) + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); @@ -358,7 +358,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, uintptr_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->addr_write; + target_ulong tlb_addr = tlb_addr_write(entry); unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; @@ -373,7 +373,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); } - tlb_addr = entry->addr_write & ~TLB_INVALID_MASK; + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; } /* Handle an IO access. */ @@ -403,7 +403,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, cannot evict the first. */ page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(entry2->addr_write, page2) + if (!tlb_hit_page(tlb_addr_write(entry2), page2) && !VICTIM_TLB_HIT(addr_write, page2)) { tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, mmu_idx, retaddr); |