diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-09 11:01:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-09 11:01:42 -0700 |
commit | e4da01d8333e500e15a674d75885a9dfcfd31e77 (patch) | |
tree | 4a9816c19bde1b027c58aa2486535221d4c355b9 /arch/powerpc/perf | |
parent | 6cff4821c0856cbf310601c849047fbd666216ea (diff) | |
parent | 6ba4a2d3591039aea1cb45c7c42262d26351a2fa (diff) |
Merge tag 'powerpc-5.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull more powerpc updates from Michael Ellerman:
"The bulk of this is the series to make CONFIG_COMPAT user-selectable,
it's been around for a long time but was blocked behind the
syscall-in-C series.
Plus there's also a few fixes and other minor things.
Summary:
- A fix for a crash in machine check handling on pseries (ie. guests)
- A small series to make it possible to disable CONFIG_COMPAT, and
turn it off by default for ppc64le where it's not used.
- A few other miscellaneous fixes and small improvements.
Thanks to: Alexey Kardashevskiy, Anju T Sudhakar, Arnd Bergmann,
Christophe Leroy, Dan Carpenter, Ganesh Goudar, Geert Uytterhoeven,
Geoff Levand, Mahesh Salgaonkar, Markus Elfring, Michal Suchanek,
Nicholas Piggin, Stephen Boyd, Wen Xiong"
* tag 'powerpc-5.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
selftests/powerpc: Always build the tm-poison test 64-bit
powerpc: Improve ppc_save_regs()
Revert "powerpc/64: irq_work avoid interrupt when called with hardware irqs enabled"
powerpc/time: Replace <linux/clk-provider.h> by <linux/of_clk.h>
powerpc/pseries/ddw: Extend upper limit for huge DMA window for persistent memory
powerpc/perf: split callchain.c by bitness
powerpc/64: Make COMPAT user-selectable disabled on littleendian by default.
powerpc/64: make buildable without CONFIG_COMPAT
powerpc/perf: consolidate valid_user_sp -> invalid_user_sp
powerpc/perf: consolidate read_user_stack_32
powerpc: move common register copy functions from signal_32.c to signal.c
powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro
powerpc/ps3: Set CONFIG_UEVENT_HELPER=y in ps3_defconfig
powerpc/ps3: Remove an unneeded NULL check
powerpc/ps3: Remove duplicate error message
powerpc/powernv: Re-enable imc trace-mode in kernel
powerpc/perf: Implement a global lock to avoid races between trace, core and thread imc events.
powerpc/pseries: Fix MCE handling on pseries
selftests/eeh: Skip ahci adapters
powerpc/64s: Fix doorbell wakeup msgclr optimisation
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r-- | arch/powerpc/perf/Makefile | 5 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain.c | 356 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain.h | 19 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain_32.c | 196 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain_64.c | 174 | ||||
-rw-r--r-- | arch/powerpc/perf/imc-pmu.c | 173 |
6 files changed, 543 insertions, 380 deletions
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index c155dcbb8691..53d614e98537 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o +ifdef CONFIG_COMPAT +obj-$(CONFIG_PERF_EVENTS) += callchain_32.o +endif obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index cbc251981209..dd5051015008 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -15,11 +15,9 @@ #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <asm/vdso.h> -#ifdef CONFIG_PPC64 -#include "../kernel/ppc32.h" -#endif #include <asm/pte-walk.h> +#include "callchain.h" /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -102,358 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re } } -#ifdef CONFIG_PPC64 -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. - */ -static int read_user_stack_slow(void __user *ptr, void *buf, int nb) -{ - int ret = -EFAULT; - pgd_t *pgdir; - pte_t *ptep, pte; - unsigned shift; - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - unsigned long pfn, flags; - void *kaddr; - - pgdir = current->mm->pgd; - if (!pgdir) - return -EFAULT; - - local_irq_save(flags); - ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); - if (!ptep) - goto err_out; - if (!shift) - shift = PAGE_SHIFT; - - /* align address to page boundary */ - offset = addr & ((1UL << shift) - 1); - - pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_user(pte)) - goto err_out; - pfn = pte_pfn(pte); - if (!page_is_ram(pfn)) - goto err_out; - - /* no highmem to worry about here */ - kaddr = pfn_to_kaddr(pfn); - memcpy(buf, kaddr + offset, nb); - ret = 0; -err_out: - local_irq_restore(flags); - return ret; -} - -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!probe_user_read(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 8); -} - -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - if (!probe_user_read(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 4); -} - -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) - return 0; - return 1; -} - -/* - * 64-bit user processes use the same stack frame for RT and non-RT signals. - */ -struct signal_frame_64 { - char dummy[__SIGNAL_FRAMESIZE]; - struct ucontext uc; - unsigned long unused[2]; - unsigned int tramp[6]; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - char abigap[288]; -}; - -static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) -{ - if (nip == fp + offsetof(struct signal_frame_64, tramp)) - return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) - return 1; - return 0; -} - -/* - * Do some sanity checking on the signal frame pointed to by sp. - * We check the pinfo and puc pointers in the frame. - */ -static int sane_signal_64_frame(unsigned long sp) -{ - struct signal_frame_64 __user *sf; - unsigned long pinfo, puc; - - sf = (struct signal_frame_64 __user *) sp; - if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || - read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) - return 0; - return pinfo == (unsigned long) &sf->info && - puc == (unsigned long) &sf->uc; -} - -static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - unsigned long sp, next_sp; - unsigned long next_ip; - unsigned long lr; - long level = 0; - struct signal_frame_64 __user *sigframe; - unsigned long __user *fp, *uregs; - - next_ip = perf_instruction_pointer(regs); - lr = regs->link; - sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); - - while (entry->nr < entry->max_stack) { - fp = (unsigned long __user *) sp; - if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) - return; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, which can happen when - * transitioning from an alternate signal stack to the - * normal stack. - */ - if (next_sp - sp >= sizeof(struct signal_frame_64) && - (is_sigreturn_64_address(next_ip, sp) || - (level <= 1 && is_sigreturn_64_address(lr, sp))) && - sane_signal_64_frame(sp)) { - /* - * This looks like an signal frame - */ - sigframe = (struct signal_frame_64 __user *) sp; - uregs = sigframe->uc.uc_mcontext.gp_regs; - if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || - read_user_stack_64(&uregs[PT_LNK], &lr) || - read_user_stack_64(&uregs[PT_R1], &sp)) - return; - level = 0; - perf_callchain_store_context(entry, PERF_CONTEXT_USER); - perf_callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - perf_callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} - -#else /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - return probe_user_read(ret, ptr, sizeof(*ret)); -} - -static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ -} - -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > TASK_SIZE - 32) - return 0; - return 1; -} - -#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE -#define sigcontext32 sigcontext -#define mcontext32 mcontext -#define ucontext32 ucontext -#define compat_siginfo_t struct siginfo - -#endif /* CONFIG_PPC64 */ - -/* - * Layout for non-RT signal frames - */ -struct signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sctx; - struct mcontext32 mctx; - int abigap[56]; -}; - -/* - * Layout for RT signal frames - */ -struct rt_signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32 + 16]; - compat_siginfo_t info; - struct ucontext32 uc; - int abigap[56]; -}; - -static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) - return 1; - if (vdso32_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_sigtramp) - return 1; - return 0; -} - -static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct rt_signal_frame_32, - uc.uc_mcontext.mc_pad)) - return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) - return 1; - return 0; -} - -static int sane_signal_32_frame(unsigned int sp) -{ - struct signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) - return 0; - return regs == (unsigned long) &sf->mctx; -} - -static int sane_rt_signal_32_frame(unsigned int sp) -{ - struct rt_signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) - return 0; - return regs == (unsigned long) &sf->uc.uc_mcontext; -} - -static unsigned int __user *signal_frame_32_regs(unsigned int sp, - unsigned int next_sp, unsigned int next_ip) -{ - struct mcontext32 __user *mctx = NULL; - struct signal_frame_32 __user *sf; - struct rt_signal_frame_32 __user *rt_sf; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, for example, when - * transitioning from an alternate signal stack to the - * normal stack. - */ - if (next_sp - sp >= sizeof(struct signal_frame_32) && - is_sigreturn_32_address(next_ip, sp) && - sane_signal_32_frame(sp)) { - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - mctx = &sf->mctx; - } - - if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && - is_rt_sigreturn_32_address(next_ip, sp) && - sane_rt_signal_32_frame(sp)) { - rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - mctx = &rt_sf->uc.uc_mcontext; - } - - if (!mctx) - return NULL; - return mctx->mc_gregs; -} - -static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - unsigned int sp, next_sp; - unsigned int next_ip; - unsigned int lr; - long level = 0; - unsigned int __user *fp, *uregs; - - next_ip = perf_instruction_pointer(regs); - lr = regs->link; - sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); - - while (entry->nr < entry->max_stack) { - fp = (unsigned int __user *) (unsigned long) sp; - if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) - return; - - uregs = signal_frame_32_regs(sp, next_sp, next_ip); - if (!uregs && level <= 1) - uregs = signal_frame_32_regs(sp, next_sp, lr); - if (uregs) { - /* - * This looks like an signal frame, so restart - * the stack trace with the values in it. - */ - if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || - read_user_stack_32(&uregs[PT_LNK], &lr) || - read_user_stack_32(&uregs[PT_R1], &sp)) - return; - level = 0; - perf_callchain_store_context(entry, PERF_CONTEXT_USER); - perf_callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - perf_callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} - void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h new file mode 100644 index 000000000000..7a2cb9e1181a --- /dev/null +++ b/arch/powerpc/perf/callchain.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _POWERPC_PERF_CALLCHAIN_H +#define _POWERPC_PERF_CALLCHAIN_H + +int read_user_stack_slow(void __user *ptr, void *buf, int nb); +void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs); +void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs); + +static inline bool invalid_user_sp(unsigned long sp) +{ + unsigned long mask = is_32bit_task() ? 3 : 7; + unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32); + + return (!sp || (sp & mask) || (sp > top)); +} + +#endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c new file mode 100644 index 000000000000..8aa951003141 --- /dev/null +++ b/arch/powerpc/perf/callchain_32.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/uaccess.h> +#include <linux/mm.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> +#include <asm/pte-walk.h> + +#include "callchain.h" + +#ifdef CONFIG_PPC64 +#include "../kernel/ppc32.h" +#else /* CONFIG_PPC64 */ + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + int rc; + + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + rc = probe_user_read(ret, ptr, sizeof(*ret)); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, 4); + + return rc; +} + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = perf_instruction_pointer(regs); + lr = regs->link; + sp = regs->gpr[1]; + perf_callchain_store(entry, next_ip); + + while (entry->nr < entry->max_stack) { + fp = (unsigned int __user *) (unsigned long) sp; + if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like an signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + perf_callchain_store_context(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + perf_callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c new file mode 100644 index 000000000000..df1ffd8b20f2 --- /dev/null +++ b/arch/powerpc/perf/callchain_64.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/uaccess.h> +#include <linux/mm.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> +#include <asm/pte-walk.h> + +#include "callchain.h" + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. + */ +int read_user_stack_slow(void __user *ptr, void *buf, int nb) +{ + int ret = -EFAULT; + pgd_t *pgdir; + pte_t *ptep, pte; + unsigned int shift; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn, flags; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + local_irq_save(flags); + ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); + if (!ptep) + goto err_out; + if (!shift) + shift = PAGE_SHIFT; + + /* align address to page boundary */ + offset = addr & ((1UL << shift) - 1); + + pte = READ_ONCE(*ptep); + if (!pte_present(pte) || !pte_user(pte)) + goto err_out; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + goto err_out; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(buf, kaddr + offset, nb); + ret = 0; +err_out: + local_irq_restore(flags); + return ret; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!probe_user_read(ret, ptr, sizeof(*ret))) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. + */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = perf_instruction_pointer(regs); + lr = regs->link; + sp = regs->gpr[1]; + perf_callchain_store(entry, next_ip); + + while (entry->nr < entry->max_stack) { + fp = (unsigned long __user *) sp; + if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like an signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + perf_callchain_store_context(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + perf_callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index cb50a9e1fd2d..eb82dda884e5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -44,6 +44,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem); static struct imc_pmu_ref *trace_imc_refc; static int trace_imc_mem_size; +/* + * Global data structure used to avoid races between thread, + * core and trace-imc + */ +static struct imc_pmu_ref imc_global_refc = { + .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .id = 0, + .refc = 0, +}; + static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct imc_pmu, pmu); @@ -698,6 +708,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) return -EINVAL; ref->refc = 0; + /* + * Reduce the global reference count, if this is the + * last cpu in this core and core-imc event running + * in this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_CORE) + imc_global_refc.refc--; + + mutex_unlock(&imc_global_refc.lock); } return 0; } @@ -710,6 +730,23 @@ static int core_imc_pmu_cpumask_init(void) ppc_core_imc_cpu_offline); } +static void reset_global_refc(struct perf_event *event) +{ + mutex_lock(&imc_global_refc.lock); + imc_global_refc.refc--; + + /* + * If no other thread is running any + * event for this domain(thread/core/trace), + * set the global id to zero. + */ + if (imc_global_refc.refc <= 0) { + imc_global_refc.refc = 0; + imc_global_refc.id = 0; + } + mutex_unlock(&imc_global_refc.lock); +} + static void core_imc_counters_release(struct perf_event *event) { int rc, core_id; @@ -759,6 +796,8 @@ static void core_imc_counters_release(struct perf_event *event) ref->refc = 0; } mutex_unlock(&ref->lock); + + reset_global_refc(event); } static int core_imc_event_init(struct perf_event *event) @@ -819,6 +858,29 @@ static int core_imc_event_init(struct perf_event *event) ++ref->refc; mutex_unlock(&ref->lock); + /* + * Since the system can run either in accumulation or trace-mode + * of IMC at a time, core-imc events are allowed only if no other + * trace/thread imc events are enabled/monitored. + * + * Take the global lock, and check the refc.id + * to know whether any other trace/thread imc + * events are running. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { + /* + * No other trace/thread imc events are running in + * the system, so set the refc.id to core-imc. + */ + imc_global_refc.id = IMC_DOMAIN_CORE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; return 0; @@ -877,7 +939,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu) static int ppc_thread_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * Set the bit 0 of LDBAR to zero. + * + * If bit 0 of LDBAR is unset, it will stop posting + * the counter data to memory. + * For thread-imc, bit 0 of LDBAR will be set to 1 in the + * event_add function. So reset this bit here, to stop the updates + * to memory in the cpu_offline path. + */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* Reduce the refc if thread-imc event running on this cpu */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_THREAD) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -916,7 +994,22 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; + mutex_lock(&imc_global_refc.lock); + /* + * Check if any other trace/core imc events are running in the + * system, if not set the global id to thread-imc. + */ + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) { + imc_global_refc.id = IMC_DOMAIN_THREAD; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; return 0; } @@ -1063,10 +1156,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) int core_id; struct imc_pmu_ref *ref; - mtspr(SPRN_LDBAR, 0); - core_id = smp_processor_id() / threads_per_core; ref = &core_imc_refc[core_id]; + if (!ref) { + pr_debug("imc: Failed to get event reference count\n"); + return; + } mutex_lock(&ref->lock); ref->refc--; @@ -1082,6 +1177,10 @@ static void thread_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + + /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + /* * Take a snapshot and calculate the delta and update * the event counter values. @@ -1133,7 +1232,18 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu) static int ppc_trace_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * No need to set bit 0 of LDBAR to zero, as + * it is set to zero for imc trace-mode + * + * Reduce the refc if any trace-imc event running + * on this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_TRACE) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -1226,15 +1336,14 @@ static int trace_imc_event_add(struct perf_event *event, int flags) local_mem = get_trace_imc_event_base_addr(); ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + /* trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return -EINVAL; + pr_debug("imc: Failed to get the event reference count\n"); + return -EINVAL; } + mtspr(SPRN_LDBAR, ldbar_value); mutex_lock(&ref->lock); if (ref->refc == 0) { @@ -1242,13 +1351,11 @@ static int trace_imc_event_add(struct perf_event *event, int flags) get_hard_smp_processor_id(smp_processor_id()))) { mutex_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); - mtspr(SPRN_LDBAR, 0); return -EINVAL; } } ++ref->refc; mutex_unlock(&ref->lock); - return 0; } @@ -1274,16 +1381,13 @@ static void trace_imc_event_del(struct perf_event *event, int flags) int core_id = smp_processor_id() / threads_per_core; struct imc_pmu_ref *ref = NULL; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return; + pr_debug("imc: Failed to get event reference count\n"); + return; } - mtspr(SPRN_LDBAR, 0); + mutex_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { @@ -1297,6 +1401,7 @@ static void trace_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + trace_imc_event_stop(event, flags); } @@ -1314,10 +1419,30 @@ static int trace_imc_event_init(struct perf_event *event) if (event->attr.sample_period == 0) return -ENOENT; + /* + * Take the global lock, and make sure + * no other thread is running any core/thread imc + * events + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { + /* + * No core/thread imc events are running in the + * system, so set the refc.id to trace-imc. + */ + imc_global_refc.id = IMC_DOMAIN_TRACE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.idx = -1; target = event->hw.target; event->pmu->task_ctx_nr = perf_hw_context; + event->destroy = reset_global_refc; return 0; } @@ -1429,10 +1554,10 @@ static void cleanup_all_core_imc_memory(void) static void thread_imc_ldbar_disable(void *dummy) { /* - * By Zeroing LDBAR, we disable thread-imc - * updates. + * By setting 0th bit of LDBAR to zero, we disable thread-imc + * updates to memory. */ - mtspr(SPRN_LDBAR, 0); + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); } void thread_imc_disable(void) |