diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
commit | 12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (patch) | |
tree | 65218ee4792a1bae88feb75e615c5ec4e602c7a2 /tools/perf/arch/powerpc | |
parent | 00bcf5cdd6c0e2e92ce3dd852ca68a3b779fa4ec (diff) | |
parent | 41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel side changes were:
- uprobes enhancements (Masami Hiramatsu)
- Uncore group events enhancements (David Carrillo-Cisneros)
- x86 Intel: Add support for Skylake server uncore PMUs (Kan Liang)
- x86 Intel: LBR cleanups and enhancements, for better branch
annotation tracking (Peter Zijlstra)
- x86 Intel: Add support for PTWRITE and power event tracing
(Alexander Shishkin)
- ... various fixes, cleanups and smaller enhancements.
Lots of tooling changes - a couple of highlights:
- Support event group view with hierarchy mode in 'perf top' and
'perf report' (Namhyung Kim)
e.g.:
$ perf record -e '{cycles,instructions}' make
$ perf report --hierarchy --stdio
...
# Overhead Command / Shared Object / Symbol
# ...................... ..................................
...
25.74%  27.18%  sh
   19.96%  24.14%  libc-2.24.so
       9.55%  14.64%  [.] __strcmp_sse2
       1.54%   0.00%  [.] __tfind
       1.07%   1.13%  [.] _int_malloc
       0.95%   0.00%  [.] __strchr_sse2
       0.89%   1.39%  [.] __tsearch
       0.76%   0.00%  [.] strlen
- Add branch stack / basic block info to 'perf annotate --stdio',
where for each branch, we add an asm comment after the instruction
with information on how often it was taken and predicted. See
example with color output at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
(Peter Zijlstra)
- Add support for using symbols in address filters with Intel PT and
ARM CoreSight (hardware assisted tracing facilities) (Adrian
Hunter, Mathieu Poirier)
- Add support for interacting with CoreSight PMU ETMs/PTMs, which are
IP blocks that perform hardware assisted tracing on an ARM CPU core
(Mathieu Poirier)
- Support generating cross arch probes, i.e. if you specify a vmlinux
file for a different arch than the one on the host machine,
$ perf probe --definition function_name args
will generate the probe definition string needed to append to the
target machine's /sys/kernel/debug/tracing/kprobe_events file, using
scripting (Masami Hiramatsu).
- Allow configuring the default 'perf report -s' sort order in
~/.perfconfig, for instance, "sym,dso" may be more fitting for
kernel developers. (Arnaldo Carvalho de Melo)
- ... plus lots of other changes, refactorings, features and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (149 commits)
perf tests: Add dwarf unwind test for powerpc
perf probe: Match linkage name with mangled name
perf probe: Fix to cut off incompatible chars from group name
perf probe: Skip if the function address is 0
perf probe: Ignore the error of finding inline instance
perf intel-pt: Fix decoding when there are address filters
perf intel-pt: Enable decoder to handle TIP.PGD with missing IP
perf intel-pt: Read address filter from AUXTRACE_INFO event
perf intel-pt: Record address filter in AUXTRACE_INFO event
perf intel-pt: Add a helper function for processing AUXTRACE_INFO
perf intel-pt: Fix missing error codes processing auxtrace_info
perf intel-pt: Add support for recording the max non-turbo ratio
perf intel-pt: Fix snapshot overlap detection decoder errors
perf probe: Increase debug level of SDT debug messages
perf record: Add support for using symbols in address filters
perf symbols: Add dso__last_symbol()
perf record: Fix error paths
perf record: Rename label 'out_symbol_exit'
perf script: Fix vanished idle symbols
perf evsel: Add support for address filters
...
Diffstat (limited to 'tools/perf/arch/powerpc')
-rw-r--r-- | tools/perf/arch/powerpc/Build | 1 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/include/arch-tests.h | 13 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/include/dwarf-regs-table.h | 27 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/include/perf_regs.h | 2 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/tests/Build | 4 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/tests/arch-tests.c | 15 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/tests/dwarf-unwind.c | 62 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/tests/regs_load.S | 94 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/util/sym-handling.c | 2 |
9 files changed, 219 insertions, 1 deletions
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index 54afe4a467e7..db52fa22d3a1 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -1 +1,2 @@ libperf-y += util/ +libperf-y += tests/ diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h new file mode 100644 index 000000000000..84d8dedef2ed --- /dev/null +++ b/tools/perf/arch/powerpc/include/arch-tests.h @@ -0,0 +1,13 @@ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/powerpc/include/dwarf-regs-table.h b/tools/perf/arch/powerpc/include/dwarf-regs-table.h new file mode 100644 index 000000000000..db4730f5585c --- /dev/null +++ b/tools/perf/arch/powerpc/include/dwarf-regs-table.h @@ -0,0 +1,27 @@ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +/* + * Reference: + * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html + * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf + */ +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +static const char * const powerpc_regstr_tbl[] = { + "%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4", + "%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9", + "%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14", + "%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19", + "%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24", + "%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29", + "%gpr30", "%gpr31", + REG_DWARFNUM_NAME(msr, 66), + REG_DWARFNUM_NAME(ctr, 109), + REG_DWARFNUM_NAME(link, 108), + REG_DWARFNUM_NAME(xer, 101), + REG_DWARFNUM_NAME(dar, 119), + REG_DWARFNUM_NAME(dsisr, 118), +}; + +#endif diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 
75de0e92e71e..c12f4e804f66 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -5,6 +5,8 @@ #include <linux/types.h> #include <asm/perf_regs.h> +void perf_regs_load(u64 *regs); + #define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) #define PERF_REGS_MAX PERF_REG_POWERPC_MAX #ifdef __powerpc64__ diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build new file mode 100644 index 000000000000..d827ef384b33 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o +libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c new file mode 100644 index 000000000000..e24f46241f40 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/arch-tests.c @@ -0,0 +1,15 @@ +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c new file mode 100644 index 000000000000..0bac3137ccbd --- /dev/null +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -0,0 +1,62 @@ +#include <string.h> +#include "perf_regs.h" +#include "thread.h" +#include "map.h" +#include "event.h" +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +#define STACK_SIZE 8192 + +static int sample_ustack(struct perf_sample *sample, + struct thread *thread, u64 *regs) +{ + struct stack_dump *stack = &sample->user_stack; + struct map *map; + unsigned long sp; + u64 stack_size, *buf; + + buf = malloc(STACK_SIZE); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; + 
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + if (!map) { + pr_debug("failed to get stack map\n"); + free(buf); + return -1; + } + + stack_size = map->end - sp; + stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; + + memcpy(buf, (void *) sp, stack_size); + stack->data = (char *) buf; + stack->size = stack_size; + return 0; +} + +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread) +{ + struct regs_dump *regs = &sample->user_regs; + u64 *buf; + + buf = calloc(1, sizeof(u64) * PERF_REGS_MAX); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + perf_regs_load(buf); + regs->abi = PERF_SAMPLE_REGS_ABI; + regs->regs = buf; + regs->mask = PERF_REGS_MASK; + + return sample_ustack(sample, thread, buf); +} diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S new file mode 100644 index 000000000000..d76c9a32f327 --- /dev/null +++ b/tools/perf/arch/powerpc/tests/regs_load.S @@ -0,0 +1,94 @@ +#include <linux/linkage.h> + +/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. 
*/ +#define R0 0 +#define R1 1 * 8 +#define R2 2 * 8 +#define R3 3 * 8 +#define R4 4 * 8 +#define R5 5 * 8 +#define R6 6 * 8 +#define R7 7 * 8 +#define R8 8 * 8 +#define R9 9 * 8 +#define R10 10 * 8 +#define R11 11 * 8 +#define R12 12 * 8 +#define R13 13 * 8 +#define R14 14 * 8 +#define R15 15 * 8 +#define R16 16 * 8 +#define R17 17 * 8 +#define R18 18 * 8 +#define R19 19 * 8 +#define R20 20 * 8 +#define R21 21 * 8 +#define R22 22 * 8 +#define R23 23 * 8 +#define R24 24 * 8 +#define R25 25 * 8 +#define R26 26 * 8 +#define R27 27 * 8 +#define R28 28 * 8 +#define R29 29 * 8 +#define R30 30 * 8 +#define R31 31 * 8 +#define NIP 32 * 8 +#define CTR 35 * 8 +#define LINK 36 * 8 +#define XER 37 * 8 + +.globl perf_regs_load +perf_regs_load: + std 0, R0(3) + std 1, R1(3) + std 2, R2(3) + std 3, R3(3) + std 4, R4(3) + std 5, R5(3) + std 6, R6(3) + std 7, R7(3) + std 8, R8(3) + std 9, R9(3) + std 10, R10(3) + std 11, R11(3) + std 12, R12(3) + std 13, R13(3) + std 14, R14(3) + std 15, R15(3) + std 16, R16(3) + std 17, R17(3) + std 18, R18(3) + std 19, R19(3) + std 20, R20(3) + std 21, R21(3) + std 22, R22(3) + std 23, R23(3) + std 24, R24(3) + std 25, R25(3) + std 26, R26(3) + std 27, R27(3) + std 28, R28(3) + std 29, R29(3) + std 30, R30(3) + std 31, R31(3) + + /* store NIP */ + mflr 4 + std 4, NIP(3) + + /* Store LR */ + std 4, LINK(3) + + /* Store XER */ + mfxer 4 + std 4, XER(3) + + /* Store CTR */ + mfctr 4 + std 4, CTR(3) + + /* Restore original value of r4 */ + ld 4, R4(3) + + blr diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 35745a733100..ed9d5d15d5b6 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int i = 0; map = get_target_map(pev->target, pev->uprobes); - if (!map || map__load(map, NULL) < 0) + if (!map || map__load(map) < 0) return; for (i = 0; i < 
ntevs; i++) { |