diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
commit | 12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (patch) | |
tree | 65218ee4792a1bae88feb75e615c5ec4e602c7a2 /include | |
parent | 00bcf5cdd6c0e2e92ce3dd852ca68a3b779fa4ec (diff) | |
parent | 41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel side changes were:
- uprobes enhancements (Masami Hiramatsu)
- Uncore group events enhancements (David Carrillo-Cisneros)
- x86 Intel: Add support for Skylake server uncore PMUs (Kan Liang)
- x86 Intel: LBR cleanups and enhancements, for better branch
annotation tracking (Peter Zijlstra)
- x86 Intel: Add support for PTWRITE and power event tracing
(Alexander Shishkin)
- ... various fixes, cleanups and smaller enhancements.
Lots of tooling changes - a couple of highlights:
- Support event group view with hierarchy mode in 'perf top' and
'perf report' (Namhyung Kim)
e.g.:
$ perf record -e '{cycles,instructions}' make
$ perf report --hierarchy --stdio
...
# Overhead Command / Shared Object / Symbol
# ...................... ..................................
...
25.74% 27.18%sh
19.96% 24.14%libc-2.24.so
9.55% 14.64%[.] __strcmp_sse2
1.54% 0.00%[.] __tfind
1.07% 1.13%[.] _int_malloc
0.95% 0.00%[.] __strchr_sse2
0.89% 1.39%[.] __tsearch
0.76% 0.00%[.] strlen
- Add branch stack / basic block info to 'perf annotate --stdio',
where for each branch, we add an asm comment after the instruction
with information on how often it was taken and predicted. See
example with color output at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
(Peter Zijlstra)
- Add support for using symbols in address filters with Intel PT and
ARM CoreSight (hardware assisted tracing facilities) (Adrian
Hunter, Mathieu Poirier)
- Add support for interacting with Coresight PMU ETMs/PTMs, that are
IP blocks to perform hardware assisted tracing on a ARM CPU core
(Mathieu Poirier)
- Support generating cross arch probes, i.e. if you specify a vmlinux
file for different arch than the one in the host machine,
$ perf probe --definition function_name args
will generate the probe definition string needed to append to the
target machine /sys/kernel/debug/tracing/kprobes_events file, using
scripting (Masami Hiramatsu).
- Allow configuring the default 'perf report -s' sort order in
~/.perfconfig, for instance, "sym,dso" may be more fitting for
kernel developers. (Arnaldo Carvalho de Melo)
- ... plus lots of other changes, refactorings, features and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (149 commits)
perf tests: Add dwarf unwind test for powerpc
perf probe: Match linkage name with mangled name
perf probe: Fix to cut off incompatible chars from group name
perf probe: Skip if the function address is 0
perf probe: Ignore the error of finding inline instance
perf intel-pt: Fix decoding when there are address filters
perf intel-pt: Enable decoder to handle TIP.PGD with missing IP
perf intel-pt: Read address filter from AUXTRACE_INFO event
perf intel-pt: Record address filter in AUXTRACE_INFO event
perf intel-pt: Add a helper function for processing AUXTRACE_INFO
perf intel-pt: Fix missing error codes processing auxtrace_info
perf intel-pt: Add support for recording the max non-turbo ratio
perf intel-pt: Fix snapshot overlap detection decoder errors
perf probe: Increase debug level of SDT debug messages
perf record: Add support for using symbols in address filters
perf symbols: Add dso__last_symbol()
perf record: Fix error paths
perf record: Rename label 'out_symbol_exit'
perf script: Fix vanished idle symbols
perf evsel: Add support for address filters
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bitmap.h | 18 | ||||
-rw-r--r-- | include/linux/perf_event.h | 24 |
2 files changed, 37 insertions, 5 deletions
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 598bc999f4c2..3b77588a9360 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -339,6 +339,24 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +/* + * bitmap_from_u64 - Check and swap words within u64. + * @mask: source bitmap + * @dst: destination bitmap + * + * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*] + * to read u64 mask, we will get the wrong word. + * That is "(u32 *)(&val)[0]" gets the upper 32 bits, + * but we expect the lower 32-bits of u64. + */ +static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +{ + dst[0] = mask & ULONG_MAX; + + if (sizeof(mask) > sizeof(unsigned long)) + dst[1] = mask >> 32; +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b6b43cc0dd5..5c5362584aba 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -510,9 +510,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, struct perf_sample_data *, struct pt_regs *regs); -enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, -}; +/* + * Event capabilities. For event_caps and groups caps. + * + * PERF_EV_CAP_SOFTWARE: Is a software event. + * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read + * from any CPU in the package where it is active. + */ +#define PERF_EV_CAP_SOFTWARE BIT(0) +#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) @@ -568,7 +574,12 @@ struct perf_event { struct hlist_node hlist_entry; struct list_head active_entry; int nr_siblings; - int group_flags; + + /* Not serialized. Only written during event initialization. */ + int event_caps; + /* The cumulative AND of all event_caps for events in this group. */ + int group_caps; + struct perf_event *group_leader; struct pmu *pmu; void *pmu_private; @@ -774,6 +785,9 @@ struct perf_cpu_context { #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; #endif + + struct list_head sched_cb_entry; + int sched_cb_usage; }; struct perf_output_handle { @@ -985,7 +999,7 @@ static inline bool is_sampling_event(struct perf_event *event) */ static inline int is_software_event(struct perf_event *event) { - return event->pmu->task_ctx_nr == perf_sw_context; + return event->event_caps & PERF_EV_CAP_SOFTWARE; } extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |