summaryrefslogtreecommitdiff
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-13 13:05:08 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-13 13:05:08 -0800
commit31486372a1e9a66ec2e9e2903b8792bba7e503e1 (patch)
tree84f61e0758695e6fbee8d2b7d7b9bc4a5ce6e03b /tools/perf/util/machine.c
parent8e9a2dba8686187d8c8179e5b86640e653963889 (diff)
parentfcdfafcb73be8fa45909327bbddca46fb362a675 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main changes in this cycle were: Kernel: - kprobes updates: use better W^X patterns for code modifications, improve optprobes, remove jprobes. (Masami Hiramatsu, Kees Cook) - core fixes: event timekeeping (enabled/running times statistics) fixes, perf_event_read() locking fixes and cleanups, etc. (Peter Zijlstra) - Extend x86 Intel free-running PEBS support and support x86 user-register sampling in perf record and perf script. (Andi Kleen) Tooling: - Completely rework the way inline frames are handled. Instead of querying for the inline nodes on-demand in the individual tools, we now create proper callchain nodes for inlined frames. (Milian Wolff) - 'perf trace' updates (Arnaldo Carvalho de Melo) - Implement a way to print formatted output to per-event files in 'perf script' to facilitate generate flamegraphs, elliminating the need to write scripts to do that separation (yuzhoujian, Arnaldo Carvalho de Melo) - Update vendor events JSON metrics for Intel's Broadwell, Broadwell Server, Haswell, Haswell Server, IvyBridge, IvyTown, JakeTown, Sandy Bridge, Skylake, SkyLake Server - and Goldmont Plus V1 (Andi Kleen, Kan Liang) - Multithread the synthesizing of PERF_RECORD_ events for pre-existing threads in 'perf top', speeding up that phase, greatly improving the user experience in systems such as Intel's Knights Mill (Kan Liang) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. E.g: (Andi Kleen) - perf sched timehist enhancements (David Ahern) - ... various other enhancements, updates, cleanups and fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (139 commits) kprobes: Don't spam the build log with deprecation warnings arm/kprobes: Remove jprobe test case arm/kprobes: Fix kretprobe test to check correct counter perf srcline: Show correct function name for srcline of callchains perf srcline: Fix memory leak in addr2inlines() perf trace beauty kcmp: Beautify arguments perf trace beauty: Implement pid_fd beautifier tools include uapi: Grab a copy of linux/kcmp.h perf callchain: Fix double mapping al->addr for children without self period perf stat: Make --per-thread update shadow stats to show metrics perf stat: Move the shadow stats scale computation in perf_stat__update_shadow_stats perf tools: Add perf_data_file__write function perf tools: Add struct perf_data_file perf tools: Rename struct perf_data_file to perf_data perf script: Print information about per-event-dump files perf trace beauty prctl: Generate 'option' string table from kernel headers tools include uapi: Grab a copy of linux/prctl.h perf script: Allow creating per-event dump files perf evsel: Restore evsel->priv as a tool private area perf script: Use event_format__fprintf() ...
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c228
1 files changed, 164 insertions, 64 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index bd5d5b5e2218..6a8d03c3d9b7 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -31,7 +31,21 @@ static void dsos__init(struct dsos *dsos)
{
INIT_LIST_HEAD(&dsos->head);
dsos->root = RB_ROOT;
- pthread_rwlock_init(&dsos->lock, NULL);
+ init_rwsem(&dsos->lock);
+}
+
+static void machine__threads_init(struct machine *machine)
+{
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ threads->entries = RB_ROOT;
+ init_rwsem(&threads->lock);
+ threads->nr = 0;
+ INIT_LIST_HEAD(&threads->dead);
+ threads->last_match = NULL;
+ }
}
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
@@ -41,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
- machine->threads = RB_ROOT;
- pthread_rwlock_init(&machine->threads_lock, NULL);
- machine->nr_threads = 0;
- INIT_LIST_HEAD(&machine->dead_threads);
- machine->last_match = NULL;
+ machine__threads_init(machine);
machine->vdso_info = NULL;
machine->env = NULL;
@@ -121,7 +131,7 @@ static void dsos__purge(struct dsos *dsos)
{
struct dso *pos, *n;
- pthread_rwlock_wrlock(&dsos->lock);
+ down_write(&dsos->lock);
list_for_each_entry_safe(pos, n, &dsos->head, node) {
RB_CLEAR_NODE(&pos->rb_node);
@@ -130,39 +140,49 @@ static void dsos__purge(struct dsos *dsos)
dso__put(pos);
}
- pthread_rwlock_unlock(&dsos->lock);
+ up_write(&dsos->lock);
}
static void dsos__exit(struct dsos *dsos)
{
dsos__purge(dsos);
- pthread_rwlock_destroy(&dsos->lock);
+ exit_rwsem(&dsos->lock);
}
void machine__delete_threads(struct machine *machine)
{
struct rb_node *nd;
+ int i;
- pthread_rwlock_wrlock(&machine->threads_lock);
- nd = rb_first(&machine->threads);
- while (nd) {
- struct thread *t = rb_entry(nd, struct thread, rb_node);
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ down_write(&threads->lock);
+ nd = rb_first(&threads->entries);
+ while (nd) {
+ struct thread *t = rb_entry(nd, struct thread, rb_node);
- nd = rb_next(nd);
- __machine__remove_thread(machine, t, false);
+ nd = rb_next(nd);
+ __machine__remove_thread(machine, t, false);
+ }
+ up_write(&threads->lock);
}
- pthread_rwlock_unlock(&machine->threads_lock);
}
void machine__exit(struct machine *machine)
{
+ int i;
+
machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
zfree(&machine->root_dir);
zfree(&machine->current_tid);
- pthread_rwlock_destroy(&machine->threads_lock);
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ exit_rwsem(&threads->lock);
+ }
}
void machine__delete(struct machine *machine)
@@ -380,10 +400,11 @@ out_err:
* lookup/new thread inserted.
*/
static struct thread *____machine__findnew_thread(struct machine *machine,
+ struct threads *threads,
pid_t pid, pid_t tid,
bool create)
{
- struct rb_node **p = &machine->threads.rb_node;
+ struct rb_node **p = &threads->entries.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
@@ -392,14 +413,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* so most of the time we dont have to look up
* the full rbtree:
*/
- th = machine->last_match;
+ th = threads->last_match;
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
- machine->last_match = NULL;
+ threads->last_match = NULL;
}
while (*p != NULL) {
@@ -407,7 +428,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = rb_entry(parent, struct thread, rb_node);
if (th->tid == tid) {
- machine->last_match = th;
+ threads->last_match = th;
machine__update_thread_pid(machine, th, pid);
return thread__get(th);
}
@@ -424,7 +445,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &machine->threads);
+ rb_insert_color(&th->rb_node, &threads->entries);
/*
* We have to initialize map_groups separately
@@ -435,7 +456,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &machine->threads);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -444,8 +465,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* It is now in the rbtree, get a ref
*/
thread__get(th);
- machine->last_match = th;
- ++machine->nr_threads;
+ threads->last_match = th;
+ ++threads->nr;
}
return th;
@@ -453,27 +474,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
{
- return ____machine__findnew_thread(machine, pid, tid, true);
+ return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
}
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct threads *threads = machine__threads(machine, tid);
struct thread *th;
- pthread_rwlock_wrlock(&machine->threads_lock);
+ down_write(&threads->lock);
th = __machine__findnew_thread(machine, pid, tid);
- pthread_rwlock_unlock(&machine->threads_lock);
+ up_write(&threads->lock);
return th;
}
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
+ struct threads *threads = machine__threads(machine, tid);
struct thread *th;
- pthread_rwlock_rdlock(&machine->threads_lock);
- th = ____machine__findnew_thread(machine, pid, tid, false);
- pthread_rwlock_unlock(&machine->threads_lock);
+
+ down_read(&threads->lock);
+ th = ____machine__findnew_thread(machine, threads, pid, tid, false);
+ up_read(&threads->lock);
return th;
}
@@ -565,7 +589,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
{
struct dso *dso;
- pthread_rwlock_wrlock(&machine->dsos.lock);
+ down_write(&machine->dsos.lock);
dso = __dsos__find(&machine->dsos, m->name, true);
if (!dso) {
@@ -579,7 +603,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__get(dso);
out_unlock:
- pthread_rwlock_unlock(&machine->dsos.lock);
+ up_write(&machine->dsos.lock);
return dso;
}
@@ -720,21 +744,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- size_t ret;
struct rb_node *nd;
+ size_t ret;
+ int i;
- pthread_rwlock_rdlock(&machine->threads_lock);
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
- ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+ down_read(&threads->lock);
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- struct thread *pos = rb_entry(nd, struct thread, rb_node);
+ ret = fprintf(fp, "Threads: %u\n", threads->nr);
- ret += thread__fprintf(pos, fp);
- }
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread, rb_node);
- pthread_rwlock_unlock(&machine->threads_lock);
+ ret += thread__fprintf(pos, fp);
+ }
+ up_read(&threads->lock);
+ }
return ret;
}
@@ -1293,7 +1321,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
struct dso *kernel = NULL;
struct dso *dso;
- pthread_rwlock_rdlock(&machine->dsos.lock);
+ down_read(&machine->dsos.lock);
list_for_each_entry(dso, &machine->dsos.head, node) {
@@ -1323,7 +1351,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
break;
}
- pthread_rwlock_unlock(&machine->dsos.lock);
+ up_read(&machine->dsos.lock);
if (kernel == NULL)
kernel = machine__findnew_dso(machine, kmmap_prefix);
@@ -1480,23 +1508,25 @@ out_problem:
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
- if (machine->last_match == th)
- machine->last_match = NULL;
+ struct threads *threads = machine__threads(machine, th->tid);
+
+ if (threads->last_match == th)
+ threads->last_match = NULL;
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
- pthread_rwlock_wrlock(&machine->threads_lock);
- rb_erase_init(&th->rb_node, &machine->threads);
+ down_write(&threads->lock);
+ rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
- --machine->nr_threads;
+ --threads->nr;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
* will be called and we will remove it from the dead_threads list.
*/
- list_add_tail(&th->node, &machine->dead_threads);
+ list_add_tail(&th->node, &threads->dead);
if (lock)
- pthread_rwlock_unlock(&machine->threads_lock);
+ up_write(&threads->lock);
thread__put(th);
}
@@ -1680,6 +1710,26 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return mi;
}
+static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
+{
+ char *srcline = NULL;
+
+ if (!map || callchain_param.key == CCKEY_FUNCTION)
+ return srcline;
+
+ srcline = srcline__tree_find(&map->dso->srclines, ip);
+ if (!srcline) {
+ bool show_sym = false;
+ bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+
+ srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
+ sym, show_sym, show_addr);
+ srcline__tree_insert(&map->dso->srclines, ip, srcline);
+ }
+
+ return srcline;
+}
+
struct iterations {
int nr_loop_iter;
u64 cycles;
@@ -1699,6 +1749,7 @@ static int add_callchain_ip(struct thread *thread,
struct addr_location al;
int nr_loop_iter = 0;
u64 iter_cycles = 0;
+ const char *srcline = NULL;
al.filtered = 0;
al.sym = NULL;
@@ -1754,9 +1805,10 @@ static int add_callchain_ip(struct thread *thread,
iter_cycles = iter->cycles;
}
+ srcline = callchain_srcline(al.map, al.sym, al.addr);
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter,
- iter_cycles, branch_from);
+ iter_cycles, branch_from, srcline);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -2069,15 +2121,54 @@ check_calls:
return 0;
}
+static int append_inlines(struct callchain_cursor *cursor,
+ struct map *map, struct symbol *sym, u64 ip)
+{
+ struct inline_node *inline_node;
+ struct inline_list *ilist;
+ u64 addr;
+ int ret = 1;
+
+ if (!symbol_conf.inline_name || !map || !sym)
+ return ret;
+
+ addr = map__rip_2objdump(map, ip);
+
+ inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+ if (!inline_node) {
+ inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+ if (!inline_node)
+ return ret;
+ inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+ }
+
+ list_for_each_entry(ilist, &inline_node->val, list) {
+ ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0, ilist->srcline);
+
+ if (ret != 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static int unwind_entry(struct unwind_entry *entry, void *arg)
{
struct callchain_cursor *cursor = arg;
+ const char *srcline = NULL;
if (symbol_conf.hide_unresolved && entry->sym == NULL)
return 0;
+
+ if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
+ return 0;
+
+ srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym,
- false, NULL, 0, 0, 0);
+ false, NULL, 0, 0, 0, srcline);
}
static int thread__resolve_callchain_unwind(struct thread *thread,
@@ -2141,21 +2232,26 @@ int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv)
{
+ struct threads *threads;
struct rb_node *nd;
struct thread *thread;
int rc = 0;
+ int i;
- for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
- thread = rb_entry(nd, struct thread, rb_node);
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
- }
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ threads = &machine->threads[i];
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ thread = rb_entry(nd, struct thread, rb_node);
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
- list_for_each_entry(thread, &machine->dead_threads, node) {
- rc = fn(thread, priv);
- if (rc != 0)
- return rc;
+ list_for_each_entry(thread, &threads->dead, node) {
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
}
return rc;
}
@@ -2184,12 +2280,16 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
- unsigned int proc_map_timeout)
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize)
{
if (target__has_task(target))
return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
else if (target__has_cpu(target))
- return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
+ return perf_event__synthesize_threads(tool, process,
+ machine, data_mmap,
+ proc_map_timeout,
+ nr_threads_synthesize);
/* command specified */
return 0;
}