diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 13:05:08 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-13 13:05:08 -0800 |
commit | 31486372a1e9a66ec2e9e2903b8792bba7e503e1 (patch) | |
tree | 84f61e0758695e6fbee8d2b7d7b9bc4a5ce6e03b /tools/perf/util/machine.c | |
parent | 8e9a2dba8686187d8c8179e5b86640e653963889 (diff) | |
parent | fcdfafcb73be8fa45909327bbddca46fb362a675 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main changes in this cycle were:
Kernel:
- kprobes updates: use better W^X patterns for code modifications,
improve optprobes, remove jprobes. (Masami Hiramatsu, Kees Cook)
- core fixes: event timekeeping (enabled/running times statistics)
fixes, perf_event_read() locking fixes and cleanups, etc. (Peter
Zijlstra)
- Extend x86 Intel free-running PEBS support and support x86
user-register sampling in perf record and perf script. (Andi Kleen)
Tooling:
- Completely rework the way inline frames are handled. Instead of
querying for the inline nodes on-demand in the individual tools, we
now create proper callchain nodes for inlined frames. (Milian
Wolff)
- 'perf trace' updates (Arnaldo Carvalho de Melo)
- Implement a way to print formatted output to per-event files in
'perf script' to facilitate generate flamegraphs, elliminating the
need to write scripts to do that separation (yuzhoujian, Arnaldo
Carvalho de Melo)
- Update vendor events JSON metrics for Intel's Broadwell, Broadwell
Server, Haswell, Haswell Server, IvyBridge, IvyTown, JakeTown,
Sandy Bridge, Skylake, SkyLake Server - and Goldmont Plus V1 (Andi
Kleen, Kan Liang)
- Multithread the synthesizing of PERF_RECORD_ events for
pre-existing threads in 'perf top', speeding up that phase, greatly
improving the user experience in systems such as Intel's Knights
Mill (Kan Liang)
- Introduce the concept of weak groups in 'perf stat': try to set up
a group, but if it's not schedulable fallback to not using a group.
That gives us the best of both worlds: groups if they work, but
still a usable fallback if they don't. E.g: (Andi Kleen)
- perf sched timehist enhancements (David Ahern)
- ... various other enhancements, updates, cleanups and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (139 commits)
kprobes: Don't spam the build log with deprecation warnings
arm/kprobes: Remove jprobe test case
arm/kprobes: Fix kretprobe test to check correct counter
perf srcline: Show correct function name for srcline of callchains
perf srcline: Fix memory leak in addr2inlines()
perf trace beauty kcmp: Beautify arguments
perf trace beauty: Implement pid_fd beautifier
tools include uapi: Grab a copy of linux/kcmp.h
perf callchain: Fix double mapping al->addr for children without self period
perf stat: Make --per-thread update shadow stats to show metrics
perf stat: Move the shadow stats scale computation in perf_stat__update_shadow_stats
perf tools: Add perf_data_file__write function
perf tools: Add struct perf_data_file
perf tools: Rename struct perf_data_file to perf_data
perf script: Print information about per-event-dump files
perf trace beauty prctl: Generate 'option' string table from kernel headers
tools include uapi: Grab a copy of linux/prctl.h
perf script: Allow creating per-event dump files
perf evsel: Restore evsel->priv as a tool private area
perf script: Use event_format__fprintf()
...
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r-- | tools/perf/util/machine.c | 228 |
1 files changed, 164 insertions, 64 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bd5d5b5e2218..6a8d03c3d9b7 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -31,7 +31,21 @@ static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; - pthread_rwlock_init(&dsos->lock, NULL); + init_rwsem(&dsos->lock); +} + +static void machine__threads_init(struct machine *machine) +{ + int i; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + threads->entries = RB_ROOT; + init_rwsem(&threads->lock); + threads->nr = 0; + INIT_LIST_HEAD(&threads->dead); + threads->last_match = NULL; + } } int machine__init(struct machine *machine, const char *root_dir, pid_t pid) @@ -41,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine->threads = RB_ROOT; - pthread_rwlock_init(&machine->threads_lock, NULL); - machine->nr_threads = 0; - INIT_LIST_HEAD(&machine->dead_threads); - machine->last_match = NULL; + machine__threads_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -121,7 +131,7 @@ static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); @@ -130,39 +140,49 @@ static void dsos__purge(struct dsos *dsos) dso__put(pos); } - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } static void dsos__exit(struct dsos *dsos) { dsos__purge(dsos); - pthread_rwlock_destroy(&dsos->lock); + exit_rwsem(&dsos->lock); } void machine__delete_threads(struct machine *machine) { struct rb_node *nd; + int i; - pthread_rwlock_wrlock(&machine->threads_lock); - nd = rb_first(&machine->threads); - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + down_write(&threads->lock); + nd = rb_first(&threads->entries); + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); - nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + nd = rb_next(nd); + __machine__remove_thread(machine, t, false); + } + up_write(&threads->lock); } - pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { + int i; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); - pthread_rwlock_destroy(&machine->threads_lock); + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + exit_rwsem(&threads->lock); + } } void machine__delete(struct machine *machine) @@ -380,10 +400,11 @@ out_err: * lookup/new thread inserted. */ static struct thread *____machine__findnew_thread(struct machine *machine, + struct threads *threads, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &machine->threads.rb_node; + struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -392,14 +413,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * so most of the time we dont have to look up * the full rbtree: */ - th = machine->last_match; + th = threads->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - machine->last_match = NULL; + threads->last_match = NULL; } while (*p != NULL) { @@ -407,7 +428,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + threads->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -424,7 +445,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &machine->threads); + rb_insert_color(&th->rb_node, &threads->entries); /* * We have to initialize map_groups separately @@ -435,7 +456,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -444,8 +465,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - machine->last_match = th; - ++machine->nr_threads; + threads->last_match = th; + ++threads->nr; } return th; @@ -453,27 +474,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine, struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - return ____machine__findnew_thread(machine, pid, tid, true); + return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true); } struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&machine->threads_lock); + down_write(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&machine->threads_lock); + up_write(&threads->lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&machine->threads_lock); - th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&machine->threads_lock); + + down_read(&threads->lock); + th = ____machine__findnew_thread(machine, threads, pid, tid, false); + up_read(&threads->lock); return th; } @@ -565,7 +589,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, { struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, m->name, true); if (!dso) { @@ -579,7 +603,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__get(dso); out_unlock: - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } @@ -720,21 +744,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret; struct rb_node *nd; + size_t ret; + int i; - pthread_rwlock_rdlock(&machine->threads_lock); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; - ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + down_read(&threads->lock); - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + ret = fprintf(fp, "Threads: %u\n", threads->nr); - ret += thread__fprintf(pos, fp); - } + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); - pthread_rwlock_unlock(&machine->threads_lock); + ret += thread__fprintf(pos, fp); + } + up_read(&threads->lock); + } return ret; } @@ -1293,7 +1321,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; - pthread_rwlock_rdlock(&machine->dsos.lock); + down_read(&machine->dsos.lock); list_for_each_entry(dso, &machine->dsos.head, node) { @@ -1323,7 +1351,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, break; } - pthread_rwlock_unlock(&machine->dsos.lock); + up_read(&machine->dsos.lock); if (kernel == NULL) kernel = machine__findnew_dso(machine, kmmap_prefix); @@ -1480,23 +1508,25 @@ out_problem: static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - if (machine->last_match == th) - machine->last_match = NULL; + struct threads *threads = machine__threads(machine, th->tid); + + if (threads->last_match == th) + threads->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&machine->threads_lock); - rb_erase_init(&th->rb_node, &machine->threads); + down_write(&threads->lock); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); - --machine->nr_threads; + --threads->nr; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. */ - list_add_tail(&th->node, &machine->dead_threads); + list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&machine->threads_lock); + up_write(&threads->lock); thread__put(th); } @@ -1680,6 +1710,26 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return mi; } +static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) +{ + char *srcline = NULL; + + if (!map || callchain_param.key == CCKEY_FUNCTION) + return srcline; + + srcline = srcline__tree_find(&map->dso->srclines, ip); + if (!srcline) { + bool show_sym = false; + bool show_addr = callchain_param.key == CCKEY_ADDRESS; + + srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), + sym, show_sym, show_addr); + srcline__tree_insert(&map->dso->srclines, ip, srcline); + } + + return srcline; +} + struct iterations { int nr_loop_iter; u64 cycles; @@ -1699,6 +1749,7 @@ static int add_callchain_ip(struct thread *thread, struct addr_location al; int nr_loop_iter = 0; u64 iter_cycles = 0; + const char *srcline = NULL; al.filtered = 0; al.sym = NULL; @@ -1754,9 +1805,10 @@ static int add_callchain_ip(struct thread *thread, iter_cycles = iter->cycles; } + srcline = callchain_srcline(al.map, al.sym, al.addr); return callchain_cursor_append(cursor, al.addr, al.map, al.sym, branch, flags, nr_loop_iter, - iter_cycles, branch_from); + iter_cycles, branch_from, srcline); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -2069,15 +2121,54 @@ check_calls: return 0; } +static int append_inlines(struct callchain_cursor *cursor, + struct map *map, struct symbol *sym, u64 ip) +{ + struct inline_node *inline_node; + struct inline_list *ilist; + u64 addr; + int ret = 1; + + if (!symbol_conf.inline_name || !map || !sym) + return ret; + + addr = map__rip_2objdump(map, ip); + + inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); + if (!inline_node) { + inline_node = dso__parse_addr_inlines(map->dso, addr, sym); + if (!inline_node) + return ret; + inlines__tree_insert(&map->dso->inlined_nodes, inline_node); + } + + list_for_each_entry(ilist, &inline_node->val, list) { + ret = callchain_cursor_append(cursor, ip, map, + ilist->symbol, false, + NULL, 0, 0, 0, ilist->srcline); + + if (ret != 0) + return ret; + } + + return ret; +} + static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; + const char *srcline = NULL; if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; + + if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0) + return 0; + + srcline = callchain_srcline(entry->map, entry->sym, entry->ip); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, - false, NULL, 0, 0, 0); + false, NULL, 0, 0, 0, srcline); } static int thread__resolve_callchain_unwind(struct thread *thread, @@ -2141,21 +2232,26 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { + struct threads *threads; struct rb_node *nd; struct thread *thread; int rc = 0; + int i; - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + threads = &machine->threads[i]; + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } - list_for_each_entry(thread, &machine->dead_threads, node) { - rc = fn(thread, priv); - if (rc != 0) - return rc; + list_for_each_entry(thread, &threads->dead, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } } return rc; } @@ -2184,12 +2280,16 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout) + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) { if (target__has_task(target)) return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, + proc_map_timeout, + nr_threads_synthesize); /* command specified */ return 0; } |