From bcdc09af3ef30ef071677544ce23a1c8873a2dda Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Wed, 3 Aug 2016 02:47:49 +0000 Subject: perf script: Add 'bpf-output' field to usage message This adds the 'bpf-output' field to the perf script usage message, and docs. Signed-off-by: Brendan Gregg Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1470192469-11910-4-git-send-email-bgregg@netflix.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 ++-- tools/perf/builtin-script.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 1f6c70594f0f..053bbbd84ece 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,8 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags. - Field list can be prepended with the type, trace, sw or hw, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + callindent. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 971ff91b16cb..9c640a8081c7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2116,7 +2116,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "callindent", parse_output_fields), + "bpf-output,callindent", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", -- cgit v1.2.3 From 887fa86d6fd7a45cee2d0f9d5f75026786d61df2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Aug 2016 12:37:21 -0300 Subject: perf hists: Trim libtraceevent trace_seq buffers When we use libtraceevent to format trace event fields into printable strings to use in hist entries it is important to trim it from the default 4 KiB it starts with to what is really used, to reduce the memory footprint, so use realloc(seq.buffer, seq.len + 1) when returning the seq.buffer formatted with the fields contents. Reported-and-Tested-by: Wang Nan Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-t3hl7uxmilrkigzmc90rlhk2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 947d21f38398..3d3cb8392c86 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -588,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he) } else { pevent_event_info(&seq, evsel->tp_format, &rec); } - return seq.buffer; + /* + * Trim the buffer, it starts at 4KB and we're not going to + * add anything more to this buffer. + */ + return realloc(seq.buffer, seq.len + 1); } static int64_t -- cgit v1.2.3 From 8e34189b347d76acf48ce05831176582201b664d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 6 Aug 2016 19:29:48 +0900 Subject: perf probe: Adjust map->reloc offset when finding kernel symbol from map Adjust map->reloc offset for the unmapped address when finding alternative symbol address from map, because KASLR can relocate the kernel symbol address. The same adjustment has been done when finding appropriate kernel symbol address from map which was introduced by commit f90acac75713 ("perf probe: Find given address from offline dwarf") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Wang Nan Link: http://lkml.kernel.org/r/20160806192948.e366f3fbc4b194de600f8326@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 953dc1ab2ed7..d5ccb656fd81 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -385,7 +385,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, if (uprobes) address = sym->start; else - address = map->unmap_ip(map, sym->start); + address = map->unmap_ip(map, sym->start) - map->reloc; break; } if (!address) { -- cgit v1.2.3 From cb3f3378cd09aa3fe975b4ad5ee0229dc76315bb Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Aug 2016 15:22:36 +0300 Subject: perf probe: Fix module name matching If module is "module" then dso->short_name is "[module]". Substring comparing is't enough: "raid10" matches to "[raid1]". This patch also checks terminating zero in module name. Signed-off-by: Konstantin Khlebnikov Acked-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/147039975648.715620.12985971832789032159.stgit@buzz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d5ccb656fd81..1201f73ca723 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -170,8 +170,10 @@ static struct map *kernel_get_module_map(const char *module) module = "kernel"; for (pos = maps__first(maps); pos; pos = map__next(pos)) { + /* short_name is "[module]" */ if (strncmp(pos->dso->short_name + 1, module, - pos->dso->short_name_len - 2) == 0) { + pos->dso->short_name_len - 2) == 0 && + module[pos->dso->short_name_len - 2] == '\0') { return pos; } } -- cgit v1.2.3 From 3df33eff2ba96be4f1535db4f672013d756dc9b1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Aug 2016 14:04:29 +0100 Subject: perf stat: Avoid skew when reading events When we don't have a tracee (i.e. we're attaching to a task or CPU), counters can still be running after our workload finishes, and can still be running as we read their values. As we read events one-by-one, there can be arbitrary skew between values of events, even within a group. This means that ratios within an event group are not reliable. This skew can be seen if measuring a group of identical events, e.g: # perf stat -a -C0 -e '{cycles,cycles}' sleep 1 To avoid this, we must stop groups from counting before we read the values of any constituent events. This patch adds and makes use of a new disable_counters() helper, which disables group leaders (and thus each group as a whole). This mirrors the use of enable_counters() for starting event groups in the absence of a tracee. Closing a group leader splits the group, and without a disabled group leader the newly split events will begin counting. Thus to ensure counts are reliable we must defer closing group leaders until all counts have been read. To do so this patch removes the event closing logic from the read_counters() helper, explicitly closes the events using perf_evlist__close(), which also aids legibility. Signed-off-by: Mark Rutland Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1470747869-3567-1-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0c16d20d7e32..3c7452b39f57 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -331,7 +331,7 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void read_counters(bool close_counters) +static void read_counters(void) { struct perf_evsel *counter; @@ -341,11 +341,6 @@ static void read_counters(bool close_counters) if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); - - if (close_counters) { - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } } } @@ -353,7 +348,7 @@ static void process_interval(void) { struct timespec ts, rs; - read_counters(false); + read_counters(); clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); @@ -380,6 +375,17 @@ static void enable_counters(void) perf_evlist__enable(evsel_list); } +static void disable_counters(void) +{ + /* + * If we don't have tracee (attaching to task or cpu), counters may + * still be running. To get accurate group ratios, we must stop groups + * from counting before reading their constituent counters. + */ + if (!target__none(&target)) + perf_evlist__disable(evsel_list); +} + static volatile int workload_exec_errno; /* @@ -657,11 +663,20 @@ try_again: } } + disable_counters(); + t1 = rdclock(); update_stats(&walltime_nsecs_stats, t1 - t0); - read_counters(true); + /* + * Closing a group leader splits the group, and as we only disable + * group leaders, results in remaining events becoming enabled. To + * avoid arbitrary skew, we must read all counters before closing any + * group leaders. + */ + read_counters(); + perf_evlist__close(evsel_list); return WEXITSTATUS(status); } -- cgit v1.2.3 From 19f00b011729417f69e4df53cc3fe5ecc25134a4 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 9 Aug 2016 11:40:08 +0900 Subject: perf probe: Support signedness casting The 'perf probe' tool detects a variable's type and use the detected type to add a new probe. Then, kprobes prints its variable in hexadecimal format if the variable is unsigned and prints in decimal if it is signed. We sometimes want to see unsigned variable in decimal format (i.e. sector_t or size_t). In that case, we need to investigate the variable's size manually to specify just signedness. This patch add signedness casting support. By specifying "s" or "u" as a type, perf-probe will investigate variable size as usual and use the specified signedness. E.g. without this: $ perf probe -a 'submit_bio bio->bi_iter.bi_sector' Added new event: probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector) You can now use it in all perf tools, such as: perf record -e probe:submit_bio -aR sleep 1 $ cat trace_pipe|head dbench-9692 [003] d..1 971.096633: submit_bio: (submit_bio+0x0/0x140) bi_sector=0x3a3d00 dbench-9692 [003] d..1 971.096685: submit_bio: (submit_bio+0x0/0x140) bi_sector=0x1a3d80 dbench-9692 [003] d..1 971.096687: submit_bio: (submit_bio+0x0/0x140) bi_sector=0x3a3d80 ... // need to investigate the variable size $ perf probe -a 'submit_bio bio->bi_iter.bi_sector:s64' Added new event: probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s64) You can now use it in all perf tools, such as: perf record -e probe:submit_bio -aR sleep 1 With this: // just use "s" to cast its signedness $ perf probe -v -a 'submit_bio bio->bi_iter.bi_sector:s' Added new event: probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s) You can now use it in all perf tools, such as: perf record -e probe:submit_bio -aR sleep 1 $ cat trace_pipe|head dbench-9689 [001] d..1 1212.391237: submit_bio: (submit_bio+0x0/0x140) bi_sector=128 dbench-9689 [001] d..1 1212.391252: submit_bio: (submit_bio+0x0/0x140) bi_sector=131072 dbench-9697 [006] d..1 1212.398611: submit_bio: (submit_bio+0x0/0x140) bi_sector=30208 This commit also update perf-probe.txt to describe "types". Most parts are based on existing documentation: Documentation/trace/kprobetrace.txt Committer note: Testing using 'perf trace': # perf probe -a 'submit_bio bio->bi_iter.bi_sector' Added new event: probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector) You can now use it in all perf tools, such as: perf record -e probe:submit_bio -aR sleep 1 # trace --no-syscalls --ev probe:submit_bio 0.000 probe:submit_bio:(ffffffffac3aee00) bi_sector=0xc133c0) 3181.861 probe:submit_bio:(ffffffffac3aee00) bi_sector=0x6cffb8) 3181.881 probe:submit_bio:(ffffffffac3aee00) bi_sector=0x6cffc0) 3184.488 probe:submit_bio:(ffffffffac3aee00) bi_sector=0x6cffc8) 4717.927 probe:submit_bio:(ffffffffac3aee00) bi_sector=0x4dc7a88) 4717.970 probe:submit_bio:(ffffffffac3aee00) bi_sector=0x4dc7880) ^C[root@jouet ~]# Now, using this new feature: [root@jouet ~]# perf probe -a 'submit_bio bio->bi_iter.bi_sector:s' Added new event: probe:submit_bio (on submit_bio with bi_sector=bio->bi_iter.bi_sector:s) You can now use it in all perf tools, such as: perf record -e probe:submit_bio -aR sleep 1 [root@jouet ~]# trace --no-syscalls --ev probe:submit_bio 0.000 probe:submit_bio:(ffffffffac3aee00) bi_sector=7145704) 0.017 probe:submit_bio:(ffffffffac3aee00) bi_sector=7145712) 0.019 probe:submit_bio:(ffffffffac3aee00) bi_sector=7145720) 2.567 probe:submit_bio:(ffffffffac3aee00) bi_sector=7145728) 5631.919 probe:submit_bio:(ffffffffac3aee00) bi_sector=0) 5631.941 probe:submit_bio:(ffffffffac3aee00) bi_sector=8) 5631.945 probe:submit_bio:(ffffffffac3aee00) bi_sector=16) 5631.948 probe:submit_bio:(ffffffffac3aee00) bi_sector=24) ^C# With callchains: # trace --no-syscalls --ev probe:submit_bio/max-stack=10/ 0.000 probe:submit_bio:(ffffffffac3aee00) bi_sector=50662544) submit_bio+0xa8200001 ([kernel.kallsyms]) submit_bh+0xa8200013 ([kernel.kallsyms]) jbd2_journal_commit_transaction+0xa8200691 ([kernel.kallsyms]) kjournald2+0xa82000ca ([kernel.kallsyms]) kthread+0xa82000d8 ([kernel.kallsyms]) ret_from_fork+0xa820001f ([kernel.kallsyms]) 0.023 probe:submit_bio:(ffffffffac3aee00) bi_sector=50662552) submit_bio+0xa8200001 ([kernel.kallsyms]) submit_bh+0xa8200013 ([kernel.kallsyms]) jbd2_journal_commit_transaction+0xa8200691 ([kernel.kallsyms]) kjournald2+0xa82000ca ([kernel.kallsyms]) kthread+0xa82000d8 ([kernel.kallsyms]) ret_from_fork+0xa820001f ([kernel.kallsyms]) 0.027 probe:submit_bio:(ffffffffac3aee00) bi_sector=50662560) submit_bio+0xa8200001 ([kernel.kallsyms]) submit_bh+0xa8200013 ([kernel.kallsyms]) jbd2_journal_commit_transaction+0xa8200691 ([kernel.kallsyms]) kjournald2+0xa82000ca ([kernel.kallsyms]) kthread+0xa82000d8 ([kernel.kallsyms]) ret_from_fork+0xa820001f ([kernel.kallsyms]) 2.593 probe:submit_bio:(ffffffffac3aee00) bi_sector=50662568) submit_bio+0xa8200001 ([kernel.kallsyms]) submit_bh+0xa8200013 ([kernel.kallsyms]) journal_submit_commit_record+0xa82001ac ([kernel.kallsyms]) jbd2_journal_commit_transaction+0xa82012e8 ([kernel.kallsyms]) kjournald2+0xa82000ca ([kernel.kallsyms]) kthread+0xa82000d8 ([kernel.kallsyms]) ret_from_fork+0xa820001f ([kernel.kallsyms]) ^C# Signed-off-by: Naohiro Aota Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Hemant Kumar Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1470710408-23515-1-git-send-email-naohiro.aota@hgst.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 10 +++++++++- tools/perf/util/probe-finder.c | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 736da44596e4..b303bcdd8ed1 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -176,10 +176,18 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. -'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. +TYPES +----- +Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. +String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. +Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is; + + b@/ + LINE SYNTAX ----------- Line range is described by following syntax. diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f2d9ff064e2d..5c290c682afe 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die, char sbuf[STRERR_BUFSIZE]; int bsize, boffs, total; int ret; + char sign; /* TODO: check all types */ - if (cast && strcmp(cast, "string") != 0) { + if (cast && strcmp(cast, "string") != 0 && + strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) { /* Non string type is OK */ + /* and respect signedness cast */ tvar->type = strdup(cast); return (tvar->type == NULL) ? -ENOMEM : 0; } @@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die, return (tvar->type == NULL) ? -ENOMEM : 0; } + if (cast && (strcmp(cast, "u") == 0)) + sign = 'u'; + else if (cast && (strcmp(cast, "s") == 0)) + sign = 's'; + else + sign = die_is_signed_type(&type) ? 's' : 'u'; + ret = dwarf_bytesize(&type); if (ret <= 0) /* No size ... try to use default type */ @@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, dwarf_diename(&type), MAX_BASIC_TYPE_BITS); ret = MAX_BASIC_TYPE_BITS; } - ret = snprintf(buf, 16, "%c%d", - die_is_signed_type(&type) ? 's' : 'u', ret); + ret = snprintf(buf, 16, "%c%d", sign, ret); formatted: if (ret < 0 || ret >= 16) { -- cgit v1.2.3 From d820456dc70b231d62171ba46b43db0045e9bd57 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 9 Aug 2016 01:23:24 -0500 Subject: perf probe: Add function to post process kernel trace events Instead of inline code, introduce function to post process kernel probe trace events. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Ananth N Mavinakayanahalli Cc: Balbir Singh Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1470723805-5081-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1201f73ca723..234fbfb5c2ed 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -666,22 +666,14 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, return ret; } -/* Post processing the probe events */ -static int post_process_probe_trace_events(struct probe_trace_event *tevs, - int ntevs, const char *module, - bool uprobe) +static int +post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, + int ntevs) { struct ref_reloc_sym *reloc_sym; char *tmp; int i, skipped = 0; - if (uprobe) - return add_exec_to_probe_trace_events(tevs, ntevs, module); - - /* Note that currently ref_reloc_sym based probe is not for drivers */ - if (module) - return add_module_to_probe_trace_events(tevs, ntevs, module); - reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -713,6 +705,21 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, return skipped; } +/* Post processing the probe events */ +static int post_process_probe_trace_events(struct probe_trace_event *tevs, + int ntevs, const char *module, + bool uprobe) +{ + if (uprobe) + return add_exec_to_probe_trace_events(tevs, ntevs, module); + + if (module) + /* Currently ref_reloc_sym based probe is not for drivers */ + return add_module_to_probe_trace_events(tevs, ntevs, module); + + return post_process_kernel_probe_trace_events(tevs, ntevs); +} + /* Try to find perf_probe_event with debuginfo */ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs) -- cgit v1.2.3 From 99e608b5954c9e1ebadbf9660b74697d9dfd9f20 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 9 Aug 2016 01:23:25 -0500 Subject: perf probe ppc64le: Fix probe location when using DWARF Powerpc has Global Entry Point and Local Entry Point for functions. LEP catches call from both the GEP and the LEP. Symbol table of ELF contains GEP and Offset from which we can calculate LEP, but debuginfo does not have LEP info. Currently, perf prioritize symbol table over dwarf to probe on LEP for ppc64le. But when user tries to probe with function parameter, we fall back to using dwarf(i.e. GEP) and when function called via LEP, probe will never hit. For example: $ objdump -d vmlinux ... do_sys_open(): c0000000002eb4a0: e8 00 4c 3c addis r2,r12,232 c0000000002eb4a4: 60 00 42 38 addi r2,r2,96 c0000000002eb4a8: a6 02 08 7c mflr r0 c0000000002eb4ac: d0 ff 41 fb std r26,-48(r1) $ sudo ./perf probe do_sys_open $ sudo cat /sys/kernel/debug/tracing/kprobe_events p:probe/do_sys_open _text+3060904 $ sudo ./perf probe 'do_sys_open filename:string' $ sudo cat /sys/kernel/debug/tracing/kprobe_events p:probe/do_sys_open _text+3060896 filename_string=+0(%gpr4):string For second case, perf probed on GEP. So when function will be called via LEP, probe won't hit. $ sudo ./perf record -a -e probe:do_sys_open ls [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.195 MB perf.data ] To resolve this issue, let's not prioritize symbol table, let perf decide what it wants to use. Perf is already converting GEP to LEP when it uses symbol table. When perf uses debuginfo, let it find LEP offset form symbol table. This way we fall back to probe on LEP for all cases. After patch: $ sudo ./perf probe 'do_sys_open filename:string' $ sudo cat /sys/kernel/debug/tracing/kprobe_events p:probe/do_sys_open _text+3060904 filename_string=+0(%gpr4):string $ sudo ./perf record -a -e probe:do_sys_open ls [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.197 MB perf.data (11 samples) ] Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Ananth N Mavinakayanahalli Cc: Balbir Singh Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1470723805-5081-2-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 27 +++++++++++++++++---- tools/perf/util/probe-event.c | 37 ++++++++++++++++------------- tools/perf/util/probe-event.h | 6 ++++- 3 files changed, 49 insertions(+), 21 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index c6d0f91731a1..8d4dc97d80ba 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -54,10 +54,6 @@ int arch__compare_symbol_names(const char *namea, const char *nameb) #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 -bool arch__prefers_symtab(void) -{ - return true; -} #ifdef HAVE_LIBELF_SUPPORT void arch__sym_update(struct symbol *s, GElf_Sym *sym) @@ -100,4 +96,27 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, tev->point.offset += lep_offset; } } + +void arch__post_process_probe_trace_events(struct perf_probe_event *pev, + int ntevs) +{ + struct probe_trace_event *tev; + struct map *map; + struct symbol *sym = NULL; + struct rb_node *tmp; + int i = 0; + + map = get_target_map(pev->target, pev->uprobes); + if (!map || map__load(map, NULL) < 0) + return; + + for (i = 0; i < ntevs; i++) { + tev = &pev->tevs[i]; + map__for_each_symbol(map, sym, tmp) { + if (map->unmap_ip(map, sym->start) == tev->point.address) + arch__fix_tev_from_maps(pev, tev, map, sym); + } + } +} + #endif diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 234fbfb5c2ed..28733962cd80 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -180,7 +180,7 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } -static struct map *get_target_map(const char *target, bool user) +struct map *get_target_map(const char *target, bool user) { /* Init maps of given executable or kernel */ if (user) @@ -705,19 +705,32 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, return skipped; } +void __weak +arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused, + int ntevs __maybe_unused) +{ +} + /* Post processing the probe events */ -static int post_process_probe_trace_events(struct probe_trace_event *tevs, +static int post_process_probe_trace_events(struct perf_probe_event *pev, + struct probe_trace_event *tevs, int ntevs, const char *module, bool uprobe) { - if (uprobe) - return add_exec_to_probe_trace_events(tevs, ntevs, module); + int ret; - if (module) + if (uprobe) + ret = add_exec_to_probe_trace_events(tevs, ntevs, module); + else if (module) /* Currently ref_reloc_sym based probe is not for drivers */ - return add_module_to_probe_trace_events(tevs, ntevs, module); + ret = add_module_to_probe_trace_events(tevs, ntevs, module); + else + ret = post_process_kernel_probe_trace_events(tevs, ntevs); - return post_process_kernel_probe_trace_events(tevs, ntevs); + if (ret >= 0) + arch__post_process_probe_trace_events(pev, ntevs); + + return ret; } /* Try to find perf_probe_event with debuginfo */ @@ -758,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, if (ntevs > 0) { /* Succeeded to find trace events */ pr_debug("Found %d probe_trace_events.\n", ntevs); - ret = post_process_probe_trace_events(*tevs, ntevs, + ret = post_process_probe_trace_events(pev, *tevs, ntevs, pev->target, pev->uprobes); if (ret < 0 || ret == ntevs) { clear_probe_trace_events(*tevs, ntevs); @@ -2945,8 +2958,6 @@ errout: return err; } -bool __weak arch__prefers_symtab(void) { return false; } - /* Concatinate two arrays */ static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) { @@ -3167,12 +3178,6 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */ return ret == 0 ? -ENOENT : ret; /* Found in probe cache */ - if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { - ret = find_probe_trace_events_from_map(pev, tevs); - if (ret > 0) - return ret; /* Found in symbol table */ - } - /* Convert perf_probe_event with debuginfo */ ret = try_to_find_probe_trace_events(pev, tevs); if (ret != 0) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e18ea9fe6385..f4f45db77c1c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -158,7 +158,6 @@ int show_line_range(struct line_range *lr, const char *module, bool user); int show_available_vars(struct perf_probe_event *pevs, int npevs, struct strfilter *filter); int show_available_funcs(const char *module, struct strfilter *filter, bool user); -bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map, struct symbol *sym); @@ -173,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...) int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, struct perf_probe_arg *pvar); +struct map *get_target_map(const char *target, bool user); + +void arch__post_process_probe_trace_events(struct perf_probe_event *pev, + int ntevs); + #endif /*_PROBE_EVENT_H */ -- cgit v1.2.3 From e1717e0485af4f47fc4da1e979ac817f9ad61b0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 12:00:06 +0300 Subject: perf intel-pt: Fix ip compression The June 2015 Intel SDM introduced IP Compression types 4 and 6. Refer to section 36.4.2.2 Target IP (TIP) Packet - IP Compression. Existing Intel PT packet decoder did not support type 4, and got type 6 wrong. Because type 3 and type 4 have the same number of bytes, the packet 'count' has been changed from being the number of ip bytes to being the type code. That allows the Intel PT decoder to correctly decide whether to sign-extend or use the last ip. However that also meant the code had to be adjusted in a number of places. Currently hardware is not using the new compression types, so this fix has no effect on existing hardware. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1469005206-3049-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 44 +++++++++++----------- .../util/intel-pt-decoder/intel-pt-pkt-decoder.c | 24 ++++++++---- 2 files changed, 40 insertions(+), 28 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9c8f15da86ce..8ff6c6a61291 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -123,8 +123,6 @@ struct intel_pt_decoder { bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; - uint64_t sign_bit; - uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; uint64_t tot_insn_cnt; @@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; - decoder->sign_bit = (uint64_t)1 << 47; - decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); - decoder->period = params->period; decoder->period_type = params->period_type; @@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen) return 0; } -static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, - const struct intel_pt_pkt *packet, +static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, uint64_t last_ip) { uint64_t ip; switch (packet->count) { - case 2: + case 1: ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | packet->payload; break; - case 4: + case 2: ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | packet->payload; break; + case 3: + ip = packet->payload; + /* Sign-extend 6-byte ip */ + if (ip & (uint64_t)0x800000000000ULL) + ip |= (uint64_t)0xffff000000000000ULL; + break; + case 4: + ip = (last_ip & (uint64_t)0xffff000000000000ULL) | + packet->payload; + break; case 6: ip = packet->payload; break; @@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, return 0; } - if (ip & decoder->sign_bit) - return ip | decoder->sign_bits; - return ip; } static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { - decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, - decoder->last_ip); + decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -1657,6 +1657,12 @@ next: } } +static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) +{ + return decoder->last_ip || decoder->packet.count == 0 || + decoder->packet.count == 3 || decoder->packet.count == 6; +} + /* Walk PSB+ packets to get in sync. */ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { @@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) { + if (intel_pt_have_ip(decoder)) { uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); @@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; @@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: if (decoder->overflow) { - if (decoder->last_ip || - decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index b1257c816310..4f7b32020487 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - switch (byte >> 5) { + int ip_len; + + packet->count = byte >> 5; + + switch (packet->count) { case 0: - packet->count = 0; + ip_len = 0; break; case 1: if (len < 3) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 2; + ip_len = 2; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 4; + ip_len = 4; packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); break; case 3: - case 6: + case 4: if (len < 7) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 6; + ip_len = 6; memcpy_le64(&packet->payload, buf + 1, 6); break; + case 6: + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + ip_len = 8; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + break; default: return INTEL_PT_BAD_PACKET; } packet->type = type; - return packet->count + 1; + return ip_len + 1; } static int intel_pt_get_mode(const unsigned char *buf, size_t len, -- cgit v1.2.3 From 33da54fa86e29b87fe1e83bd0f15b4ef2be53ecb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Aug 2016 10:50:57 +0200 Subject: perf tools mem: Fix -t store option for record command Michael reported 'perf mem -t store record' being broken. The reason is latest rework of this area: commit acbe613e0c03 ("perf tools: Add monitored events array") We don't mark perf_mem_events store record when -t store option is specified. Committer notes: Before: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cycles:ppp # After: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cpu/mem-stores/P # Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: acbe613e0c03 ("perf tools: Add monitored events array") Link: http://lkml.kernel.org/r/1470905457-18311-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d608a2c9e48c..d1ce29be560e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->operation & MEM_OPERATION_LOAD) perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_STORE) + perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; -- cgit v1.2.3 From f046f3df665361d2f89e660f8c79ba164069f02a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 11 Aug 2016 14:43:59 -0300 Subject: perf ppc64le: Fix build failure when libelf is not present arch__post_process_probe_trace_events() calls get_target_map() to prepare symbol table. get_target_map() is defined inside util/probe-event.c. probe-event.c will only get included in perf binary if CONFIG_LIBELF is set. Hence arch__post_process_probe_trace_events() needs to be defined inside #ifdef HAVE_LIBELF_SUPPORT to solve compilation error. Reported-and-Tested-by: Anton Blanchard Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Alexander Shishkin Cc: Balbir Singh Cc: Naveen N. Rao Cc: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: Wang Nan Cc: Namhyung Kim Link: http://lkml.kernel.org/r/57ABFF88.8030905@linux.vnet.ibm.com [ Thunderbird MUA mangled it, fix that ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 8d4dc97d80ba..35745a733100 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -97,6 +97,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, } } +#ifdef HAVE_LIBELF_SUPPORT void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int ntevs) { @@ -118,5 +119,6 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, } } } +#endif /* HAVE_LIBELF_SUPPORT */ #endif -- cgit v1.2.3 From 49a7f01064a94c4fd6afb6b4f7e3ccc37d0edf99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 13 Aug 2016 01:12:10 -0300 Subject: perf jitdump: Add the right header to get the major()/minor() definitions Noticed on Fedora Rawhide: $ gcc --version gcc (GCC) 6.1.1 20160721 (Red Hat 6.1.1-4) $ rpm -q glibc glibc-2.24.90-1.fc26.x86_64 $ CC /tmp/build/perf/util/jitdump.o util/jitdump.c: In function 'jit_repipe_code_load': util/jitdump.c:428:2: error: '__major_from_sys_types' is deprecated: In the GNU C Library, `major' is defined by . For historical compatibility, it is currently defined by as well, but we plan to remove this soon. To use `major', include directly. If you did not intend to use a system-defined macro `major', you should #undef it after including . [-Werror=deprecated-declarations] event->mmap2.maj = major(st.st_dev); ^~~~~ In file included from /usr/include/features.h:397:0, from /usr/include/sys/types.h:25, from util/jitdump.c:1: /usr/include/sys/sysmacros.h:87:1: note: declared here __SYSMACROS_DEFINE_MAJOR (__SYSMACROS_FST_IMPL_TEMPL) Fix it following that recomendation. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3majvd0adhfr25rvx4v5e9te@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/jitdump.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf') diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9f3305f6b6d5..95f0884aae02 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit v1.2.3 From 88ded4d8d94a550624e1827478e13fecf97a7b0a Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 4 Aug 2016 11:25:42 +0000 Subject: perf script: Show proper message when failed list scripts Perf shows the usage message when perf scripts folder failed to open, which misleads users to let them think the command is being mistyped. This patch shows a proper message and guides users to check the PERF_EXEC_PATH environment variable in that case. Before: $ perf script --list Usage: perf script [] or: perf script [] record