diff options
author | Ingo Molnar <mingo@kernel.org> | 2019-10-22 01:15:45 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-10-22 01:15:45 +0200 |
commit | 27a0a90d6301dd883a748538e4db692a5fb923e1 (patch) | |
tree | 326b7d0970cab693343c5a808fbecb7b878402e9 /tools/perf/util/annotate.c | |
parent | aa7a7b72974a4612a85bb395a4b23b973ffc7d2b (diff) | |
parent | 27198a893ba074407e7a87e346252b3e6fab454f (diff) |
Merge tag 'perf-core-for-mingo-5.5-20191021' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
perf trace:
- Add syscall failure stats to -s/--summary and -S/--with-summary, works in
combination with specifying just a set of syscalls, see below first with
-s/--summary, then with -S/--with-summary just for the syscalls we saw failing
with -s:
# perf trace -s sleep 1
Summary of events:
sleep (16218), 80 events, 93.0%
syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
----------- ----- ------ -------- -------- -------- -------- ------
nanosleep 1 0 1000.091 1000.091 1000.091 1000.091 0.00%
mmap 8 0 0.045 0.005 0.006 0.008 7.09%
mprotect 4 0 0.028 0.005 0.007 0.009 11.38%
openat 3 0 0.021 0.005 0.007 0.009 14.07%
munmap 1 0 0.017 0.017 0.017 0.017 0.00%
brk 4 0 0.010 0.001 0.002 0.004 23.15%
read 4 0 0.009 0.002 0.002 0.003 8.13%
close 5 0 0.008 0.001 0.002 0.002 10.83%
fstat 3 0 0.006 0.002 0.002 0.002 6.97%
access 1 1 0.006 0.006 0.006 0.006 0.00%
lseek 3 0 0.005 0.001 0.002 0.002 7.37%
arch_prctl 2 1 0.004 0.001 0.002 0.002 17.64%
execve 1 0 0.000 0.000 0.000 0.000 0.00%
# perf trace -e access,arch_prctl -S sleep 1
0.000 ( 0.006 ms): sleep/19503 arch_prctl(option: 0x3001, arg2: 0x7fff165996b0) = -1 EINVAL (Invalid argument)
0.024 ( 0.006 ms): sleep/19503 access(filename: 0x2177e510, mode: R) = -1 ENOENT (No such file or directory)
0.136 ( 0.002 ms): sleep/19503 arch_prctl(option: SET_FS, arg2: 0x7f9421737580) = 0
Summary of events:
sleep (19503), 6 events, 50.0%
syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
---------- ----- ------ ------ ------ ------ ------ ------
arch_prctl 2 1 0.008 0.002 0.004 0.006 57.22%
access 1 1 0.006 0.006 0.006 0.006 0.00%
#
- Introduce --errno-summary, to drill down a bit more in the errno stats:
# perf trace --errno-summary -e access,arch_prctl -S sleep 1
0.000 ( 0.006 ms): sleep/5587 arch_prctl(option: 0x3001, arg2: 0x7ffd6ba6aa00) = -1 EINVAL (Invalid argument)
0.028 ( 0.007 ms): sleep/5587 access(filename: 0xb83d9510, mode: R) = -1 ENOENT (No such file or directory)
0.172 ( 0.003 ms): sleep/5587 arch_prctl(option: SET_FS, arg2: 0x7f45b8392580) = 0
Summary of events:
sleep (5587), 6 events, 50.0%
syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
---------- ----- ------ ------ ------ ------ ------ ------
arch_prctl 2 1 0.009 0.003 0.005 0.006 38.90%
EINVAL: 1
access 1 1 0.007 0.007 0.007 0.007 0.00%
ENOENT: 1
#
- Filter own pid to avoid a feedback look in 'perf trace record -a'
- Add the glue for the auto generated x86 IRQ vector array.
- Show error message when not finding a field used in a filter expression
# perf trace --max-events=4 -e syscalls:sys_enter_write --filter="cnt>32767"
Failed to set filter "(cnt>32767) && (common_pid != 19938 && common_pid != 8922)" on event syscalls:sys_enter_write with 22 (Invalid argument)
#
# perf trace --max-events=4 -e syscalls:sys_enter_write --filter="count>32767"
0.000 python3.5/17535 syscalls:sys_enter_write(fd: 3, buf: 0x564b0dc53600, count: 172086)
12.641 python3.5.post/17535 syscalls:sys_enter_write(fd: 3, buf: 0x564b0db63660, count: 75994)
27.738 python3.5.post/17535 syscalls:sys_enter_write(fd: 3, buf: 0x564b0db4b1e0, count: 41635)
136.070 python3.5.post/17535 syscalls:sys_enter_write(fd: 3, buf: 0x564b0dbab510, count: 62232)
#
- Add a generator for x86's IRQ vectors -> strings
- Introduce stroul() (string -> number) methods for the strarray and
strarrays classes, also strtoul_flags, allowing to go from both strings
and or-ed strings to numbers, allowing things like:
# perf trace -e syscalls:sys_enter_mmap --filter="flags==DENYWRITE|PRIVATE|FIXED" sleep 1
0.000 sleep/22588 syscalls:sys_enter_mmap(addr: 0x7f42d2aa5000, len: 1363968, prot: READ|EXEC, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x22000)
0.011 sleep/22588 syscalls:sys_enter_mmap(addr: 0x7f42d2bf2000, len: 311296, prot: READ, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x16f000)
0.015 sleep/22588 syscalls:sys_enter_mmap(addr: 0x7f42d2c3f000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x1bb000)
#
Allowing to narrow down from the complete set of mmap calls for that workload:
# perf trace -e syscalls:sys_enter_mmap sleep 1
0.000 sleep/22695 syscalls:sys_enter_mmap(len: 134773, prot: READ, flags: PRIVATE, fd: 3)
0.041 sleep/22695 syscalls:sys_enter_mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS)
0.053 sleep/22695 syscalls:sys_enter_mmap(len: 1857472, prot: READ, flags: PRIVATE|DENYWRITE, fd: 3)
0.069 sleep/22695 syscalls:sys_enter_mmap(addr: 0x7fd23ffb6000, len: 1363968, prot: READ|EXEC, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x22000)
0.077 sleep/22695 syscalls:sys_enter_mmap(addr: 0x7fd240103000, len: 311296, prot: READ, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x16f000)
0.083 sleep/22695 syscalls:sys_enter_mmap(addr: 0x7fd240150000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 0x1bb000)
0.095 sleep/22695 syscalls:sys_enter_mmap(addr: 0x7fd240156000, len: 14272, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS)
0.339 sleep/22695 syscalls:sys_enter_mmap(len: 217750512, prot: READ, flags: PRIVATE, fd: 3)
#
Works with all targets, so, for system wide, looking at who calls mmap with flags set to just "PRIVATE":
# perf trace --max-events=5 -e syscalls:sys_enter_mmap --filter="flags==PRIVATE"
0.000 pool/2242 syscalls:sys_enter_mmap(len: 756, prot: READ, flags: PRIVATE, fd: 14)
0.050 pool/2242 syscalls:sys_enter_mmap(len: 756, prot: READ, flags: PRIVATE, fd: 14)
0.062 pool/2242 syscalls:sys_enter_mmap(len: 756, prot: READ, flags: PRIVATE, fd: 14)
0.145 goa-identity-s/2240 syscalls:sys_enter_mmap(len: 756, prot: READ, flags: PRIVATE, fd: 18)
0.183 goa-identity-s/2240 syscalls:sys_enter_mmap(len: 756, prot: READ, flags: PRIVATE, fd: 18)
#
# perf trace --max-events=2 -e syscalls:sys_enter_lseek --filter="whence==SET && offset != 0"
0.000 Cache2 I/O/12047 syscalls:sys_enter_lseek(fd: 277, offset: 43, whence: SET)
1142.070 mozStorage #5/12302 syscalls:sys_enter_lseek(fd: 44</home/acme/.mozilla/firefox/ina67tev.default/cookies.sqlite-wal>, offset: 393536, whence: SET)
#
perf annotate:
- Fix objdump --no-show-raw-insn flag to work with goth gcc and clang.
- Streamline objdump execution, preserving the right error codes for better
reporting to user.
perf report:
- Add warning when libunwind not compiled in.
perf stat:
Jin Yao:
- Support --all-kernel/--all-user, to match options available in 'perf record',
asking that all the events specified work just with kernel or user events.
perf list:
Jin Yao:
- Hide deprecated events by default, allow showing them with --deprecated.
libbperf:
Jiri Olsa:
- Allow to build with -ltcmalloc.
- Finish mmap interface, getting more stuff from tools/perf while adding
abstractions to avoid pulling too much stuff, to get libperf to grow as
tools needs things like auxtrace, etc.
perf scripting engines:
Steven Rostedt (VMware):
- Iterate on tep event arrays directly, fixing script generation with
'-g python' when having multiple tracepoints in a perf.data file.
core:
- Allow to build with -ltcmalloc.
perf test:
Leo Yan:
- Report failure for mmap events.
- Avoid infinite loop for task exit case.
- Remove needless headers for bp_account test.
- Add dedicated checking helper is_supported().
- Disable bp_signal testing for arm64.
Vendor events:
arm64:
John Garry:
- Fix Hisi hip08 DDRC PMU eventname.
- Add some missing events for Hisi hip08 DDRC, L3C and HHA PMUs.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util/annotate.c')
-rw-r--r-- | tools/perf/util/annotate.c | 196 |
1 files changed, 132 insertions, 64 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc69ca63b88e..ef1866a902c4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -43,6 +43,7 @@ #include <linux/string.h> #include <bpf/libbpf.h> #include <subcmd/parse-options.h> +#include <subcmd/run-command.h> /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -1489,44 +1490,26 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. */ -static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - int *line_nr) + char *parsed_line, int *line_nr) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; - char *line = NULL, *parsed_line, *tmp, *tmp2; - size_t line_len; + char *tmp; s64 line_ip, offset = -1; regmatch_t match[2]; - if (getline(&line, &line_len, file) < 0) - return -1; - - if (!line) - return -1; - - line_ip = -1; - parsed_line = strim(line); - /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); return 0; } - tmp = skip_spaces(parsed_line); - if (*tmp) { - /* - * Parse hexa addresses followed by ':' - */ - line_ip = strtoull(tmp, &tmp2, 16); - if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0') - line_ip = -1; - } - - if (line_ip != -1) { + /* Process hex address followed by ':'. */ + line_ip = strtoull(parsed_line, &tmp, 16); + if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { u64 start = map__rip_2objdump(map, sym->start), end = map__rip_2objdump(map, sym->end); @@ -1534,7 +1517,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if ((u64)line_ip < start || (u64)line_ip >= end) offset = -1; else - parsed_line = tmp2 + 1; + parsed_line = tmp + 1; } args->offset = offset; @@ -1543,7 +1526,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, args->ms.sym = sym; dl = disasm_line__new(args); - free(line); (*line_nr)++; if (dl == NULL) @@ -1861,6 +1843,67 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +/* + * Possibly create a new version of line with tabs expanded. Returns the + * existing or new line, storage is updated if a new line is allocated. If + * allocation fails then NULL is returned. + */ +static char *expand_tabs(char *line, char **storage, size_t *storage_len) +{ + size_t i, src, dst, len, new_storage_len, num_tabs; + char *new_line; + size_t line_len = strlen(line); + + for (num_tabs = 0, i = 0; i < line_len; i++) + if (line[i] == '\t') + num_tabs++; + + if (num_tabs == 0) + return line; + + /* + * Space for the line and '\0', less the leading and trailing + * spaces. Each tab may introduce 7 additional spaces. + */ + new_storage_len = line_len + 1 + (num_tabs * 7); + + new_line = malloc(new_storage_len); + if (new_line == NULL) { + pr_err("Failure allocating memory for tab expansion\n"); + return NULL; + } + + /* + * Copy regions starting at src and expand tabs. If there are two + * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces + * are inserted. + */ + for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { + if (line[i] == '\t') { + len = i - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst++] = ' '; + while (dst % 8 != 0) + new_line[dst++] = ' '; + src = i + 1; + num_tabs--; + } + } + + /* Expand the last region. */ + len = line_len + 1 - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst] = '\0'; + + free(*storage); + *storage = new_line; + *storage_len = new_storage_len; + return new_line; + +} + static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *opts = args->options; @@ -1872,10 +1915,19 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) struct kcore_extract kce; bool delete_extract = false; bool decomp = false; - int stdout_fd[2]; int lineno = 0; int nline; - pid_t pid; + char *line; + size_t line_len; + const char *objdump_argv[] = { + "/bin/sh", + "-c", + NULL, /* Will be the objdump command to run. */ + "--", + NULL, /* Will be the symfs path. */ + NULL, + }; + struct child_process objdump_process; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); if (err) @@ -1905,7 +1957,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) - goto out; + return -1; decomp = true; strcpy(symfs_filename, tmp); @@ -1914,13 +1966,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C \"$1\" 2>/dev/null|grep -v \"$1:\"|expand", + " -l -d %s %s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - opts->show_asm_raw ? "" : "--no-show-raw", + opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : ""); if (err < 0) { @@ -1930,55 +1982,73 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) pr_debug("Executing: %s\n", command); - err = -1; - if (pipe(stdout_fd) < 0) { - pr_err("Failure creating the pipe to run %s\n", command); - goto out_free_command; - } + objdump_argv[2] = command; + objdump_argv[4] = symfs_filename; - pid = fork(); - if (pid < 0) { - pr_err("Failure forking to run %s\n", command); - goto out_close_stdout; - } - - if (pid == 0) { - close(stdout_fd[0]); - dup2(stdout_fd[1], 1); - close(stdout_fd[1]); - execl("/bin/sh", "sh", "-c", command, "--", symfs_filename, - NULL); - perror(command); - exit(-1); + /* Create a pipe to read from for stdout */ + memset(&objdump_process, 0, sizeof(objdump_process)); + objdump_process.argv = objdump_argv; + objdump_process.out = -1; + if (start_command(&objdump_process)) { + pr_err("Failure starting to run %s\n", command); + err = -1; + goto out_free_command; } - close(stdout_fd[1]); - - file = fdopen(stdout_fd[0], "r"); + file = fdopen(objdump_process.out, "r"); if (!file) { pr_err("Failure creating FILE stream for %s\n", command); /* * If we were using debug info should retry with * original binary. */ - goto out_free_command; + err = -1; + goto out_close_stdout; } + /* Storage for getline. */ + line = NULL; + line_len = 0; + nline = 0; while (!feof(file)) { + const char *match; + char *expanded_line; + + if (getline(&line, &line_len, file) < 0 || !line) + break; + + /* Skip lines containing "filename:" */ + match = strstr(line, symfs_filename); + if (match && match[strlen(symfs_filename)] == ':') + continue; + + expanded_line = strim(line); + expanded_line = expand_tabs(expanded_line, &line, &line_len); + if (!expanded_line) + break; + /* * The source code line number (lineno) needs to be kept in * across calls to symbol__parse_objdump_line(), so that it * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) + if (symbol__parse_objdump_line(sym, args, expanded_line, + &lineno) < 0) break; nline++; } + free(line); + + err = finish_command(&objdump_process); + if (err) + pr_err("Error running %s\n", command); - if (nline == 0) + if (nline == 0) { + err = -1; pr_err("No output from %s\n", command); + } /* * kallsyms does not have symbol sizes so there may a nop at the end. @@ -1988,23 +2058,21 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) delete_last_nop(sym); fclose(file); - err = 0; + +out_close_stdout: + close(objdump_process.out); + out_free_command: free(command); -out_remove_tmp: - close(stdout_fd[0]); +out_remove_tmp: if (decomp) unlink(symfs_filename); if (delete_extract) kcore_extract__delete(&kce); -out: - return err; -out_close_stdout: - close(stdout_fd[1]); - goto out_free_command; + return err; } static void calc_percent(struct sym_hist *sym_hist, |