From 5125bc22e7871b46e71312d6cc4455e9eea1619d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 3 May 2013 15:49:53 +0200 Subject: tools: Get only verbose output with V=1 Fix having verbose build with V=0, e.g: make V=0 -C tools/ perf Signed-off-by: Robert Richter Link: http://lkml.kernel.org/r/20130503134953.GU8356@rric.localhost Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 2 +- tools/perf/config/utilities.mak | 2 +- tools/scripts/Makefile.include | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index eb30044a922a..f75175757892 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -150,7 +150,7 @@ NO_SUBDIR = : endif ifneq ($(findstring $(MAKEFLAGS),s),s) -ifndef V +ifneq ($(V),1) QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; QUIET_XMLTO = @echo ' ' XMLTO $@; QUIET_DB2TEXI = @echo ' ' DB2TEXI $@; diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 8ef3bd30a549..faffb52fcf80 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -181,7 +181,7 @@ _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options, msg) -ifndef V +ifneq ($(V),1) TRY_CC_OUTPUT= > /dev/null 2>&1 endif TRY_CC_MSG=echo " CHK $(3)" 1>&2; diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index f03e681f8891..0d0506d55c71 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,7 +59,7 @@ QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir QUIET_SUBDIR1 = ifneq ($(findstring $(MAKEFLAGS),s),s) -ifndef V +ifneq ($(V),1) QUIET_CC = @echo ' ' CC $@; QUIET_AR = @echo ' ' AR $@; QUIET_LINK = @echo ' ' LINK $@; -- cgit v1.2.3 From 761a0f395d5d8b7aafe563ecefbc8cf71a214a91 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 6 May 2013 20:40:14 +0200 Subject: perf tools: Fix output directory of Documentation/ The OUTPUT directory is wrongly determind leading to: make[3]: *** No rule to make target `.../.build/perf/PERF-VERSION-FILE'. Stop. Fixing this by using the generic approach in script/Makefile.include. Signed-off-by: Robert Richter Link: http://lkml.kernel.org/r/1367865614-30876-1-git-send-email-rric@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index f75175757892..5a37a7c84e69 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,12 +1,6 @@ +include ../../scripts/Makefile.include include ../config/utilities.mak -OUTPUT := ./ -ifeq ("$(origin O)", "command line") - ifneq ($(O),) - OUTPUT := $(O)/ - endif -endif - MAN1_TXT= \ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ $(wildcard perf-*.txt)) \ @@ -277,7 +271,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ -- cgit v1.2.3 From 107de3724eff5a6fa6474a4d2aa5460b63749ebf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 11 Jun 2013 17:22:38 +0200 Subject: perf tools: Fix build errors with O and DESTDIR make vars set Fixing build errors with O and DESTDIR make vars set: $ make prefix=/usr/local O=$builddir DESTDIR=$destdir -C tools/ perf ... make[1]: Entering directory `.../.source/perf/tools/perf' CC .../.build/perf/perf/util/parse-events.o util/parse-events.c:14:32: fatal error: parse-events-bison.h: No such file or directory compilation terminated. make[1]: *** [.../.build/perf/perf/util/parse-events.o] Error 1 ... and: LINK /.../.build/perf/perf/perf gcc: error: /.../.build/perf/perf//.../.source/perf/tools/lib/lk/liblk.a: No such file or directory Signed-off-by: Robert Richter Signed-off-by: Robert Richter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Robert Richter Link: http://lkml.kernel.org/r/1370964158-4135-1-git-send-email-rric@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 5 ++--- tools/perf/config/Makefile | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 203cb0eecff2..641fccddb249 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -121,17 +121,16 @@ SCRIPT_SH += perf-archive.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -LK_PATH=$(LK_DIR) - ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) ifneq ($(subdir),) - LK_PATH=$(OUTPUT)$(LK_DIR) + LK_PATH=$(objtree)/lib/lk/ else LK_PATH=$(OUTPUT) endif else TE_PATH=$(TRACE_EVENT_DIR) + LK_PATH=$(LK_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f139dcd2796e..f4468954d3dc 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -39,7 +39,7 @@ src-perf := $(srctree)/tools/perf endif ifeq ($(obj-perf),) -obj-perf := $(objtree) +obj-perf := $(OUTPUT) endif ifneq ($(obj-perf),) -- cgit v1.2.3 From 13966721a11f4a2ba8038191e48083b5f31822bb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 6 Jun 2013 14:35:03 +0300 Subject: perf bench: Fix memory allocation fail check in mem{set,cpy} workloads Addresses of allocated memory areas saved to '*src' and '*dst', so we need to check them for NULL, not 'src' and 'dst'. Signed-off-by: Kirill A. Shutemov Acked-by: Hitoshi Mitake Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1370518503-4230-1-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy.c | 4 ++-- tools/perf/bench/mem-memset.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 93c83e3cb4a7..25fd3f1966f1 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -111,11 +111,11 @@ static double timeval2double(struct timeval *ts) static void alloc_mem(void **dst, void **src, size_t length) { *dst = zalloc(length); - if (!dst) + if (!*dst) die("memory allocation failed - maybe length is too large?\n"); *src = zalloc(length); - if (!src) + if (!*src) die("memory allocation failed - maybe length is too large?\n"); } diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index c6e4bc523492..4a2f12081964 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -111,7 +111,7 @@ static double timeval2double(struct timeval *ts) static void alloc_mem(void **dst, size_t length) { *dst = zalloc(length); - if (!dst) + if (!*dst) die("memory allocation failed - maybe length is too large?\n"); } -- cgit v1.2.3 From 756bbc84e30b57ca1010b550ea30594fd0b0494f Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 19 Jun 2013 10:02:30 +0900 Subject: perf tools: Include termios.h explicitly Building perf for android fails because it can't find the definition of struct winsize. This definition is in termios.h, so I add this header to util.h to solve the problem. It is missed by commit '2c803e52' which moves get_term_dimensions() from builtin-top.c to util.c, but missed to move termios.h header. Signed-off-by: Joonsoo Kim Acked-by: David Ahern Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1371603750-15053-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7a484c97e500..2732fad03908 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -72,6 +72,7 @@ #include "types.h" #include #include +#include extern const char *graph_line; extern const char *graph_dotted_line; @@ -274,6 +275,5 @@ void dump_stack(void); extern unsigned int page_size; -struct winsize; void get_term_dimensions(struct winsize *ws); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 079787f209416416383c74ea5d5044be2d586f5e Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 19 Jun 2013 10:02:29 +0900 Subject: tools lib lk: Fix for cross build Currently, lib lk doesn't use CROSS_COMPILE environment variable, so cross build always fails. This is a quick fix for this problem. Signed-off-by: Joonsoo Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1371603750-15053-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/lk/Makefile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile index 2c5a19733357..f0ecc0a13300 100644 --- a/tools/lib/lk/Makefile +++ b/tools/lib/lk/Makefile @@ -3,6 +3,21 @@ include ../../scripts/Makefile.include CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) + # guard against environment variables LIB_H= LIB_OBJS= -- cgit v1.2.3 From d07f0b120642f442d81a61f68a9325fb7717004f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 4 Jun 2013 17:44:26 +0200 Subject: perf stat: Avoid sending SIGTERM to random processes This patch fixes a problem with perf stat whereby on termination it may send a SIGTERM signal to random processes on systems with high PID recycling. I got some actual bug reports on this. There is race between the SIGCHLD and sig_atexit() handlers. This patch addresses this problem by clearing child_pid in the SIGCHLD handler. Signed-off-by: Stephane Eranian Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130604154426.GA2928@quad Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e910bab1097..95768afaae3e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -87,7 +87,7 @@ static int run_count = 1; static bool no_inherit = false; static bool scale = true; static enum aggr_mode aggr_mode = AGGR_GLOBAL; -static pid_t child_pid = -1; +static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool big_num = true; @@ -1148,13 +1148,34 @@ static void skip_signal(int signo) done = 1; signr = signo; + /* + * render child_pid harmless + * won't send SIGTERM to a random + * process in case of race condition + * and fast PID recycling + */ + child_pid = -1; } static void sig_atexit(void) { + sigset_t set, oset; + + /* + * avoid race condition with SIGCHLD handler + * in skip_signal() which is modifying child_pid + * goal is to avoid send SIGTERM to a random + * process + */ + sigemptyset(&set); + sigaddset(&set, SIGCHLD); + sigprocmask(SIG_BLOCK, &set, &oset); + if (child_pid != -1) kill(child_pid, SIGTERM); + sigprocmask(SIG_SETMASK, &oset, NULL); + if (signr == -1) return; -- cgit v1.2.3 From 563aecb2e63a16f86c7daabfdcfee23f3e7c5955 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 5 Jun 2013 13:35:06 +0200 Subject: perf record: Remove -A/--append option It's no longer working and needed. Quite straightforward discussion/vote was in here: http://marc.info/?t=137028288300004&r=1&w=2 Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8fgdva12hl8w3xzzpsvvg7nx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 +- tools/perf/builtin-record.c | 82 ++++---------------------------- tools/perf/util/header.c | 13 +---- 3 files changed, 11 insertions(+), 88 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index d4da111ef53d..7e3258066bc9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -65,12 +65,10 @@ OPTIONS -r:: --realtime=:: Collect data with this RT SCHED_FIFO priority. + -D:: --no-delay:: Collect data without buffering. --A:: ---append:: - Append to the output file to do incremental profiling. -f:: --force:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fff985cf3852..2990570b5c6d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -61,11 +61,6 @@ static void __handle_on_exit_funcs(void) } #endif -enum write_mode_t { - WRITE_FORCE, - WRITE_APPEND -}; - struct perf_record { struct perf_tool tool; struct perf_record_opts opts; @@ -77,12 +72,9 @@ struct perf_record { int output; unsigned int page_size; int realtime_prio; - enum write_mode_t write_mode; bool no_buildid; bool no_buildid_cache; bool force; - bool file_new; - bool append_file; long samples; off_t post_processing_offset; }; @@ -200,25 +192,6 @@ static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg) signal(signr, SIG_DFL); } -static bool perf_evlist__equal(struct perf_evlist *evlist, - struct perf_evlist *other) -{ - struct perf_evsel *pos, *pair; - - if (evlist->nr_entries != other->nr_entries) - return false; - - pair = perf_evlist__first(other); - - list_for_each_entry(pos, &evlist->entries, node) { - if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) - return false; - pair = perf_evsel__next(pair); - } - - return true; -} - static int perf_record__open(struct perf_record *rec) { char msg[512]; @@ -273,16 +246,7 @@ try_again: goto out; } - if (rec->file_new) - session->evlist = evlist; - else { - if (!perf_evlist__equal(session->evlist, evlist)) { - fprintf(stderr, "incompatible append\n"); - rc = -1; - goto out; - } - } - + session->evlist = evlist; perf_session__set_id_hdr_size(session); out: return rc; @@ -415,23 +379,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!strcmp(output_name, "-")) opts->pipe_output = true; else if (!stat(output_name, &st) && st.st_size) { - if (rec->write_mode == WRITE_FORCE) { - char oldname[PATH_MAX]; - snprintf(oldname, sizeof(oldname), "%s.old", - output_name); - unlink(oldname); - rename(output_name, oldname); - } - } else if (rec->write_mode == WRITE_APPEND) { - rec->write_mode = WRITE_FORCE; + char oldname[PATH_MAX]; + snprintf(oldname, sizeof(oldname), "%s.old", + output_name); + unlink(oldname); + rename(output_name, oldname); } } - flags = O_CREAT|O_RDWR; - if (rec->write_mode == WRITE_APPEND) - rec->file_new = 0; - else - flags |= O_TRUNC; + flags = O_CREAT|O_RDWR|O_TRUNC; if (opts->pipe_output) output = STDOUT_FILENO; @@ -445,7 +401,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) rec->output = output; session = perf_session__new(output_name, O_WRONLY, - rec->write_mode == WRITE_FORCE, false, NULL); + true, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; @@ -465,12 +421,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!rec->opts.branch_stack) perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); - if (!rec->file_new) { - err = perf_session__read_header(session, output); - if (err < 0) - goto out_delete_session; - } - if (forks) { err = perf_evlist__prepare_workload(evsel_list, &opts->target, argv, opts->pipe_output, @@ -498,7 +448,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) err = perf_header__write_pipe(output); if (err < 0) goto out_delete_session; - } else if (rec->file_new) { + } else { err = perf_session__write_header(session, evsel_list, output, false); if (err < 0) @@ -869,8 +819,6 @@ static struct perf_record record = { .uses_mmap = true, }, }, - .write_mode = WRITE_FORCE, - .file_new = true, }; #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " @@ -906,8 +854,6 @@ const struct option record_options[] = { "collect raw sample records from all opened counters"), OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('A', "append", &record.append_file, - "append to the output file to do incremental profiling"), OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_BOOLEAN('f', "force", &record.force, @@ -977,16 +923,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (!argc && perf_target__none(&rec->opts.target)) usage_with_options(record_usage, record_options); - if (rec->force && rec->append_file) { - ui__error("Can't overwrite and append at the same time." - " You need to choose between -f and -A"); - usage_with_options(record_usage, record_options); - } else if (rec->append_file) { - rec->write_mode = WRITE_APPEND; - } else { - rec->write_mode = WRITE_FORCE; - } - if (nr_cgroups && !rec->opts.target.system_wide) { ui__error("cgroup monitoring only available in" " system-wide mode\n"); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 738d3b8d9745..93dd31573fb5 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2303,29 +2303,18 @@ int perf_session__write_header(struct perf_session *session, struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header *header = &session->header; - struct perf_evsel *evsel, *pair = NULL; + struct perf_evsel *evsel; int err; lseek(fd, sizeof(f_header), SEEK_SET); - if (session->evlist != evlist) - pair = perf_evlist__first(session->evlist); - list_for_each_entry(evsel, &evlist->entries, node) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { -out_err_write: pr_debug("failed to write perf header\n"); return err; } - if (session->evlist != evlist) { - err = do_write(fd, pair->id, pair->ids * sizeof(u64)); - if (err < 0) - goto out_err_write; - evsel->ids += pair->ids; - pair = perf_evsel__next(pair); - } } header->attr_offset = lseek(fd, 0, SEEK_CUR); -- cgit v1.2.3 From 4a4d371a4dfbd3b84a7eab8d535d4c7c3647b09e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 5 Jun 2013 13:37:21 +0200 Subject: perf record: Remove -f/--force option It no longer have any affect on the processing and is marked as obsolete anyway. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-tvwyspiqr4getzfib2lw06ty@git.kernel.org Link: http://lkml.kernel.org/r/1372307120-737-1-git-send-email-namhyung@kernel.org [ combined patch removing the -f usage in various sub-commands, such as 'perf sched', etc, by Namhyung Kim ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/examples.txt | 4 ++-- tools/perf/Documentation/perf-record.txt | 4 ---- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-lock.c | 2 +- tools/perf/builtin-record.c | 3 --- tools/perf/builtin-sched.c | 1 - tools/perf/builtin-timechart.c | 4 ++-- 7 files changed, 6 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt index 77f952762426..a4e392156488 100644 --- a/tools/perf/Documentation/examples.txt +++ b/tools/perf/Documentation/examples.txt @@ -66,7 +66,7 @@ Furthermore, these tracepoints can be used to sample the workload as well. For example the page allocations done by a 'git gc' can be captured the following way: - titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc Counting objects: 1148, done. Delta compression using up to 2 threads. Compressing objects: 100% (450/450), done. @@ -120,7 +120,7 @@ Furthermore, call-graph sampling can be done too, of page allocations - to see precisely what kind of page allocations there are: - titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc Counting objects: 1148, done. Delta compression using up to 2 threads. Compressing objects: 100% (450/450), done. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7e3258066bc9..e297b74471b8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -70,10 +70,6 @@ OPTIONS --no-delay:: Collect data without buffering. --f:: ---force:: - Overwrite existing data file. (deprecated) - -c:: --count=:: Event period to sample. diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 46878daca5cc..0259502638b4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -708,7 +708,7 @@ static int parse_line_opt(const struct option *opt __maybe_unused, static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { - "record", "-a", "-R", "-f", "-c", "1", + "record", "-a", "-R", "-c", "1", "-e", "kmem:kmalloc", "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 425830069749..76543a4a7a30 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -878,7 +878,7 @@ static int __cmd_report(void) static int __cmd_record(int argc, const char **argv) { const char *record_args[] = { - "record", "-R", "-f", "-m", "1024", "-c", "1", + "record", "-R", "-m", "1024", "-c", "1", }; unsigned int rec_argc, i, j; const char **rec_argv; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2990570b5c6d..ecca62e27b28 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -74,7 +74,6 @@ struct perf_record { int realtime_prio; bool no_buildid; bool no_buildid_cache; - bool force; long samples; off_t post_processing_offset; }; @@ -856,8 +855,6 @@ const struct option record_options[] = { "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), - OPT_BOOLEAN('f', "force", &record.force, - "overwrite existing data file (deprecated)"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), OPT_STRING('o', "output", &record.output_name, "file", "output file name"), diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 2da2a6ca22bf..fed9ae432c16 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1632,7 +1632,6 @@ static int __cmd_record(int argc, const char **argv) "record", "-a", "-R", - "-f", "-m", "1024", "-c", "1", "-e", "sched:sched_switch", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index ab4cf232b852..4536a92b18f3 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1005,7 +1005,7 @@ static int __cmd_record(int argc, const char **argv) { #ifdef SUPPORT_OLD_POWER_EVENTS const char * const record_old_args[] = { - "record", "-a", "-R", "-f", "-c", "1", + "record", "-a", "-R", "-c", "1", "-e", "power:power_start", "-e", "power:power_end", "-e", "power:power_frequency", @@ -1014,7 +1014,7 @@ static int __cmd_record(int argc, const char **argv) }; #endif const char * const record_new_args[] = { - "record", "-a", "-R", "-f", "-c", "1", + "record", "-a", "-R", "-c", "1", "-e", "power:cpu_frequency", "-e", "power:cpu_idle", "-e", "sched:sched_wakeup", -- cgit v1.2.3 From bcf3145fbeb1bd91ad2ca67b1946077530f7bfb1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 26 Jun 2013 16:14:15 +0900 Subject: perf evlist: Enhance perf_evlist__start_workload() When perf tries to start a workload, it relies on a pipe which the workload was blocked for reading. After closing the pipe on the parent, the workload (child) can start the actual work via exec(). However, if another process was forked after creating a workload, this mechanism cannot work since the other process (child) also inherits the pipe, so that closing the pipe in parent cannot unblock the workload. Fix it by using explicit write call can then closing it. For similar reason, the pipe fd on parent should be marked as CLOEXEC so that it can be closed after another child exec'ed. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1372230862-15861-13-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 99b43dd18c57..8065ce8fa9a5 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -821,6 +821,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, goto out_close_pipes; } + fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); evlist->workload.cork_fd = go_pipe[1]; close(child_ready_pipe[0]); return 0; @@ -837,10 +838,17 @@ out_close_ready_pipe: int perf_evlist__start_workload(struct perf_evlist *evlist) { if (evlist->workload.cork_fd > 0) { + char bf; + int ret; /* * Remove the cork, let it rip! */ - return close(evlist->workload.cork_fd); + ret = write(evlist->workload.cork_fd, &bf, 1); + if (ret < 0) + perror("enable to write to pipe"); + + close(evlist->workload.cork_fd); + return ret; } return 0; -- cgit v1.2.3 From 0276c22a3f22b7f6696fa07b0a77635726b2c0fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Jun 2013 08:21:21 +0200 Subject: perf tools: Fix -x/--exclude-other option for report command Currently we have symbol_conf.exclude_other being set as true every time so the -x/--exclude-other has nothing to do. Also we have no way to see the data with symbol_conf.exclude_other being false which is useful sometimes. Fixing it by making symbol_conf.exclude_other false by default. 1) Example without -x option: $ perf report -i perf.data.delete -p perf_session__delete -s parent + 99.91% [other] + 0.08% perf_session__delete + 0.00% perf_session__delete_dead_threads + 0.00% perf_session__delete_threads 2) Example with -x option: $ ./perf report -i perf.data.delete -p perf_session__delete -s parent -x + 96.22% perf_session__delete + 1.89% perf_session__delete_dead_threads + 1.89% perf_session__delete_threads In Example 1) we get the sorted out data together with the rest "[other]". This could help us estimate how much time we spent in the sorted data. In Example 2) the total is just the sorted data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-sg8fvu0fyqohf9ur9l38lhkw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 1 - tools/perf/builtin-report.c | 3 +-- tools/perf/builtin-top.c | 2 -- tools/perf/util/symbol.c | 1 - 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index da8f8eb383a0..0aac5f3e594d 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -607,7 +607,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) input_new = "perf.data.guest"; } - symbol_conf.exclude_other = false; if (symbol__init() < 0) return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ca98d34cd58b..3662047cc6b1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -939,8 +939,7 @@ repeat: */ if (!strstr(sort_order, "parent")) sort_parent.elide = 1; - } else - symbol_conf.exclude_other = false; + } if (argc) { /* diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f036af9b6f09..e06c4f869330 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1130,8 +1130,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (top.evlist == NULL) return -ENOMEM; - symbol_conf.exclude_other = false; - argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8cf3b5426a9a..d5528e1cc03a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -32,7 +32,6 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { - .exclude_other = true, .use_modules = true, .try_vmlinux_path = true, .annotate_src = true, -- cgit v1.2.3 From 7e40c92019cef784fffbdfc51c6e731e7ee6ba10 Mon Sep 17 00:00:00 2001 From: Runzhen Wang Date: Fri, 28 Jun 2013 16:14:56 +0800 Subject: perf tools: fix a typo of a Power7 event name In the Power7 PMU guide: https://www.power.org/documentation/commonly-used-metrics-for-performance-analysis/ PM_BRU_MPRED is referred to as PM_BR_MPRED. It fixed the typo by changing the name of the event in kernel and documentation accordingly. This patch changes the ABI, there are some reasons I think it's ok: - It is relatively new interface, specific to the Power7 platform. - No tools that we know of actually use this interface at this point (none are listed near the interface). - Users of this interface (eg oprofile users migrating to perf) would be more used to the "PM_BR_MPRED" rather than "PM_BRU_MPRED". - These are in the ABI/testing at this point rather than ABI/stable, so hoping we have some wiggle room. Signed-off-by: Runzhen Wang Acked-by: Michael Ellerman Cc: icycoder@gmail.com Cc: linuxppc-dev@lists.ozlabs.org Cc: Michael Ellerman Cc: Paul Mackerras Cc: Runzhen Wang Cc: Sukadev Bhattiprolu Cc: Xiao Guangrong Link: http://lkml.kernel.org/r/1372407297-6996-2-git-send-email-runzhen@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../ABI/testing/sysfs-bus-event_source-devices-events | 2 +- arch/powerpc/perf/power7-pmu.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 8b25ffb42562..3c1cc24361bd 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -29,7 +29,7 @@ Description: Generic performance monitoring events What: /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL /sys/devices/cpu/events/PM_BRU_FIN - /sys/devices/cpu/events/PM_BRU_MPRED + /sys/devices/cpu/events/PM_BR_MPRED /sys/devices/cpu/events/PM_CMPLU_STALL /sys/devices/cpu/events/PM_CMPLU_STALL_BRU /sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 13c3f0e547a2..d1821b8bbc4c 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -60,7 +60,7 @@ #define PME_PM_LD_REF_L1 0xc880 #define PME_PM_LD_MISS_L1 0x400f0 #define PME_PM_BRU_FIN 0x10068 -#define PME_PM_BRU_MPRED 0x400f6 +#define PME_PM_BR_MPRED 0x400f6 #define PME_PM_CMPLU_STALL_FXU 0x20014 #define PME_PM_CMPLU_STALL_DIV 0x40014 @@ -349,7 +349,7 @@ static int power7_generic_events[] = { [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, - [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BRU_MPRED, + [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -405,7 +405,7 @@ GENERIC_EVENT_ATTR(instructions, INST_CMPL); GENERIC_EVENT_ATTR(cache-references, LD_REF_L1); GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1); GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN); -GENERIC_EVENT_ATTR(branch-misses, BRU_MPRED); +GENERIC_EVENT_ATTR(branch-misses, BR_MPRED); POWER_EVENT_ATTR(CYC, CYC); POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC); @@ -414,7 +414,7 @@ POWER_EVENT_ATTR(INST_CMPL, INST_CMPL); POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1); POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) -POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED); +POWER_EVENT_ATTR(BR_MPRED, BR_MPRED); POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU); POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV); @@ -449,7 +449,7 @@ static struct attribute *power7_events_attr[] = { GENERIC_EVENT_PTR(LD_REF_L1), GENERIC_EVENT_PTR(LD_MISS_L1), GENERIC_EVENT_PTR(BRU_FIN), - GENERIC_EVENT_PTR(BRU_MPRED), + GENERIC_EVENT_PTR(BR_MPRED), POWER_EVENT_PTR(CYC), POWER_EVENT_PTR(GCT_NOSLOT_CYC), @@ -458,7 +458,7 @@ static struct attribute *power7_events_attr[] = { POWER_EVENT_PTR(LD_REF_L1), POWER_EVENT_PTR(LD_MISS_L1), POWER_EVENT_PTR(BRU_FIN), - POWER_EVENT_PTR(BRU_MPRED), + POWER_EVENT_PTR(BR_MPRED), POWER_EVENT_PTR(CMPLU_STALL_FXU), POWER_EVENT_PTR(CMPLU_STALL_DIV), -- cgit v1.2.3 From 7cab84e8975cfb8a59ce3e79ce75e5eedd384484 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 2 Jul 2013 13:27:20 -0600 Subject: perf evsel: Fix count parameter to read call in event_format__new per realloc above the length of the buffer is alloc_size, not BUFSIZ. Adjust length per size as done for buf start. Addresses some valgrind complaints: ==1870== Syscall param read(buf) points to unaddressable byte(s) ==1870== at 0x4E3F610: __read_nocancel (in /lib64/libpthread-2.14.90.so) ==1870== by 0x44AEE1: event_format__new (unistd.h:45) ==1870== by 0x44B025: perf_evsel__newtp (evsel.c:158) ==1870== by 0x451919: add_tracepoint_event (parse-events.c:395) ==1870== by 0x479815: parse_events_parse (parse-events.y:292) ==1870== by 0x45463A: parse_events_option (parse-events.c:861) ==1870== by 0x44FEE4: get_value (parse-options.c:113) ==1870== by 0x450767: parse_options_step (parse-options.c:192) ==1870== by 0x450C40: parse_options (parse-options.c:422) ==1870== by 0x42735F: cmd_record (builtin-record.c:918) ==1870== by 0x419D72: run_builtin (perf.c:319) ==1870== by 0x4195F2: main (perf.c:376) ==1870== Address 0xcffebf0 is 0 bytes after a block of size 8,192 alloc'd ==1870== at 0x4C2A62F: malloc (vg_replace_malloc.c:270) ==1870== by 0x4C2A7A3: realloc (vg_replace_malloc.c:662) ==1870== by 0x44AF07: event_format__new (evsel.c:121) ==1870== by 0x44B025: perf_evsel__newtp (evsel.c:158) ==1870== by 0x451919: add_tracepoint_event (parse-events.c:395) ==1870== by 0x479815: parse_events_parse (parse-events.y:292) ==1870== by 0x45463A: parse_events_option (parse-events.c:861) ==1870== by 0x44FEE4: get_value (parse-options.c:113) ==1870== by 0x450767: parse_options_step (parse-options.c:192) ==1870== by 0x450C40: parse_options (parse-options.c:422) ==1870== by 0x42735F: cmd_record (builtin-record.c:918) ==1870== by 0x419D72: run_builtin (perf.c:319) Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1372793245-4136-2-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 63b6f8c8edf2..df99ebe852ae 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -124,7 +124,7 @@ struct event_format *event_format__new(const char *sys, const char *name) bf = nbf; } - n = read(fd, bf + size, BUFSIZ); + n = read(fd, bf + size, alloc_size - size); if (n < 0) goto out_free_bf; size += n; -- cgit v1.2.3 From b2c34fde048f3c85ef0716a8cdabbe46ac67d1e6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 4 Jul 2013 16:20:23 +0300 Subject: perf tools: Fix parse_events_terms() segfault on error path On the error path, 'data.terms' may not have been initialised. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1372944040-32690-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6c8bb0fb189b..d8dcb8dbb1e2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -860,7 +860,8 @@ int parse_events_terms(struct list_head *terms, const char *str) return 0; } - parse_events__free_terms(data.terms); + if (data.terms) + parse_events__free_terms(data.terms); return ret; } -- cgit v1.2.3 From 4be8be6b430611def94bcd583b7b302d197a9520 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 4 Jul 2013 16:20:24 +0300 Subject: perf tools: Fix new_term() missing free on error path On the error path, newly allocated 'term' must be freed. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1372944040-32690-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d8dcb8dbb1e2..995fc25db8c6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1184,6 +1184,7 @@ static int new_term(struct parse_events_term **_term, int type_val, term->val.str = str; break; default: + free(term); return -EINVAL; } -- cgit v1.2.3 From 7e0d6fc90fc6f9faea63a2b67233ae767903573c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 4 Jul 2013 16:20:29 +0300 Subject: perf tools: Update symbol_conf.nr_events when processing attribute events Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1372944040-32690-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 93dd31573fb5..a4dafbee2511 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2956,6 +2956,8 @@ int perf_event__process_attr(union perf_event *event, perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); } + symbol_conf.nr_events = evlist->nr_entries; + return 0; } -- cgit v1.2.3 From 54bd269205c188967d565f1f5552b13d08ca1be0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 4 Jul 2013 16:20:34 +0300 Subject: perf evsel: Fix missing increment in sample parsing The final sample format bit used to be PERF_SAMPLE_STACK_USER which neglected to do a final increment of the array pointer. The result is that the following parsing might start at the wrong place. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1372944040-32690-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index df99ebe852ae..c9c7494506a1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1170,7 +1170,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } else { data->user_stack.data = (char *)array; array += size / sizeof(*array); - data->user_stack.size = *array; + data->user_stack.size = *array++; } } -- cgit v1.2.3 From f9ceffb605be7b3b3b2a6e6d14dd0d7a97eae580 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 9 May 2013 10:42:48 -0400 Subject: perf symbols: Fix vdso list searching When "perf record" was used on a large machine with a lot of CPUs, the perf post-processing time (the time after the workload was done until the perf command itself exited) could take a lot of minutes and even hours depending on how large the resulting perf.data file was. While running AIM7 1500-user high_systime workload on a 80-core x86-64 system with a 3.9 kernel (with only the -s -a options used), the workload itself took about 2 minutes to run and the perf.data file had a size of 1108.746 MB. However, the post-processing step took more than 10 minutes. With a gprof-profiled perf binary, the time spent by perf was as follows: % cumulative self self total time seconds seconds calls s/call s/call name 96.90 822.10 822.10 192156 0.00 0.00 dsos__find 0.81 828.96 6.86 172089958 0.00 0.00 rb_next 0.41 832.44 3.48 48539289 0.00 0.00 rb_erase So 97% (822 seconds) of the time was spent in a single dsos_find() function. After analyzing the call-graph data below: ----------------------------------------------- 0.00 822.12 192156/192156 map__new [6] [7] 96.9 0.00 822.12 192156 vdso__dso_findnew [7] 822.10 0.00 192156/192156 dsos__find [8] 0.01 0.00 192156/192156 dsos__add [62] 0.01 0.00 192156/192366 dso__new [61] 0.00 0.00 1/45282525 memdup [31] 0.00 0.00 192156/192230 dso__set_long_name [91] ----------------------------------------------- 822.10 0.00 192156/192156 vdso__dso_findnew [7] [8] 96.9 822.10 0.00 192156 dsos__find [8] ----------------------------------------------- It was found that the vdso__dso_findnew() function failed to locate VDSO__MAP_NAME ("[vdso]") in the dso list and have to insert a new entry at the end for 192156 times. This problem is due to the fact that there are 2 types of name in the dso entry - short name and long name. The initial dso__new() adds "[vdso]" to both the short and long names. After that, vdso__dso_findnew() modifies the long name to something like /tmp/perf-vdso.so-NoXkDj. The dsos__find() function only compares the long name. As a result, the same vdso entry is duplicated many time in the dso list. This bug increases memory consumption as well as slows the symbol processing time to a crawl. To resolve this problem, the dsos__find() function interface was modified to enable searching either the long name or the short name. The vdso__dso_findnew() will now search only the short name while the other call sites search for the long name as before. With this change, the cpu time of perf was reduced from 848.38s to 15.77s and dsos__find() only accounted for 0.06% of the total time. 0.06 15.73 0.01 192151 0.00 0.00 dsos__find Signed-off-by: Waiman Long Acked-by: Ingo Molnar Cc: "Chandramouleeswaran, Aswin" Cc: "Norton, Scott J" Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1368110568-64714-1-git-send-email-Waiman.Long@hp.com [ replaced TRUE/FALSE with stdbool.h equivalents, fixing builds where those macros are not present (NO_LIBPYTHON=1 NO_LIBPERL=1), fix from Jiri Olsa ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 10 ++++++++-- tools/perf/util/dso.h | 3 ++- tools/perf/util/vdso.c | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 6f7d5a9d6b05..c4374f07603c 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -513,10 +513,16 @@ void dsos__add(struct list_head *head, struct dso *dso) list_add_tail(&dso->node, head); } -struct dso *dsos__find(struct list_head *head, const char *name) +struct dso *dsos__find(struct list_head *head, const char *name, bool cmp_short) { struct dso *pos; + if (cmp_short) { + list_for_each_entry(pos, head, node) + if (strcmp(pos->short_name, name) == 0) + return pos; + return NULL; + } list_for_each_entry(pos, head, node) if (strcmp(pos->long_name, name) == 0) return pos; @@ -525,7 +531,7 @@ struct dso *dsos__find(struct list_head *head, const char *name) struct dso *__dsos__findnew(struct list_head *head, const char *name) { - struct dso *dso = dsos__find(head, name); + struct dso *dso = dsos__find(head, name, false); if (!dso) { dso = dso__new(name); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 450199ab51b5..d51aaf272c68 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -133,7 +133,8 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, const char *short_name, int dso_type); void dsos__add(struct list_head *head, struct dso *dso); -struct dso *dsos__find(struct list_head *head, const char *name); +struct dso *dsos__find(struct list_head *head, const char *name, + bool cmp_short); struct dso *__dsos__findnew(struct list_head *head, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index e60951fcdb12..39159822d58f 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -91,7 +91,7 @@ void vdso__exit(void) struct dso *vdso__dso_findnew(struct list_head *head) { - struct dso *dso = dsos__find(head, VDSO__MAP_NAME); + struct dso *dso = dsos__find(head, VDSO__MAP_NAME, true); if (!dso) { char *file; -- cgit v1.2.3 From 582ec0829b3dd74d8c0f58403a3f9df8cbaa9c7d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 5 Jul 2013 19:06:45 +0200 Subject: perf stat: Fix per-socket output bug for uncore events This patch fixes a problem reported by Andi Kleen on perf stat when measuring uncore events: # perf stat --per-socket -e uncore_pcu/event=0x0/ -I1000 -a sleep 2 It would not report counts for the second socket. That was due to a cpu mapping bug in print_aggr(). This patch also fixes the socket numbering bug for events. Reported-by: Andi Kleen Signed-off-by: Stephane Eranian Tested-by: Andi Kleen Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/20130705170645.GA32519@quad Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95768afaae3e..352fbd7ff4a1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -924,7 +924,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) static void print_aggr(char *prefix) { struct perf_evsel *counter; - int cpu, s, s2, id, nr; + int cpu, cpu2, s, s2, id, nr; u64 ena, run, val; if (!(aggr_map || aggr_get_id)) @@ -936,7 +936,8 @@ static void print_aggr(char *prefix) val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(evsel_list->cpus, cpu); + cpu2 = perf_evsel__cpus(counter)->map[cpu]; + s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; val += counter->counts->cpu[cpu].val; @@ -948,7 +949,7 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); if (run == 0 || ena == 0) { - aggr_printout(counter, cpu, nr); + aggr_printout(counter, id, nr); fprintf(output, "%*s%s%*s", csv_output ? 0 : 18, -- cgit v1.2.3 From a4147f0f91386540316e468f3a3674a498dada5f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 8 May 2013 11:43:34 +0200 Subject: perf tools: Fix perf version generation The tag of the perf version is wrongly determined, always the latest tag is taken regardless of the HEAD commit: $ perf --version perf version 3.9.rc8.gd7f5d3 $ git describe d7f5d3 v3.9-rc7-154-gd7f5d33 $ head -n 4 Makefile VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 EXTRAVERSION = -rc7 In other cases no tag might be found. This patch fixes this. This new implementation handles also the case if there are no tags at all found in the git repo but there is a commit id. Signed-off-by: Robert Richter Link: http://lkml.kernel.org/r/1368006214-12912-1-git-send-email-rric@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/PERF-VERSION-GEN | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 055fef34b6f6..15a77b7c0e36 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -13,13 +13,22 @@ LF=' # First check if there is a .git to get the version from git describe # otherwise try to get the version from the kernel Makefile # -if test -d ../../.git -o -f ../../.git && - VN=$(git tag 2>/dev/null | tail -1 | grep -E "v[0-9].[0-9]*") +CID= +TAG= +if test -d ../../.git -o -f ../../.git then - VN=$(echo $VN"-g"$(git log -1 --abbrev=4 --pretty=format:"%h" HEAD)) - VN=$(echo "$VN" | sed -e 's/-/./g'); -else - VN=$(MAKEFLAGS= make -sC ../.. kernelversion) + TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) + CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" +fi +if test -z "$TAG" +then + TAG=$(MAKEFLAGS= make -sC ../.. kernelversion) +fi +VN="$TAG$CID" +if test -n "$CID" +then + # format version string, strip trailing zero of sublevel: + VN=$(echo "$VN" | sed -e 's/-/./g;s/\([0-9]*[.][0-9]*\)[.]0/\1/') fi VN=$(expr "$VN" : v*'\(.*\)') -- cgit v1.2.3 From a363a9da65d253fa7354ce5fd630f4f94df934cc Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 17 Apr 2013 02:23:16 +0000 Subject: perf tools: Revert regression in configuration of Python support Among other things, the following: commit 31160d7feab786c991780d7f0ce2755a469e0e5e Date: Tue Jan 8 16:22:36 2013 -0500 perf tools: Fix GNU make v3.80 compatibility issue attempts to aid the user by tapping into an existing error message, as described in the commit message: ... Also fix an issue where _get_attempt was called with only one argument. This prevented the error message from printing the name of the variable that can be used to fix the problem. or more precisely: -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1))) However, The "missing" argument was in fact missing on purpose; it's absence is a signal that the error message should be skipped, because the failure would be due to the default value, not any user-supplied value. This can be seen in how `_ge_attempt' uses `gea_err' (in the config/utilities.mak file): _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) That is, because the argument is no longer missing, the value `$(1)' (associated with `_gea_err') always evaluates to true, thus always triggering the error condition that is meant to be reserved for only the case when a user explicitly supplies an invalid value. Concretely, the result is a regression in the Makefile's configuration of python support; rather than gracefully disable support when the relevant executables cannot be found according to default values, the build process halts in error as though the user explicitly supplied the values. This new commit simply reverts the offending one-line change. Reported-by: Pekka Enberg Link: http://lkml.kernel.org/r/CAOJsxLHv17Ys3M7P5q25imkUxQW6LE_vABxh1N3Tt7Mv6Ho4iw@mail.gmail.com Signed-off-by: Michael Witten --- tools/perf/config/utilities.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index faffb52fcf80..94d2d4f9c35d 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -173,7 +173,7 @@ _ge-abspath = $(if $(is-executable),$(1)) # Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default) # define get-executable-or-default -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1))) +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) -- cgit v1.2.3 From d14c496588733ec1b586ec068932c1db228dd770 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 9 May 2013 00:17:44 -0400 Subject: perf tools: Fix -ldw/-lelf link test when static linking Since libelf sometimes uses libpthread, we have to list that after -lelf when someone tries to build statically. Else things go boom: Makefile:479: *** No libelf.h/libelf found, please install \ libelf-dev/elfutils-libelf-devel. Stop. Similarly, the -ldw test fails as it often uses -lz: Makefile:462: No libdw.h found or old libdw.h found or elfutils is older \ than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev And if we add debugging to try-cc, we see: + echo '#include int main(void) { Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }' + i686-pc-linux-gnu-gcc -x c - -O2 -pipe -march=atom -mtune=atom -mfpmath=sse -g \ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \ -ldw -lelf -static -lpthread -lrt -lelf -lm -o .24368 /usr/lib/libdw.a(dwarf_begin_elf.o):function check_section.isra.1: error: undefined reference to 'inflateInit_' /usr/lib/libdw.a(dwarf_begin_elf.o):function check_section.isra.1: error: undefined reference to 'inflate' /usr/lib/libdw.a(dwarf_begin_elf.o):function check_section.isra.1: error: undefined reference to 'inflateReset' /usr/lib/libdw.a(dwarf_begin_elf.o):function check_section.isra.1: error: undefined reference to 'inflateEnd' + echo '#include int main(void) { Elf *elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }' + i686-pc-linux-gnu-gcc -x c - -O2 -pipe -march=atom -mtune=atom -mfpmath=sse -g \ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \ -static -lpthread -lrt -lelf -lm -o .19216 /usr/lib/libelf.a(elf_begin.o):function file_read_elf: error: undefined reference to 'pthread_rwlock_init' /usr/lib/libelf.a(elf_begin.o):function __libelf_read_mmaped_file: error: undefined reference to 'pthread_rwlock_init' /usr/lib/libelf.a(elf_begin.o):function __libelf_read_mmaped_file: error: undefined reference to 'pthread_rwlock_init' /usr/lib/libelf.a(elf_begin.o):function read_file: error: undefined reference to 'pthread_rwlock_init' /usr/lib/libelf.a(elf_begin.o):function lock_dup_elf.8072: error: undefined reference to 'pthread_rwlock_unlock' /usr/lib/libelf.a(elf_begin.o):function lock_dup_elf.8072: error: undefined reference to 'pthread_rwlock_wrlock' /usr/lib/libelf.a(elf_begin.o):function elf_begin: error: undefined reference to 'pthread_rwlock_rdlock' /usr/lib/libelf.a(elf_begin.o):function elf_begin: error: undefined reference to 'pthread_rwlock_unlock' Signed-off-by: Mike Frysinger Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1368073064-18276-1-git-send-email-vapier@gentoo.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/lk/Makefile | 2 +- tools/perf/config/Makefile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile index f0ecc0a13300..280dd8205430 100644 --- a/tools/lib/lk/Makefile +++ b/tools/lib/lk/Makefile @@ -29,7 +29,7 @@ LIB_OBJS += $(OUTPUT)debugfs.o LIBFILE = liblk.a CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lpthread -lrt -lelf -lm +EXTLIBS = -lelf -lpthread -lrt -lm ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f4468954d3dc..b5d9238cb181 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -85,7 +85,7 @@ CFLAGS += -Wall CFLAGS += -Wextra CFLAGS += -std=gnu99 -EXTLIBS = -lpthread -lrt -lelf -lm +EXTLIBS = -lelf -lpthread -lrt -lm ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) CFLAGS += -fstack-protector-all @@ -165,7 +165,7 @@ else LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif - FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS) + FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS) ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 -- cgit v1.2.3 From 750ade7e82709c2835cb221a7b6a9ef0a6a9c0ac Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 9 Jul 2013 15:30:30 +0530 Subject: perf script: Fix broken include in Context.xs 765532c8 (perf script: Finish the rename from trace to script, 2010-12-23) made a mistake during find-and-replace replacing "../../../util/trace-event.h" with "../../../util/script-event.h", a non-existent file. Fix this include. Signed-off-by: Ramkumar Ramachandra Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1373364033-7918-3-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs index c1e2ed1ed34e..8c7ea42444d1 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -23,7 +23,7 @@ #include "perl.h" #include "XSUB.h" #include "../../../perf.h" -#include "../../../util/script-event.h" +#include "../../../util/trace-event.h" MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context PROTOTYPES: ENABLE -- cgit v1.2.3 From 734df5ab549ca44f40de0f07af1c8803856dfb18 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:10 +0200 Subject: perf: Clone child context from parent context pmu Currently when the child context for inherited events is created, it's based on the pmu object of the first event of the parent context. This is wrong for the following scenario: - HW context having HW and SW event - HW event got removed (closed) - SW event stays in HW context as the only event and its pmu is used to clone the child context The issue starts when the cpu context object is touched based on the pmu context object (__get_cpu_context). In this case the HW context will work with SW cpu context ending up with following WARN below. Fixing this by using parent context pmu object to clone from child context. Addresses the following warning reported by Vince Weaver: [ 2716.472065] ------------[ cut here ]------------ [ 2716.476035] WARNING: at kernel/events/core.c:2122 task_ctx_sched_out+0x3c/0x) [ 2716.476035] Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs locn [ 2716.476035] CPU: 0 PID: 3164 Comm: perf_fuzzer Not tainted 3.10.0-rc4 #2 [ 2716.476035] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BI2 [ 2716.476035] 0000000000000000 ffffffff8102e215 0000000000000000 ffff88011fc18 [ 2716.476035] ffff8801175557f0 0000000000000000 ffff880119fda88c ffffffff810ad [ 2716.476035] ffff880119fda880 ffffffff810af02a 0000000000000009 ffff880117550 [ 2716.476035] Call Trace: [ 2716.476035] [] ? warn_slowpath_common+0x5b/0x70 [ 2716.476035] [] ? task_ctx_sched_out+0x3c/0x5f [ 2716.476035] [] ? perf_event_exit_task+0xbf/0x194 [ 2716.476035] [] ? do_exit+0x3e7/0x90c [ 2716.476035] [] ? __do_fault+0x359/0x394 [ 2716.476035] [] ? do_group_exit+0x66/0x98 [ 2716.476035] [] ? get_signal_to_deliver+0x479/0x4ad [ 2716.476035] [] ? __perf_event_task_sched_out+0x230/0x2d1 [ 2716.476035] [] ? do_signal+0x3c/0x432 [ 2716.476035] [] ? ctx_sched_in+0x43/0x141 [ 2716.476035] [] ? perf_event_context_sched_in+0x7a/0x90 [ 2716.476035] [] ? __perf_event_task_sched_in+0x31/0x118 [ 2716.476035] [] ? mmdrop+0xd/0x1c [ 2716.476035] [] ? finish_task_switch+0x7d/0xa6 [ 2716.476035] [] ? do_notify_resume+0x20/0x5d [ 2716.476035] [] ? retint_signal+0x3d/0x78 [ 2716.476035] ---[ end trace 827178d8a5966c3d ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1833bc5a84a7..1d1f030e2f1e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7465,7 +7465,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. */ - child_ctx = alloc_perf_context(event->pmu, child); + child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; -- cgit v1.2.3 From 06f417968beac6e6b614e17b37d347aa6a6b1d30 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jul 2013 17:44:11 +0200 Subject: perf: Remove WARN_ON_ONCE() check in __perf_event_enable() for valid scenario The '!ctx->is_active' check has a valid scenario, so there's no need for the warning. The reason is that there's a time window between the 'ctx->is_active' check in the perf_event_enable() function and the __perf_event_enable() function having: - IRQs on - ctx->lock unlocked where the task could be killed and 'ctx' deactivated by perf_event_exit_task(), ending up with the warning below. So remove the WARN_ON_ONCE() check and add comments to explain it all. This addresses the following warning reported by Vince Weaver: [ 324.983534] ------------[ cut here ]------------ [ 324.984420] WARNING: at kernel/events/core.c:1953 __perf_event_enable+0x187/0x190() [ 324.984420] Modules linked in: [ 324.984420] CPU: 19 PID: 2715 Comm: nmi_bug_snb Not tainted 3.10.0+ #246 [ 324.984420] Hardware name: Supermicro X8DTN/X8DTN, BIOS 4.6.3 01/08/2010 [ 324.984420] 0000000000000009 ffff88043fce3ec8 ffffffff8160ea0b ffff88043fce3f00 [ 324.984420] ffffffff81080ff0 ffff8802314fdc00 ffff880231a8f800 ffff88043fcf7860 [ 324.984420] 0000000000000286 ffff880231a8f800 ffff88043fce3f10 ffffffff8108103a [ 324.984420] Call Trace: [ 324.984420] [] dump_stack+0x19/0x1b [ 324.984420] [] warn_slowpath_common+0x70/0xa0 [ 324.984420] [] warn_slowpath_null+0x1a/0x20 [ 324.984420] [] __perf_event_enable+0x187/0x190 [ 324.984420] [] remote_function+0x40/0x50 [ 324.984420] [] generic_smp_call_function_single_interrupt+0xbe/0x130 [ 324.984420] [] smp_call_function_single_interrupt+0x27/0x40 [ 324.984420] [] call_function_single_interrupt+0x6f/0x80 [ 324.984420] [] ? _raw_spin_unlock_irqrestore+0x41/0x70 [ 324.984420] [] perf_event_exit_task+0x14d/0x210 [ 324.984420] [] ? switch_task_namespaces+0x24/0x60 [ 324.984420] [] do_exit+0x2b6/0xa40 [ 324.984420] [] ? _raw_spin_unlock_irq+0x2c/0x30 [ 324.984420] [] do_group_exit+0x49/0xc0 [ 324.984420] [] get_signal_to_deliver+0x254/0x620 [ 324.984420] [] do_signal+0x57/0x5a0 [ 324.984420] [] ? __do_page_fault+0x2a4/0x4e0 [ 324.984420] [] ? retint_restore_args+0xe/0xe [ 324.984420] [] ? retint_signal+0x11/0x84 [ 324.984420] [] do_notify_resume+0x65/0x80 [ 324.984420] [] retint_signal+0x46/0x84 [ 324.984420] ---[ end trace 442ec2f04db3771a ]--- Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Suggested-by: Peter Zijlstra Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1373384651-6109-2-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1d1f030e2f1e..ef5e7cc686e3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1950,7 +1950,16 @@ static int __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - if (WARN_ON_ONCE(!ctx->is_active)) + /* + * There's a time window between 'ctx->is_active' check + * in perf_event_enable function and this place having: + * - IRQs on + * - ctx->lock unlocked + * + * where the task could be killed and 'ctx' deactivated + * by perf_event_exit_task. + */ + if (!ctx->is_active) return -EINVAL; raw_spin_lock(&ctx->lock); -- cgit v1.2.3 From 058ebd0eba3aff16b144eabf4510ed9510e1416e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jul 2013 11:08:33 +0200 Subject: perf: Fix perf_lock_task_context() vs RCU Jiri managed to trigger this warning: [] ====================================================== [] [ INFO: possible circular locking dependency detected ] [] 3.10.0+ #228 Tainted: G W [] ------------------------------------------------------- [] p/6613 is trying to acquire lock: [] (rcu_node_0){..-...}, at: [] rcu_read_unlock_special+0xa7/0x250 [] [] but task is already holding lock: [] (&ctx->lock){-.-...}, at: [] perf_lock_task_context+0xd9/0x2c0 [] [] which lock already depends on the new lock. [] [] the existing dependency chain (in reverse order) is: [] [] -> #4 (&ctx->lock){-.-...}: [] -> #3 (&rq->lock){-.-.-.}: [] -> #2 (&p->pi_lock){-.-.-.}: [] -> #1 (&rnp->nocb_gp_wq[1]){......}: [] -> #0 (rcu_node_0){..-...}: Paul was quick to explain that due to preemptible RCU we cannot call rcu_read_unlock() while holding scheduler (or nested) locks when part of the read side critical section was preemptible. Therefore solve it by making the entire RCU read side non-preemptible. Also pull out the retry from under the non-preempt to play nice with RT. Reported-by: Jiri Olsa Helped-out-by: Paul E. McKenney Cc: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/events/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index ef5e7cc686e3..eba8fb5834ae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; - rcu_read_lock(); retry: + /* + * One of the few rules of preemptible RCU is that one cannot do + * rcu_read_unlock() while holding a scheduler (or nested) lock when + * part of the read side critical section was preemptible -- see + * rcu_read_unlock_special(). + * + * Since ctx->lock nests under rq->lock we must ensure the entire read + * side critical section is non-preemptible. + */ + preempt_disable(); + rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* @@ -964,6 +974,8 @@ retry: raw_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); + rcu_read_unlock(); + preempt_enable(); goto retry; } @@ -973,6 +985,7 @@ retry: } } rcu_read_unlock(); + preempt_enable(); return ctx; } -- cgit v1.2.3 From baf64b85445546a38b44052d71782dfe7531e350 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 8 Jul 2013 14:44:04 -0700 Subject: perf/x86: Fix incorrect use of do_div() in NMI warning I completely botched understanding the calling conventions of do_div(). I assumed that do_div() returned the result instead of realizing that it modifies its argument and returns a remainder. The side-effect from this would be bogus numbers for the "msecs" value in the warning messages: INFO: NMI handler (perf_event_nmi_handler) took too long to run: 0.114 msecs Note, there was a second fix posted by Stephane Eranian for a separate patch which I also botched: http://lkml.kernel.org/r/20130704223010.GA30625@quad Signed-off-by: Dave Hansen Cc: Dave Hansen Link: http://lkml.kernel.org/r/20130708214404.B0B6EA66@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 0920212e6159..ba77ebc2c353 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -111,7 +111,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 */ list_for_each_entry_rcu(a, &desc->head, list) { u64 before, delta, whole_msecs; - int decimal_msecs, thishandled; + int remainder_ns, decimal_msecs, thishandled; before = local_clock(); thishandled = a->handler(type, regs); @@ -123,8 +123,9 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 continue; nmi_longest_ns = delta; - whole_msecs = do_div(delta, (1000 * 1000)); - decimal_msecs = do_div(delta, 1000) % 1000; + whole_msecs = delta; + remainder_ns = do_div(whole_msecs, (1000 * 1000)); + decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: " "%lld.%03d msecs\n", a->handler, whole_msecs, -- cgit v1.2.3