summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
author: Jiri Olsa <jolsa@kernel.org> 2017-07-26 14:02:06 +0200
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2017-07-26 14:25:44 -0300
commit: 82bf311e15d22e2fa45423b1fb4a21cf925381fe (patch)
tree: ed6f171f8cd8f07e3e2966a0116cae3a75929d02 /tools/perf/util
parent: f7794d525447f1e4e4b2228dd29dba084005e6bf (diff)
perf stat: Use group read for event groups
Make perf stat use group read if there are groups defined. The group read gets the values for all members of a group within a single syscall, instead of calling the read syscall for every event. We can see a considerably smaller number of kernel cycles spent on a single group read than on reading each event separately, as with the following perf stat command: # perf stat -e {cycles,instructions} -I 10 -a sleep 1 Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'" Before: 24,325,676 cycles:u 297,040,775 cycles:k 1.038554134 seconds time elapsed After: 25,034,418 cycles:u 158,256,395 cycles:k 1.036864497 seconds time elapsed The perf_evsel__open fallback changes were contributed by Andi Kleen. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/counts.h1
-rw-r--r--tools/perf/util/evsel.c10
2 files changed, 11 insertions, 0 deletions
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 34d8baaf558a..cb45a6aecf9d 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -12,6 +12,7 @@ struct perf_counts_values {
};
u64 values[3];
};
+ bool loaded;
};
struct perf_counts {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 89aecf3a35c7..3735c9e0080d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -49,6 +49,7 @@ static struct {
bool clockid_wrong;
bool lbr_flags;
bool write_backward;
+ bool group_read;
} perf_missing_features;
static clockid_t clockid;
@@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
count->val = val;
count->ena = ena;
count->run = run;
+ count->loaded = true;
}
static int
@@ -1677,6 +1679,8 @@ fallback_missing_features:
if (perf_missing_features.lbr_flags)
evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES);
+ if (perf_missing_features.group_read && evsel->attr.inherit)
+ evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1832,6 +1836,12 @@ try_fallback:
perf_missing_features.lbr_flags = true;
pr_debug2("switching off branch sample type no (cycles/flags)\n");
goto fallback_missing_features;
+ } else if (!perf_missing_features.group_read &&
+ evsel->attr.inherit &&
+ (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
+ perf_missing_features.group_read = true;
+ pr_debug2("switching off group read\n");
+ goto fallback_missing_features;
}
out_close:
do {