From 94fddb7ad019ad9f14d33cd0a6cd159a52a082b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 17 Jul 2020 09:35:18 -0300 Subject: perf tools: Sync hashmap.h with libbpf's To pick up the changes in: b2f9f1535bb9 ("libbpf: Fix libbpf hashmap on (I)LP32 architectures") Silencing this warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h I'll eventually update the warning to remove the "Kernel ABI" part and instead state libbpf when noticing that the original is at "tools/lib/something". Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Jakub Bogusz Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index df59fd4fc95b..e0af36b0e5d8 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -11,14 +11,18 @@ #include #include #include -#ifndef __WORDSIZE -#define __WORDSIZE (__SIZEOF_LONG__ * 8) -#endif static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ - return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); +#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) + /* LP64 case */ + return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); +#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__) + return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits); +#else +# error "Unsupported size_t size" +#endif } typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -- cgit v1.2.3 From 3d3af181d370069861a3be94608464e2ff3682e2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 17 Jul 2020 11:27:22 +0200 Subject: s390/cpum_cf,perf: change DFLT_CCERROR counter name Change the counter name DFLT_CCERROR to 
DFLT_CCFINISH on IBM z15. This counter counts completed DEFLATE instructions with exit code 0, 1 or 2. Since exit code 0 means success and exit code 1 or 2 indicate errors, change the counter name to avoid confusion. This counter is incremented each time the DEFLATE instruction completed regardless if an error was detected or not. Fixes: d68d5d51dc89 ("s390/cpum_cf: Add new extended counters for IBM z15") Fixes: e7950166e402 ("perf vendor events s390: Add new deflate counters for IBM z15") Cc: stable@vger.kernel.org # v5.7 Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_events.c | 4 ++-- tools/perf/pmu-events/arch/s390/cf_z15/extended.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 1e3df52b2b65..37265f551a11 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -292,7 +292,7 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); @@ -629,7 +629,7 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), CPUMF_EVENT_PTR(cf_z15, DFLT_CC), - CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), NULL, diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 
2df2e231e9ee..24c4ba2a9ae5 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -380,7 +380,7 @@ { "Unit": "CPU-M-CF", "EventCode": "265", - "EventName": "DFLT_CCERROR", + "EventName": "DFLT_CCFINISH", "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2" }, -- cgit v1.2.3 From 463538a383a27337cb83ae195e432a839a52d639 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 29 Jul 2020 15:53:14 +0200 Subject: perf tests: Fix test 68 zstd compression for s390 Commit 5aa98879efe7 ("s390/cpum_sf: prohibit callchain data collection") prohibits call graph sampling for hardware events on s390. The information recorded is out of context and does not match. On s390 this commit now breaks test case 68 Zstd perf.data compression/decompression. Therefore omit call graph sampling on s390 in this test. Output before: [root@t35lp46 perf]# ./perf test -Fv 68 68: Zstd perf.data compression/decompression : --- start --- Collecting compressed record file: Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' ---- end ---- Zstd perf.data compression/decompression: FAILED! 
[root@t35lp46 perf]# Output after: [root@t35lp46 perf]# ./perf test -Fv 68 68: Zstd perf.data compression/decompression : --- start --- Collecting compressed record file: 500+0 records in 500+0 records out 256000 bytes (256 kB, 250 KiB) copied, 0.00615638 s, 41.6 MB/s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB /tmp/perf.data.X3M, compressed (original 0.002 MB, ratio is 3.609) ] Checking compressed events stats: # compressed : Zstd, level = 1, ratio = 4 COMPRESSED events: 1 2ELIFREPh---- end ---- Zstd perf.data compression/decompression: Ok [root@t35lp46 perf]# Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20200729135314.91281-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+zstd_comp_decomp.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh index 63a91ec473bb..045723b3d992 100755 --- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -12,7 +12,8 @@ skip_if_no_z_record() { collect_z_record() { echo "Collecting compressed record file:" - $perf_tool record -o $trace_file -g -z -F 5000 -- \ + [[ "$(uname -m)" != s390x ]] && gflag='-g' + $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ dd count=500 if=/dev/urandom of=/dev/null } -- cgit v1.2.3 From bd3c628f8fafa6cbd6a1ca440034b841f0080160 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Fri, 24 Jul 2020 15:11:10 +0800 Subject: perf tools: Fix record failure when mixed with ARM SPE event When recording with cache-misses and arm_spe_x event, I found that it will just fail without showing any error info if i put cache-misses after 'arm_spe_x' event. 
[root@localhost 0620]# perf record -e cache-misses \ -e arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.067 MB perf.data ] [root@localhost 0620]# [root@localhost 0620]# perf record -e arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ \ -e cache-misses sleep 1 [root@localhost 0620]# The current code can only work if the only event to be traced is an 'arm_spe_x', or if it is the last event to be specified. Otherwise the last event type will be checked against all the arm_spe_pmus[i]->types, none will match and an out of bound 'i' index will be used in arm_spe_recording_init(). We don't support concurrent multiple arm_spe_x events currently, that is checked in arm_spe_recording_options(), and it will show the relevant info. So add the check and record of the first found 'arm_spe_pmu' to fix this issue here. 
Fixes: ffd3d18c20b8 ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support") Signed-off-by: Wei Li Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Hanjun Guo Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200724071111.35593-2-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/auxtrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 0a6e75b8777a..28a5d0c18b1d 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -56,7 +56,7 @@ struct auxtrace_record struct perf_pmu *cs_etm_pmu; struct evsel *evsel; bool found_etm = false; - bool found_spe = false; + struct perf_pmu *found_spe = NULL; static struct perf_pmu **arm_spe_pmus = NULL; static int nr_spes = 0; int i = 0; @@ -74,12 +74,12 @@ struct auxtrace_record evsel->core.attr.type == cs_etm_pmu->type) found_etm = true; - if (!nr_spes) + if (!nr_spes || found_spe) continue; for (i = 0; i < nr_spes; i++) { if (evsel->core.attr.type == arm_spe_pmus[i]->type) { - found_spe = true; + found_spe = arm_spe_pmus[i]; break; } } @@ -96,7 +96,7 @@ struct auxtrace_record #if defined(__aarch64__) if (found_spe) - return arm_spe_recording_init(err, arm_spe_pmus[i]); + return arm_spe_recording_init(err, found_spe); #endif /* -- cgit v1.2.3