summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-10-12 15:15:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-10-12 15:15:17 -0700
commit465a7e291fd4f056d81baf5d5ed557bdb44c5457 (patch)
treebd0db709f63d89ad2b57484d9da7c5ad8febd1bf /arch
parent9b4e40c8fe1e120fef93985de7ff6a97fe9e7dd3 (diff)
parent52e92f409dede388b7dc3ee13491fbf7a80db935 (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "Mostly tooling fixes, but also a couple of updates for new Intel models (which are technically hw-enablement, but to users it's a fix to perf behavior on those new CPUs - hope this is fine), an AUX inheritance fix, event time-sharing fix, and a fix for lost non-perf NMI events on AMD systems" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) perf/x86/cstate: Add Tiger Lake CPU support perf/x86/msr: Add Tiger Lake CPU support perf/x86/intel: Add Tiger Lake CPU support perf/x86/cstate: Update C-state counters for Ice Lake perf/x86/msr: Add new CPU model numbers for Ice Lake perf/x86/cstate: Add Comet Lake CPU support perf/x86/msr: Add Comet Lake CPU support perf/x86/intel: Add Comet Lake CPU support perf/x86/amd: Change/fix NMI latency mitigation to use a timestamp perf/core: Fix corner case in perf_rotate_context() perf/core: Rework memory accounting in perf_mmap() perf/core: Fix inheritance of aux_output groups perf annotate: Don't return -1 for error when doing BPF disassembly perf annotate: Return appropriate error code for allocation failures perf annotate: Fix arch specific ->init() failure errors perf annotate: Propagate the symbol__annotate() error return perf annotate: Fix the signedness of failure returns perf annotate: Propagate perf_env__arch() error perf evsel: Fall back to global 'perf_env' in perf_evsel__env() perf tools: Propagate get_cpuid() error ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/events/amd/core.c30
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/cstate.c44
-rw-r--r--arch/x86/events/msr.c7
4 files changed, 60 insertions, 25 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e7d35f60d53f..64c3e70b0556 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -5,12 +5,14 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/nmi.h>
#include "../perf_event.h"
-static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
+static unsigned long perf_nmi_window;
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event)
* handler when multiple PMCs are active or PMC overflow while handling some
* other source of an NMI.
*
- * Attempt to mitigate this by using the number of active PMCs to determine
- * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
- * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
- * number of active PMCs or 2. The value of 2 is used in case an NMI does not
- * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
+ * received during this window will be claimed. This prevents extending the
+ * window past when it is possible that latent NMIs should be received. The
+ * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
+ * handled a counter. When an un-handled NMI is received, it will be claimed
+ * only if arriving within that window.
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
@@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
handled = x86_pmu_handle_irq(regs);
/*
- * If a counter was handled, record the number of possible remaining
- * NMIs that can occur.
+ * If a counter was handled, record a timestamp such that un-handled
+ * NMIs will be claimed if arriving within that window.
*/
if (handled) {
- this_cpu_write(perf_nmi_counter,
- min_t(unsigned int, 2, active));
+ this_cpu_write(perf_nmi_tstamp,
+ jiffies + perf_nmi_window);
return handled;
}
- if (!this_cpu_read(perf_nmi_counter))
+ if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
return NMI_DONE;
- this_cpu_dec(perf_nmi_counter);
-
return NMI_HANDLED;
}
@@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void)
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
+ /* Avoid calulating the value each time in the NMI handler */
+ perf_nmi_window = msecs_to_jiffies(100);
+
switch (boot_cpu_data.x86) {
case 0x15:
pr_cont("Fam15h ");
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 27ee47a7be66..fcef678c3423 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4983,6 +4983,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -5031,6 +5033,8 @@ __init int intel_pmu_init(void)
/* fall through */
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9f2f39003d96..e1daf4151e11 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -45,46 +45,49 @@
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- CNL
+ * CNL,KBL,CML
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- * SKL,KNL,GLM,CNL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
- * Available model: SNB,IVB,HSW,BDW,SKL,CNL
+ * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
+ * ICL,TGL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
- * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
+ * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
+ * KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- * GLM,CNL
+ * GLM,CNL,KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM,CNL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
+ * KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
- * Available model: HSW ULT,KBL,CNL
+ * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT,KBL,CNL
+ * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,KBL,GLM,CNL
+ * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
* Scope: Package (physical package)
*
*/
@@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = {
BIT(PERF_CSTATE_PKG_C10_RES),
};
+static const struct cstate_model icl_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES) |
+ BIT(PERF_CSTATE_PKG_C8_RES) |
+ BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
static const struct cstate_model slm_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
@@ -614,6 +630,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
@@ -625,8 +643,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index b1afc77f0704..6f86650b3f77 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -89,7 +89,14 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
case INTEL_FAM6_ICELAKE_L:
+ case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;