perf/x86: Add memory profiling via PEBS Load Latency

This patch adds support for memory profiling using the PEBS Load Latency facility. Load accesses are sampled by HW and the instruction address, data address, load latency, data source, tlb, locked information can be saved in the sampling buffer if using the PERF_SAMPLE_COST (for latency), PERF_SAMPLE_ADDR, PERF_SAMPLE_DATA_SRC types. To enable PEBS Load Latency, users have to use the model specific event: - on NHM/WSM: MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD - on SNB/IVB: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD To make things easier, this patch also exports a generic alias via sysfs: mem-loads. It export the right event encoding based on the host CPU and can be used directly by the perf tool. Loosely based on Intel's Lin Ming patch posted on LKML in July 2011. Signed-off-by: Stephane Eranian <eranian@google.com> Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-9-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
author: Stephane Eranian <eranian@google.com> 2013-01-24 16:10:32 +0100
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2013-04-01 12:16:31 -0300
commit: f20093eef5f7843a25adfc0512617d4b1ff1aa6e (patch)
tree: 1e1d008f98adab4477e3803ed24f3f2a22b34aaf /arch/x86/kernel/cpu/perf_event.h
parent: d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 (diff)
1 files changed, 24 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9686d38eb458..f3a9a94e4d22 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -61,6 +62,10 @@ struct event_constraint {
 	int	overlap;
 	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -233,6 +238,10 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -262,12 +271,22 @@ struct extra_reg {
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
+	.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -357,6 +376,7 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
@@ -648,6 +668,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
author	Stephane Eranian <eranian@google.com>	2013-01-24 16:10:32 +0100
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2013-04-01 12:16:31 -0300
commit	f20093eef5f7843a25adfc0512617d4b1ff1aa6e (patch)
tree	1e1d008f98adab4477e3803ed24f3f2a22b34aaf /arch/x86/kernel/cpu/perf_event.h
parent	d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 (diff)