diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 14:16:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 14:16:36 -0700 |
commit | 90489a72fba9529c85e051067ecb41183b8e982e (patch) | |
tree | 6c61660a380c3ddf25607b5892d173c3f4feb0d2 /arch | |
parent | 007dc78fea62610bf06829e38f1d8c69b6ea5af6 (diff) | |
parent | d15d356887e770c5f2dcf963b52c7cb510c9e42d (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel changes were:
- add support for Intel's "adaptive PEBS v4" - which embeds LBR data
in PEBS records and can thus batch up and reduce the IRQ (NMI) rate
significantly - reducing overhead and making call-graph profiling
less intrusive.
- add Intel CPU core and uncore support updates for Tremont, Icelake,
- extend the x86 PMU constraints scheduler with 'constraint ranges'
to better support Icelake hw constraints,
- make x86 call-chain support work better with CONFIG_FRAME_POINTER=y
- misc other changes
Tooling changes:
- updates to the main tools: 'perf record', 'perf trace', 'perf
stat'
- updated Intel and S/390 vendor events
- libtraceevent updates
- misc other updates and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (69 commits)
perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER
watchdog: Fix typo in comment
perf/x86/intel: Add Tremont core PMU support
perf/x86/intel/uncore: Add Intel Icelake uncore support
perf/x86/msr: Add Icelake support
perf/x86/intel/rapl: Add Icelake support
perf/x86/intel/cstate: Add Icelake support
perf/x86/intel: Add Icelake support
perf/x86: Support constraint ranges
perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them
perf/x86/intel: Support adaptive PEBS v4
perf/x86/intel/ds: Extract code of event update in short period
perf/x86/intel: Extract memory code PEBS parser for reuse
perf/x86: Support outputting XMM registers
perf/x86/intel: Force resched when TFA sysctl is modified
perf/core: Add perf_pmu_resched() as global function
perf/headers: Fix stale comment for struct perf_addr_filter
perf/core: Make perf_swevent_init_cpu() static
perf/x86: Add sanity checks to x86_schedule_events()
perf/x86: Optimize x86_schedule_events()
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/events/core.c | 95 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 296 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 505 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 35 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 6 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 91 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 1 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 98 | ||||
-rw-r--r-- | arch/x86/include/asm/intel_ds.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 57 | ||||
-rw-r--r-- | arch/x86/include/asm/stacktrace.h | 13 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/perf_regs.h | 23 | ||||
-rw-r--r-- | arch/x86/kernel/perf_regs.c | 27 |
17 files changed, 1094 insertions, 161 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 81911e11a15d..f315425d8468 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event) return -EINVAL; } + /* sample_regs_user never support XMM registers */ + if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + return -EINVAL; + /* + * Besides the general purpose registers, XMM registers may + * be collected in PEBS on some platforms, e.g. Icelake + */ + if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { + if (x86_pmu.pebs_no_xmm_regs) + return -EINVAL; + + if (!event->attr.precise_ip) + return -EINVAL; + } + return x86_setup_perfctr(event); } @@ -661,6 +676,10 @@ static inline int is_x86_event(struct perf_event *event) return event->pmu == &pmu; } +struct pmu *x86_get_pmu(void) +{ + return &pmu; +} /* * Event scheduler state: * @@ -849,18 +868,43 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) struct event_constraint *c; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; - int i, wmin, wmax, unsched = 0; + int n0, i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); + /* + * Compute the number of events already present; see x86_pmu_add(), + * validate_group() and x86_pmu_commit_txn(). For the former two + * cpuc->n_events hasn't been updated yet, while for the latter + * cpuc->n_txn contains the number of events added in the current + * transaction. 
+ */ + n0 = cpuc->n_events; + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + n0 -= cpuc->n_txn; + if (x86_pmu.start_scheduling) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - cpuc->event_constraint[i] = NULL; - c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); - cpuc->event_constraint[i] = c; + c = cpuc->event_constraint[i]; + + /* + * Previously scheduled events should have a cached constraint, + * while new events should not have one. + */ + WARN_ON_ONCE((c && i >= n0) || (!c && i < n0)); + + /* + * Request constraints for new events; or for those events that + * have a dynamic constraint -- for those the constraint can + * change due to external factors (sibling state, allow_tfa). + */ + if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { + c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); + cpuc->event_constraint[i] = c; + } wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); @@ -925,25 +969,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!unsched && assign) { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; - e->hw.flags |= PERF_X86_EVENT_COMMITTED; if (x86_pmu.commit_scheduling) x86_pmu.commit_scheduling(cpuc, i, assign[i]); } } else { - for (i = 0; i < n; i++) { + for (i = n0; i < n; i++) { e = cpuc->event_list[i]; - /* - * do not put_constraint() on comitted events, - * because they are good to go - */ - if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) - continue; /* * release events that failed scheduling */ if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, e); + + cpuc->event_constraint[i] = NULL; } } @@ -1373,11 +1412,6 @@ static void x86_pmu_del(struct perf_event *event, int flags) int i; /* - * event is descheduled - */ - event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; - - /* * If we're called during a txn, we only need to undo x86_pmu.add. * The events never got scheduled and ->cancel_txn will truncate * the event_list. 
@@ -1413,6 +1447,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; } + cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; perf_event_update_userpage(event); @@ -2024,7 +2059,7 @@ static int validate_event(struct perf_event *event) if (IS_ERR(fake_cpuc)) return PTR_ERR(fake_cpuc); - c = x86_pmu.get_event_constraints(fake_cpuc, -1, event); + c = x86_pmu.get_event_constraints(fake_cpuc, 0, event); if (!c || !c->weight) ret = -EINVAL; @@ -2072,8 +2107,7 @@ static int validate_group(struct perf_event *event) if (n < 0) goto out; - fake_cpuc->n_events = n; - + fake_cpuc->n_events = 0; ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); out: @@ -2348,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event *event, cyc2ns_read_end(); } +/* + * Determine whether the regs were taken from an irq/exception handler rather + * than from perf_arch_fetch_caller_regs(). + */ +static bool perf_hw_regs(struct pt_regs *regs) +{ + return regs->flags & X86_EFLAGS_FIXED; +} + void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { @@ -2359,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; + unwind_start(&state, current, regs, NULL); + } else { + unwind_start(&state, current, NULL, (void *)regs->sp); + } - for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); - unwind_next_frame(&state)) { + for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || perf_callchain_store(entry, addr)) return; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d35f4775d5f1..ef763f535e3a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c 
@@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct event_constraint intel_icl_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), + INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_icl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + EVENT_EXTRA_END +}; + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs }, }; +#define TNT_LOCAL_DRAM BIT_ULL(26) +#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD +#define TNT_DEMAND_WRITE GLM_DEMAND_RFO +#define TNT_LLC_ACCESS GLM_ANY_RESPONSE +#define TNT_SNP_ANY 
(SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \ + SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM) +#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM) + +static __initconst const u64 tnt_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_READ| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_READ| + TNT_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_WRITE| + TNT_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + +static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + EVENT_EXTRA_END +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int /* * We're going to use PMC3, make sure TFA is set before we touch it. 
*/ - if (cntr == 3 && !cpuc->is_fake) + if (cntr == 3) intel_set_tfa(cpuc, true); } @@ -2149,6 +2217,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) bits <<= (idx * 4); mask = 0xfULL << (idx * 4); + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { + bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + } + rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; @@ -2692,7 +2765,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } @@ -2842,7 +2915,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; - int is_excl, i; + int is_excl, i, w; /* * validating a group does not require @@ -2898,36 +2971,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * SHARED : sibling counter measuring non-exclusive event * UNUSED : sibling counter unused */ + w = c->weight; for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { /* * exclusive event in sibling counter * our corresponding counter cannot be used * regardless of our event */ - if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) { __clear_bit(i, c->idxmsk); + w--; + continue; + } /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ - if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) { __clear_bit(i, c->idxmsk); + w--; + continue; + } } /* - * recompute actual bit weight for scheduling algorithm - */ - c->weight = hweight64(c->idxmsk64); - - /* * if we return an empty mask, then 
switch * back to static empty constraint to avoid * the cost of freeing later on */ - if (c->weight == 0) + if (!w) c = &emptyconstraint; + c->weight = w; + return c; } @@ -2935,11 +3012,9 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = NULL; - struct event_constraint *c2; + struct event_constraint *c1, *c2; - if (idx >= 0) /* fake does < 0 */ - c1 = cpuc->event_constraint[idx]; + c1 = cpuc->event_constraint[idx]; /* * first time only @@ -2947,7 +3022,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * - dynamic constraint: handled by intel_get_excl_constraints() */ c2 = __intel_get_event_constraints(cpuc, idx, event); - if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { + if (c1) { + WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC)); bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); c1->weight = c2->weight; c2 = c1; @@ -3370,6 +3446,12 @@ static struct event_constraint counter0_constraint = static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); +static struct event_constraint fixed0_constraint = + FIXED_EVENT_CONSTRAINT(0x00c0, 0); + +static struct event_constraint fixed0_counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -3389,6 +3471,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * +icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + /* + * Fixed counter 0 has less skid. 
+ * Force instruction:ppp in Fixed counter 0 + */ + if ((event->attr.precise_ip == 3) && + constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_constraint; + + return hsw_get_event_constraints(cpuc, idx, event); +} + +static struct event_constraint * glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { @@ -3403,6 +3500,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + /* + * :ppp means to do reduced skid PEBS, + * which is available on PMC0 and fixed counter 0. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_constraint; + + return &counter0_constraint; + } + + c = intel_get_event_constraints(cpuc, idx, event); + + return c; +} + static bool allow_tsx_force_abort = true; static struct event_constraint * @@ -3414,7 +3534,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, /* * Without TFA we must not use PMC3. 
*/ - if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) { + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { c = dyn_constraint(cpuc, c, idx); c->idxmsk64 &= ~(1ULL << 3); c->weight--; @@ -3511,6 +3631,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { + cpuc->pebs_record_size = x86_pmu.pebs_record_size; + if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) @@ -4118,6 +4240,42 @@ static struct attribute *hsw_tsx_events_attrs[] = { NULL }; +EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2"); + +static struct attribute *icl_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL, +}; + +static struct attribute *icl_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_abort), + EVENT_PTR(el_commit), + EVENT_PTR(el_capacity_read), + EVENT_PTR(el_capacity_write), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + +static __init struct attribute **get_icl_events_attrs(void) +{ + return boot_cpu_has(X86_FEATURE_RTM) ? 
+ merge_attr(icl_events_attrs, icl_tsx_events_attrs) : + icl_events_attrs; +} + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4157,6 +4315,50 @@ done: return count; } +static void update_tfa_sched(void *ignored) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * check if PMC3 is used + * and if so force schedule out for all event types all contexts + */ + if (test_bit(3, cpuc->active_mask)) + perf_pmu_resched(x86_get_pmu()); +} + +static ssize_t show_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", allow_tsx_force_abort); +} + +static ssize_t set_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + ssize_t ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + /* no change */ + if (val == allow_tsx_force_abort) + return count; + + allow_tsx_force_abort = val; + + get_online_cpus(); + on_each_cpu(update_tfa_sched, NULL, 1); + put_online_cpus(); + + return count; +} + + static DEVICE_ATTR_RW(freeze_on_smi); static ssize_t branches_show(struct device *cdev, @@ -4189,7 +4391,9 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; -static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); +static DEVICE_ATTR(allow_tsx_force_abort, 0644, + show_sysctl_tfa, + set_sysctl_tfa); static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, @@ -4450,6 +4654,32 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; + case INTEL_FAM6_ATOM_TREMONT_X: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + 
x86_pmu.extra_regs = intel_tnt_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + extra_attr = slm_format_attr; + pr_cont("Tremont events, "); + name = "Tremont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -4698,13 +4928,41 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr; + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); name = "skylake"; break; + case INTEL_FAM6_ICELAKE_MOBILE: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_icl_event_constraints; + x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints; + x86_pmu.extra_regs = intel_icl_extra_regs; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = icl_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
+ hsw_format_attr : nhm_format_attr; + extra_attr = merge_attr(extra_attr, skl_format_attr); + x86_pmu.cpu_events = get_icl_events_attrs(); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(false); + pr_cont("Icelake events, "); + name = "icelake"; + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d41de9af7a39..6072f92cb8ea 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10c99ce1fead..7a9f5dac5abe 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -849,6 +849,26 @@ struct event_constraint intel_skl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_icl_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */ + + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. 
+ */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -858,7 +878,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) if (x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } @@ -906,17 +926,87 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) if (cpuc->n_pebs == cpuc->n_large_pebs) { threshold = ds->pebs_absolute_maximum - - reserved * x86_pmu.pebs_record_size; + reserved * cpuc->pebs_record_size; } else { - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; } ds->pebs_interrupt_threshold = threshold; } +static void adaptive_pebs_record_size_update(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = cpuc->pebs_data_cfg; + int sz = sizeof(struct pebs_basic); + + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + sz += sizeof(struct pebs_meminfo); + if (pebs_data_cfg & PEBS_DATACFG_GP) + sz += sizeof(struct pebs_gprs); + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + sz += sizeof(struct pebs_xmm); + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry); + + cpuc->pebs_record_size = sz; +} + +#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ + PERF_SAMPLE_TRANSACTION) + +static u64 pebs_update_adaptive_cfg(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u64 sample_type = attr->sample_type; + u64 pebs_data_cfg = 0; + bool gprs, tsx_weight; + + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && + attr->precise_ip > 1) + return pebs_data_cfg; + + if (sample_type & PERF_PEBS_MEMINFO_TYPE) + pebs_data_cfg |= 
PEBS_DATACFG_MEMINFO; + + /* + * We need GPRs when: + * + user requested them + * + precise_ip < 2 for the non event IP + * + For RTM TSX weight we need GPRs for the abort code. + */ + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS); + + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + ((attr->config & INTEL_ARCH_EVENT_MASK) == + x86_pmu.rtm_abort_event); + + if (gprs || (attr->precise_ip < 2) || tsx_weight) + pebs_data_cfg |= PEBS_DATACFG_GP; + + if ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_XMM_REGS)) + pebs_data_cfg |= PEBS_DATACFG_XMMS; + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + /* + * For now always log all LBRs. Could configure this + * later. + */ + pebs_data_cfg |= PEBS_DATACFG_LBRS | + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); + } + + return pebs_data_cfg; +} + static void -pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct perf_event *event, bool add) { + struct pmu *pmu = event->ctx->pmu; /* * Make sure we get updated with the first PEBS * event. It will trigger also during removal, but @@ -933,6 +1023,29 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) update = true; } + /* + * The PEBS record doesn't shrink on pmu::del(). Doing so would require + * iterating all remaining PEBS events to reconstruct the config. + */ + if (x86_pmu.intel_cap.pebs_baseline && add) { + u64 pebs_data_cfg; + + /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ + if (cpuc->n_pebs == 1) { + cpuc->pebs_data_cfg = 0; + cpuc->pebs_record_size = sizeof(struct pebs_basic); + } + + pebs_data_cfg = pebs_update_adaptive_cfg(event); + + /* Update pebs_record_size if new event requires more data. 
*/ + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { + cpuc->pebs_data_cfg |= pebs_data_cfg; + adaptive_pebs_record_size_update(); + update = true; + } + } + if (update) pebs_update_threshold(cpuc); } @@ -947,7 +1060,7 @@ void intel_pmu_pebs_add(struct perf_event *event) if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, true); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -960,11 +1073,19 @@ void intel_pmu_pebs_enable(struct perf_event *event) cpuc->pebs_enabled |= 1ULL << hwc->idx; - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled |= 1ULL << 63; + if (x86_pmu.intel_cap.pebs_baseline) { + hwc->config |= ICL_EVENTSEL_ADAPTIVE; + if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { + wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); + cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; + } + } + /* * Use auto-reload if possible to save a MSR write in the PMI. * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. 
@@ -991,7 +1112,7 @@ void intel_pmu_pebs_del(struct perf_event *event) if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, false); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -1004,7 +1125,8 @@ void intel_pmu_pebs_disable(struct perf_event *event) cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && + (x86_pmu.version < 5)) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); @@ -1125,34 +1247,57 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } -static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_weight(u64 tsx_tuning) { - if (pebs->tsx_tuning) { - union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + if (tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = tsx_tuning }; return tsx.cycles_last_block; } return 0; } -static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) { - u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; /* For RTM XABORTs also log the abort code from AX */ - if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) - txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; return txn; } -static void setup_pebs_sample_data(struct perf_event *event, - struct pt_regs *iregs, void *__pebs, - struct perf_sample_data *data, - struct pt_regs *regs) +static inline u64 get_pebs_status(void *n) { + if (x86_pmu.intel_cap.pebs_format < 4) + return ((struct pebs_record_nhm *)n)->status; + return ((struct pebs_basic *)n)->applicable_counters; +} 
+ #define PERF_X86_EVENT_PEBS_HSW_PREC \ (PERF_X86_EVENT_PEBS_ST_HSW | \ PERF_X86_EVENT_PEBS_LD_HSW | \ PERF_X86_EVENT_PEBS_NA_HSW) + +static u64 get_data_src(struct perf_event *event, u64 aux) +{ + u64 val = PERF_MEM_NA; + int fl = event->hw.flags; + bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + + if (fl & PERF_X86_EVENT_PEBS_LDLAT) + val = load_latency_data(aux); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, aux); + else if (fst) + val = precise_store_data(aux); + return val; +} + +static void setup_pebs_fixed_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ /* * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. @@ -1160,17 +1305,13 @@ static void setup_pebs_sample_data(struct perf_event *event, struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_skl *pebs = __pebs; u64 sample_type; - int fll, fst, dsrc; - int fl = event->hw.flags; + int fll; if (pebs == NULL) return; sample_type = event->attr.sample_type; - dsrc = sample_type & PERF_SAMPLE_DATA_SRC; - - fll = fl & PERF_X86_EVENT_PEBS_LDLAT; - fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; perf_sample_data_init(data, 0, event->hw.last_period); @@ -1185,16 +1326,8 @@ static void setup_pebs_sample_data(struct perf_event *event, /* * data.data_src encodes the data source */ - if (dsrc) { - u64 val = PERF_MEM_NA; - if (fll) - val = load_latency_data(pebs->dse); - else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) - val = precise_datala_hsw(event, pebs->dse); - else if (fst) - val = precise_store_data(pebs->dse); - data->data_src.val = val; - } + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, pebs->dse); /* * We must however always use iregs for the unwinder to stay sane; 
the @@ -1281,10 +1414,11 @@ static void setup_pebs_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_hsw_weight(pebs); + data->weight = intel_get_tsx_weight(pebs->tsx_tuning); if (sample_type & PERF_SAMPLE_TRANSACTION) - data->txn = intel_hsw_transaction(pebs); + data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, + pebs->ax); } /* @@ -1301,6 +1435,140 @@ static void setup_pebs_sample_data(struct perf_event *event, data->br_stack = &cpuc->lbr_stack; } +static void adaptive_pebs_save_regs(struct pt_regs *regs, + struct pebs_gprs *gprs) +{ + regs->ax = gprs->ax; + regs->bx = gprs->bx; + regs->cx = gprs->cx; + regs->dx = gprs->dx; + regs->si = gprs->si; + regs->di = gprs->di; + regs->bp = gprs->bp; + regs->sp = gprs->sp; +#ifndef CONFIG_X86_32 + regs->r8 = gprs->r8; + regs->r9 = gprs->r9; + regs->r10 = gprs->r10; + regs->r11 = gprs->r11; + regs->r12 = gprs->r12; + regs->r13 = gprs->r13; + regs->r14 = gprs->r14; + regs->r15 = gprs->r15; +#endif +} + +/* + * With adaptive PEBS the layout depends on what fields are configured. 
+ */ + +static void setup_pebs_adaptive_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct pebs_basic *basic = __pebs; + void *next_record = basic + 1; + u64 sample_type; + u64 format_size; + struct pebs_meminfo *meminfo = NULL; + struct pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + + if (basic == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + sample_type = event->attr.sample_type; + format_size = basic->format_size; + perf_sample_data_init(data, 0, event->hw.last_period); + data->period = event->hw.last_period; + + if (event->attr.use_clockid == 0) + data->time = native_sched_clock_from_tsc(basic->tsc); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) + data->callchain = perf_callchain(event, iregs); + + *regs = *iregs; + /* The ip in basic is EventingIP */ + set_linear_ip(regs, basic->ip); + regs->flags = PERF_EFLAGS_EXACT; + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. 
+ */ + if (format_size & PEBS_DATACFG_MEMINFO) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (format_size & PEBS_DATACFG_GP) { + gprs = next_record; + next_record = gprs + 1; + + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR) + adaptive_pebs_save_regs(regs, gprs); + } + + if (format_size & PEBS_DATACFG_MEMINFO) { + if (sample_type & PERF_SAMPLE_WEIGHT) + data->weight = meminfo->latency ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, meminfo->aux); + + if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) + data->addr = meminfo->address; + + if (sample_type & PERF_SAMPLE_TRANSACTION) + data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, + gprs ? gprs->ax : 0); + } + + if (format_size & PEBS_DATACFG_XMMS) { + struct pebs_xmm *xmm = next_record; + + next_record = xmm + 1; + perf_regs->xmm_regs = xmm->xmm; + } + + if (format_size & PEBS_DATACFG_LBRS) { + struct pebs_lbr *lbr = next_record; + int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) + & 0xff) + 1; + next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + data->br_stack = &cpuc->lbr_stack; + } + } + + WARN_ONCE(next_record != __pebs + (format_size >> 48), + "PEBS record size %llu, expected %llu, config %llx\n", + format_size >> 48, + (u64)(next_record - __pebs), + basic->format_size); +} + static inline void * get_next_pebs_record_by_bit(void *base, void *top, int bit) { @@ -1318,19 +1586,19 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) if (base == NULL) return NULL; - for (at = base; at < top; at += x86_pmu.pebs_record_size) { - struct pebs_record_nhm *p = at; + for (at = base; at < top; at += cpuc->pebs_record_size) { + unsigned long status = get_pebs_status(at); - if (test_bit(bit, 
(unsigned long *)&p->status)) { + if (test_bit(bit, (unsigned long *)&status)) { /* PEBS v3 has accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) return at; - if (p->status == (1 << bit)) + if (status == (1 << bit)) return at; /* clear non-PEBS bit and re-check */ - pebs_status = p->status & cpuc->pebs_enabled; + pebs_status = status & cpuc->pebs_enabled; pebs_status &= PEBS_COUNTER_MASK; if (pebs_status == (1 << bit)) return at; @@ -1410,11 +1678,18 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *base, void *top, - int bit, int count) + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct perf_sample_data data; - struct pt_regs regs; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { @@ -1429,20 +1704,20 @@ static void __intel_pmu_pebs_event(struct perf_event *event, return; while (count > 1) { - setup_pebs_sample_data(event, iregs, at, &data, ®s); - perf_event_output(event, &data, ®s); - at += x86_pmu.pebs_record_size; + setup_sample(event, iregs, at, &data, regs); + perf_event_output(event, &data, regs); + at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_pebs_sample_data(event, iregs, at, &data, ®s); + setup_sample(event, iregs, at, &data, regs); /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. 
*/ - if (perf_event_overflow(event, &data, ®s)) { + if (perf_event_overflow(event, &data, regs)) { x86_pmu_stop(event, 0); return; } @@ -1483,7 +1758,27 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n); + __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + setup_pebs_fixed_sample_data); +} + +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +{ + struct perf_event *event; + int bit; + + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. + */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } } static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) @@ -1513,19 +1808,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } if (unlikely(base >= top)) { - /* - * The drain_pebs() could be called twice in a short period - * for auto-reload event in pmu::read(). There are no - * overflows have happened in between. - * It needs to call intel_pmu_save_and_restart_reload() to - * update the event->count for this case. 
- */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, - size) { - event = cpuc->events[bit]; - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_save_and_restart_reload(event, 0); - } + intel_pmu_pebs_event_update_no_drain(cpuc, size); return; } @@ -1538,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&pebs_status, - size) + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) counts[bit]++; continue; @@ -1578,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * If collision happened, the record will be dropped. */ if (p->status != (1ULL << bit)) { - for_each_set_bit(i, (unsigned long *)&pebs_status, - x86_pmu.max_pebs_events) + for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; } @@ -1587,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) counts[bit]++; } - for (bit = 0; bit < size; bit++) { + for_each_set_bit(bit, (unsigned long *)&mask, size) { if ((counts[bit] == 0) && (error[bit] == 0)) continue; @@ -1608,11 +1889,66 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, - top, bit, counts[bit]); + top, bit, counts[bit], + setup_pebs_fixed_sample_data); } } } +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event; + void *base, *at, *top; + int bit, size; + u64 mask; + + if (!x86_pmu.pebs_active) + return; + + base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_basic *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + mask = ((1ULL << x86_pmu.max_pebs_events) - 1) | + (((1ULL << 
x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + + if (unlikely(base >= top)) { + intel_pmu_pebs_event_update_no_drain(cpuc, size); + return; + } + + for (at = base; at < top; at += cpuc->pebs_record_size) { + u64 pebs_status; + + pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; + pebs_status &= mask; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) + counts[bit]++; + } + + for_each_set_bit(bit, (unsigned long *)&mask, size) { + if (counts[bit] == 0) + continue; + + event = cpuc->events[bit]; + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + __intel_pmu_pebs_event(event, iregs, base, + top, bit, counts[bit], + setup_pebs_adaptive_sample_data); + } +} + /* * BTS, PEBS probe and setup */ @@ -1628,12 +1964,18 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) + if (x86_pmu.version <= 4) { x86_pmu.pebs_no_isolation = 1; + x86_pmu.pebs_no_xmm_regs = 1; + } if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? 
'+' : '-'; + char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; + if (format < 4) + x86_pmu.intel_cap.pebs_baseline = 0; + switch (format) { case 0: pr_cont("PEBS fmt0%c, ", pebs_type); @@ -1669,6 +2011,29 @@ void __init intel_ds_init(void) x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 4: + x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; + x86_pmu.pebs_record_size = sizeof(struct pebs_basic); + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= + PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_TIME; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + pebs_qual = "-baseline"; + } else { + /* Only basic record supported */ + x86_pmu.pebs_no_xmm_regs = 1; + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_ADDR | + PERF_SAMPLE_TIME | + PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_TRANSACTION | + PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_REGS_INTR); + } + pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + break; + default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 580c1b91c454..6f814a27416b 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -488,6 +488,8 @@ void intel_pmu_lbr_add(struct perf_event *event) * be 'new'. Conversely, a new event can get installed through the * context switch path for the first time. 
*/ + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users++; perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); @@ -507,8 +509,11 @@ void intel_pmu_lbr_del(struct perf_event *event) task_ctx->lbr_callstack_users--; } + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users--; cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); + WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); perf_sched_cb_dec(event->ctx->pmu); } @@ -658,7 +663,13 @@ void intel_pmu_lbr_read(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (!cpuc->lbr_users) + /* + * Don't read when all LBRs users are using adaptive PEBS. + * + * This could be smarter and actually check the event, + * but this simple approach seems to work for now. + */ + if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users) return; if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) @@ -1080,6 +1091,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) } } +void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + cpuc->lbr_stack.nr = x86_pmu.lbr_nr; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + u64 info = lbr->lbr[i].info; + struct perf_branch_entry *e = &cpuc->lbr_entries[i]; + + e->from = lbr->lbr[i].from; + e->to = lbr->lbr[i].to; + e->mispred = !!(info & LBR_INFO_MISPRED); + e->predicted = !(info & LBR_INFO_MISPRED); + e->in_tx = !!(info & LBR_INFO_IN_TX); + e->abort = !!(info & LBR_INFO_ABORT); + e->cycles = info & LBR_INFO_CYCLES; + e->reserved = 0; + } + intel_pmu_lbr_filter(cpuc); +} + /* * Map interface branch filters onto LBR filters */ diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 94dc564146ca..37ebf6fc5415 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -775,6 +775,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst 
= { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9fe64c01a2e5..fc40a1473058 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1367,6 +1367,11 @@ static const struct intel_uncore_init_fun skx_uncore_init __initconst = { .pci_init = skx_uncore_pci_init, }; +static const struct intel_uncore_init_fun icl_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1393,6 +1398,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 853a49a8ccf6..79eb2e21e4f0 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -512,6 +512,7 @@ int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); +void icl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 13493f43b247..f8431819b3e1 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -34,6 +34,8 @@ #define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca 
#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 +#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -93,6 +95,12 @@ #define SKL_UNC_PERF_GLOBAL_CTL 0xe01 #define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) +/* ICL Cbo register */ +#define ICL_UNC_CBO_CONFIG 0x396 +#define ICL_UNC_NUM_CBO_MASK 0xf +#define ICL_UNC_CBO_0_PER_CTR0 0x702 +#define ICL_UNC_CBO_MSR_OFFSET 0x8 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -280,6 +288,70 @@ void skl_uncore_cpu_init(void) snb_uncore_arb.ops = &skl_uncore_msr_ops; } +static struct intel_uncore_type icl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = ICL_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct uncore_event_desc icl_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"), + { /* end: all zeroes */ }, +}; + +static struct attribute *icl_uncore_clock_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group icl_uncore_clock_format_group = { + .name = "format", + .attrs = icl_uncore_clock_formats_attr, +}; + +static struct intel_uncore_type icl_uncore_clockbox = { + .name = "clock", + .num_counters = 1, + .num_boxes = 1, + .fixed_ctr_bits = 48, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &skl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *icl_msr_uncores[] = { + &icl_uncore_cbox, + &snb_uncore_arb, + &icl_uncore_clockbox, + 
NULL, +}; + +static int icl_get_cbox_num(void) +{ + u64 num_boxes; + + rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes); + + return num_boxes & ICL_UNC_NUM_CBO_MASK; +} + +void icl_uncore_cpu_init(void) +{ + uncore_msr_uncores = icl_msr_uncores; + icl_uncore_cbox.num_boxes = icl_get_cbox_num(); + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -668,6 +740,18 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { { /* end: all zeroes */ }, }; +static const struct pci_device_id icl_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + static struct pci_driver snb_uncore_pci_driver = { .name = "snb_uncore", .id_table = snb_uncore_pci_ids, @@ -693,6 +777,11 @@ static struct pci_driver skl_uncore_pci_driver = { .id_table = skl_uncore_pci_ids, }; +static struct pci_driver icl_uncore_pci_driver = { + .name = "icl_uncore", + .id_table = icl_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -732,6 +821,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ + IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ { /* end marker */ } }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index a878e6286e4a..f3f4c2263501 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -89,6 +89,7 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_X: 
case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_ICELAKE_MOBILE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 1e98a42b560a..07fc84bb85c1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -49,28 +49,33 @@ struct event_constraint { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; u64 idxmsk64; }; - u64 code; - u64 cmask; - int weight; - int overlap; - int flags; + u64 code; + u64 cmask; + int weight; + int overlap; + int flags; + unsigned int size; }; + +static inline bool constraint_match(struct event_constraint *c, u64 ecode) +{ + return ((ecode & c->cmask) - c->code) <= (u64)c->size; +} + /* * struct hw_perf_event.flags flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ -#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */ -#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */ -#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */ -#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ -#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ -#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ -#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ -#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ - +#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */ +#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */ +#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */ +#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */ +#define 
PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */ +#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ +#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -116,6 +121,24 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) +#define PEBS_XMM_REGS \ + ((1ULL << PERF_REG_X86_XMM0) | \ + (1ULL << PERF_REG_X86_XMM1) | \ + (1ULL << PERF_REG_X86_XMM2) | \ + (1ULL << PERF_REG_X86_XMM3) | \ + (1ULL << PERF_REG_X86_XMM4) | \ + (1ULL << PERF_REG_X86_XMM5) | \ + (1ULL << PERF_REG_X86_XMM6) | \ + (1ULL << PERF_REG_X86_XMM7) | \ + (1ULL << PERF_REG_X86_XMM8) | \ + (1ULL << PERF_REG_X86_XMM9) | \ + (1ULL << PERF_REG_X86_XMM10) | \ + (1ULL << PERF_REG_X86_XMM11) | \ + (1ULL << PERF_REG_X86_XMM12) | \ + (1ULL << PERF_REG_X86_XMM13) | \ + (1ULL << PERF_REG_X86_XMM14) | \ + (1ULL << PERF_REG_X86_XMM15)) + /* * Per register state. 
*/ @@ -207,10 +230,16 @@ struct cpu_hw_events { int n_pebs; int n_large_pebs; + /* Current super set of events hardware configuration */ + u64 pebs_data_cfg; + u64 active_pebs_data_cfg; + int pebs_record_size; + /* * Intel LBR bits */ int lbr_users; + int lbr_pebs_users; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel; @@ -257,18 +286,29 @@ struct cpu_hw_events { void *kfree_on_online[X86_PERF_KFREE_MAX]; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ +#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ { .idxmsk64 = (n) }, \ .code = (c), \ + .size = (e) - (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ .flags = f, \ } +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \ + __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f) + #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) +/* + * The constraint_match() function only works for 'simple' event codes + * and not for extended (AMD64_EVENTSEL_EVENT) events codes. + */ +#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \ + __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0) + #define INTEL_EXCLEVT_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ 0, PERF_X86_EVENT_EXCL) @@ -304,6 +344,12 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) /* + * Constraint on a range of Event codes + */ +#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT) + +/* * Constraint on the Event code + UMask + fixed-mask * * filter mask to validate fixed counter events. 
@@ -350,6 +396,9 @@ struct cpu_hw_events { #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) +#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + /* Check only flags, but allow all event/umask */ #define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) @@ -366,6 +415,11 @@ struct cpu_hw_events { ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \ + __EVENT_CONSTRAINT_RANGE(code, end, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ @@ -473,6 +527,7 @@ union perf_capabilities { * values > 32bit. */ u64 full_width_write:1; + u64 pebs_baseline:1; }; u64 capabilities; }; @@ -613,14 +668,16 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1; + pebs_no_isolation :1, + pebs_no_xmm_regs :1; int pebs_record_size; int pebs_buffer_size; + int max_pebs_events; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); - int max_pebs_events; unsigned long large_pebs_flags; + u64 rtm_abort_event; /* * Intel LBR @@ -714,6 +771,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .event_str_ht = ht, \ } +struct pmu *x86_get_pmu(void); extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -941,6 +999,8 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[]; extern struct event_constraint intel_skl_pebs_event_constraints[]; +extern struct event_constraint intel_icl_pebs_event_constraints[]; + struct event_constraint 
*intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); @@ -959,6 +1019,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); void intel_pmu_auto_reload_read(struct perf_event *event); +void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr); + void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index ae26df1c2789..8380c3ddd4b2 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -8,7 +8,7 @@ /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 -#define MAX_FIXED_PEBS_EVENTS 3 +#define MAX_FIXED_PEBS_EVENTS 4 /* * A debug store configuration. diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95..1378518cf63f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -116,6 +116,7 @@ #define LBR_INFO_CYCLES 0xffff #define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf74902293..1392d5e6e8d6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -7,7 +7,7 @@ */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_FIXED 4 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 @@ -32,6 +32,8 @@ #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) @@ -87,6 +89,12 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define 
ARCH_PERFMON_EVENTS_COUNT 7 +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_LBR_SHIFT 24 + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: @@ -177,6 +185,41 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_size; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + u64 latency; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +struct pebs_lbr_entry { + u64 from, to, info; +}; + +struct pebs_lbr { + struct pebs_lbr_entry lbr[0]; /* Variable length */ +}; + +/* * IBS cpuid feature detection */ @@ -248,6 +291,11 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) @@ -260,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); */ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ip = (__ip); \ - (regs)->bp = caller_frame_pointer(); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } struct perf_guest_switch_msr { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a4..beef7ad9e43a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -98,19 
+98,6 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long caller_frame_pointer(void) -{ - struct stack_frame *frame; - - frame = __builtin_frame_address(0); - -#ifdef CONFIG_FRAME_POINTER - frame = frame->next_frame; -#endif - - return (unsigned long)frame; -} - void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index f3329cabce5c..ac67bbea10ca 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -27,8 +27,29 @@ enum perf_event_x86_regs { PERF_REG_X86_R13, PERF_REG_X86_R14, PERF_REG_X86_R15, - + /* These are the limits for the GPRs. */ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, + + /* These all need two bits set because they are 128bit */ + PERF_REG_X86_XMM0 = 32, + PERF_REG_X86_XMM1 = 34, + PERF_REG_X86_XMM2 = 36, + PERF_REG_X86_XMM3 = 38, + PERF_REG_X86_XMM4 = 40, + PERF_REG_X86_XMM5 = 42, + PERF_REG_X86_XMM6 = 44, + PERF_REG_X86_XMM7 = 46, + PERF_REG_X86_XMM8 = 48, + PERF_REG_X86_XMM9 = 50, + PERF_REG_X86_XMM10 = 52, + PERF_REG_X86_XMM11 = 54, + PERF_REG_X86_XMM12 = 56, + PERF_REG_X86_XMM13 = 58, + PERF_REG_X86_XMM14 = 60, + PERF_REG_X86_XMM15 = 62, + + /* These include both GPRs and XMMX registers */ + PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index c06c4c16c6b6..07c30ee17425 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { u64 perf_reg_value(struct pt_regs *regs, int idx) { + struct x86_perf_regs *perf_regs; + + if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) { + perf_regs = container_of(regs, struct x86_perf_regs, regs); + if 
(!perf_regs->xmm_regs) + return 0; + return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0]; + } + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; return regs_get_register(regs, pt_regs_offset[idx]); } -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) - #ifdef CONFIG_X86_32 +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) + int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0; @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) - return -EINVAL; - - if (mask & REG_NOSUPPORT) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0; |