diff options
author | Sandipan Das <sandipan.das@amd.com> | 2022-08-11 17:59:54 +0530 |
---|---|---|
committer | Peter Zijlstra <peterz@infradead.org> | 2022-08-27 00:05:43 +0200 |
commit | ca5b7c0d9621702e107c83216316a6d722878b64 (patch) | |
tree | 27a9ec5b954583f63da8a0d083d4e91074fa5a26 /arch/x86/events/amd/core.c | |
parent | 703fb765f48897214e3eb110f35dddec80682f60 (diff) |
perf/x86/amd/lbr: Add LbrExtV2 branch record support
If AMD Last Branch Record Extension Version 2 (LbrExtV2) is detected,
enable it alongside LBR Freeze on PMI when an event requests branch stack
i.e. PERF_SAMPLE_BRANCH_STACK.
Each branch record is represented by a pair of registers, LBR From and LBR
To. The freeze feature prevents any updates to these registers once a PMC
overflows. The contents remain unchanged until the freeze bit is cleared by
the PMI handler.
The branch records are read and copied to sample data before unfreezing.
However, only valid entries are copied. There is no additional register to
denote which of the register pairs represent the top of the stack (TOS)
since internal register renaming always ensures that the first pair (i.e.
index 0) is the one representing the most recent branch and so on.
The LBR registers are per-thread resources and are cleared explicitly
whenever a new task is scheduled in. There are no special implications on
the contents of these registers when transitioning to deep C-states.
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/d3b8500a3627a0d4d0259b005891ee248f248d91.1660211399.git.sandipan.das@amd.com
Diffstat (limited to 'arch/x86/events/amd/core.c')
-rw-r--r-- | arch/x86/events/amd/core.c | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a3aa67b4fd50..d799628016c8 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -620,7 +620,7 @@ static inline u64 amd_pmu_get_global_status(void) /* PerfCntrGlobalStatus is read-only */ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); - return status & amd_pmu_global_cntr_mask; + return status; } static inline void amd_pmu_ack_global_status(u64 status) @@ -631,8 +631,6 @@ static inline void amd_pmu_ack_global_status(u64 status) * clears the same bit in PerfCntrGlobalStatus */ - /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */ - status &= amd_pmu_global_cntr_mask; wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } @@ -742,11 +740,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -static void amd_pmu_v2_enable_all(int added) +static __always_inline void amd_pmu_core_enable_all(void) { amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); } +static void amd_pmu_v2_enable_all(int added) +{ + amd_pmu_lbr_enable_all(); + amd_pmu_core_enable_all(); +} + static void amd_pmu_disable_event(struct perf_event *event) { x86_pmu_disable_event(event); @@ -771,10 +775,15 @@ static void amd_pmu_disable_all(void) amd_pmu_check_overflow(); } -static void amd_pmu_v2_disable_all(void) +static __always_inline void amd_pmu_core_disable_all(void) { - /* Disable all PMCs */ amd_pmu_set_global_ctl(0); +} + +static void amd_pmu_v2_disable_all(void) +{ + amd_pmu_core_disable_all(); + amd_pmu_lbr_disable_all(); amd_pmu_check_overflow(); } @@ -877,8 +886,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* Stop counting */ - amd_pmu_v2_disable_all(); + /* Stop counting but do not disable LBR */ + amd_pmu_core_disable_all(); status = amd_pmu_get_global_status(); @@ -886,6 +895,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; + /* Read branch records before unfreezing */ + if (status & GLOBAL_STATUS_LBRS_FROZEN) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -905,6 +920,9 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -918,7 +936,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) */ WARN_ON(status > 0); - /* Clear overflow bits */ + /* Clear overflow and freeze bits */ amd_pmu_ack_global_status(~status); /* @@ -932,7 +950,7 @@ done: /* Resume counting only if PMU is active */ if (pmu_enabled) - amd_pmu_v2_enable_all(0); + amd_pmu_core_enable_all(); return amd_pmu_adjust_nmi_window(handled); } @@ -1375,7 +1393,14 @@ static int __init amd_core_pmu_init(void) } /* LBR and BRS are mutually exclusive features */ - if (amd_pmu_lbr_init() && !amd_brs_init()) { + if (!amd_pmu_lbr_init()) { + /* LBR requires flushing on context switch */ + x86_pmu.sched_task = amd_pmu_lbr_sched_task; + static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); + static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); + static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); + static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + } else if (!amd_brs_init()) { /* * BRS requires special event constraints and flushing on ctxsw. */ |