Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c               | 66
-rw-r--r-- | kernel/events/hw_breakpoint_test.c |  4
2 files changed, 58 insertions, 12 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 65e20c5c3c44..e47914ac8732 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2248,6 +2248,7 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
 		    !event->pending_work) {
 			event->pending_work = 1;
 			dec = false;
+			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 			task_work_add(current, &event->pending_task, TWA_RESUME);
 		}
 		if (dec)
@@ -2287,6 +2288,7 @@ group_sched_out(struct perf_event *group_event, struct perf_event_context *ctx)
 
 #define DETACH_GROUP	0x01UL
 #define DETACH_CHILD	0x02UL
+#define DETACH_DEAD	0x04UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2308,12 +2310,20 @@ __perf_remove_from_context(struct perf_event *event,
 		update_cgrp_time_from_cpuctx(cpuctx, false);
 	}
 
+	/*
+	 * Ensure event_sched_out() switches to OFF, at the very least
+	 * this avoids raising perf_pending_task() at this time.
+	 */
+	if (flags & DETACH_DEAD)
+		event->pending_disable = 1;
 	event_sched_out(event, ctx);
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	if (flags & DETACH_CHILD)
 		perf_child_detach(event);
 	list_del_event(event, ctx);
+	if (flags & DETACH_DEAD)
+		event->state = PERF_EVENT_STATE_DEAD;
 
 	if (!pmu_ctx->nr_events) {
 		pmu_ctx->rotate_necessary = 0;
@@ -5299,9 +5309,7 @@ int perf_event_release_kernel(struct perf_event *event)
 
 	ctx = perf_event_ctx_lock(event);
 	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, DETACH_GROUP);
 
-	raw_spin_lock_irq(&ctx->lock);
 	/*
 	 * Mark this event as STATE_DEAD, there is no external reference to it
 	 * anymore.
@@ -5313,8 +5321,7 @@ int perf_event_release_kernel(struct perf_event *event)
 	 * Thus this guarantees that we will in fact observe and kill _ALL_
 	 * child events.
 	 */
-	event->state = PERF_EVENT_STATE_DEAD;
-	raw_spin_unlock_irq(&ctx->lock);
+	perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
 
 	perf_event_ctx_unlock(event, ctx);
 
@@ -6755,6 +6762,8 @@ static void perf_pending_task(struct callback_head *head)
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
 	preempt_enable_notrace();
+
+	put_event(event);
 }
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
@@ -9196,7 +9205,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
 					   PERF_RECORD_KSYMBOL_TYPE_BPF,
 					   (u64)(unsigned long)subprog->bpf_func,
 					   subprog->jited_len, unregister,
-					   prog->aux->ksym.name);
+					   subprog->aux->ksym.name);
 		}
 	}
 }
@@ -9439,6 +9448,19 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -9473,15 +9495,38 @@ static int __perf_event_overflow(struct perf_event *event,
 
 	if (event->attr.sigtrap) {
 		/*
-		 * Should not be able to return to user space without processing
-		 * pending_sigtrap (kernel events can overflow multiple times).
+		 * The desired behaviour of sigtrap vs invalid samples is a bit
+		 * tricky; on the one hand, one should not lose the SIGTRAP if
+		 * it is the first event, on the other hand, we should also not
+		 * trigger the WARN or override the data address.
 		 */
-		WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+		bool valid_sample = sample_is_allowed(event, regs);
+		unsigned int pending_id = 1;
+
+		if (regs)
+			pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
 		if (!event->pending_sigtrap) {
-			event->pending_sigtrap = 1;
+			event->pending_sigtrap = pending_id;
 			local_inc(&event->ctx->nr_pending);
+		} else if (event->attr.exclude_kernel && valid_sample) {
+			/*
+			 * Should not be able to return to user space without
+			 * consuming pending_sigtrap; with exceptions:
+			 *
+			 *  1. Where !exclude_kernel, events can overflow again
+			 *     in the kernel without returning to user space.
+			 *
+			 *  2. Events that can overflow again before the IRQ-
+			 *     work without user space progress (e.g. hrtimer).
+			 *     To approximate progress (with false negatives),
+			 *     check 32-bit hash of the current IP.
+			 */
+			WARN_ON_ONCE(event->pending_sigtrap != pending_id);
 		}
-		event->pending_addr = data->addr;
+
+		event->pending_addr = 0;
+		if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
+			event->pending_addr = data->addr;
 		irq_work_queue(&event->pending_irq);
 	}
 
@@ -10088,6 +10133,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 
 	perf_sample_data_init(&data, 0, 0);
 	data.raw = &raw;
+	data.sample_flags |= PERF_SAMPLE_RAW;
 
 	perf_trace_buf_update(record, event_type);
 
diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c
index 5ced822df788..c57610f52bb4 100644
--- a/kernel/events/hw_breakpoint_test.c
+++ b/kernel/events/hw_breakpoint_test.c
@@ -295,11 +295,11 @@ static int test_init(struct kunit *test)
 {
 	/* Most test cases want 2 distinct CPUs. */
 	if (num_online_cpus() < 2)
-		return -EINVAL;
+		kunit_skip(test, "not enough cpus");
 
 	/* Want the system to not use breakpoints elsewhere. */
 	if (hw_breakpoint_is_used())
-		return -EBUSY;
+		kunit_skip(test, "hw breakpoint already in use");
 
 	return 0;
 }
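The event_sched_out() and perf_pending_task() hunks work as a pair: a reference to the event is taken (via atomic_long_inc_not_zero()) before the task work is queued, and put_event() drops it once the work has run, so the event cannot be freed while perf_pending_task() is still pending. Below is a minimal user-space C sketch of that take-a-reference-before-queueing-deferred-work rule; every name in it (struct obj, pending_work(), get_obj_not_zero()) is an illustrative stand-in, not a kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_long refcount;
	int payload;
};

static void put_obj(struct obj *o)
{
	/* Free on the last reference, whichever thread drops it. */
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

/* Analogue of atomic_long_inc_not_zero(): never revive a dead object. */
static int get_obj_not_zero(struct obj *o)
{
	long v = atomic_load(&o->refcount);

	while (v != 0)
		if (atomic_compare_exchange_weak(&o->refcount, &v, v + 1))
			return 1;
	return 0;
}

/* Deferred work, standing in for perf_pending_task(). */
static void *pending_work(void *arg)
{
	struct obj *o = arg;

	printf("deferred work sees payload %d\n", o->payload);
	put_obj(o);	/* drop the reference taken at queue time */
	return NULL;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	pthread_t worker;

	atomic_store(&o->refcount, 1);	/* creator's reference */
	o->payload = 42;

	/* Take a reference *before* handing the object to deferred work. */
	if (!get_obj_not_zero(o))
		abort();
	pthread_create(&worker, NULL, pending_work, o);

	put_obj(o);	/* creator's reference gone; the work keeps o alive */
	pthread_join(worker, NULL);
	return 0;
}

Without the get_obj_not_zero() call before pthread_create(), the creator's put_obj() could free the object while the worker still dereferences it; that is the same window the WARN_ON_ONCE(!atomic_long_inc_not_zero(...)) line closes in event_sched_out().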
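The new DETACH_DEAD flag lets perf_event_release_kernel() fold the PERF_EVENT_STATE_DEAD transition into __perf_remove_from_context() itself: pending_disable is set first so event_sched_out() switches the event OFF rather than queueing fresh task work, and the DEAD marking then happens in the same critical section as the removal, instead of in a separate lock/unlock pair afterwards. A hedged sketch of that shape, with illustrative names and a pthread mutex standing in for the context lock:

#include <pthread.h>
#include <stdbool.h>

enum state { STATE_ACTIVE, STATE_OFF, STATE_DEAD };

struct item {
	enum state state;
	bool pending_disable;
};

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

#define DETACH_GROUP	0x01UL
#define DETACH_CHILD	0x02UL
#define DETACH_DEAD	0x04UL

/*
 * Everything happens under one acquisition of ctx_lock, so no observer
 * can see the item after removal but before it is marked DEAD.
 */
static void remove_from_context(struct item *it, unsigned long flags)
{
	pthread_mutex_lock(&ctx_lock);

	/*
	 * Set pending_disable first so the schedule-out step below goes
	 * straight to OFF instead of queueing deferred work against an
	 * object that is about to die.
	 */
	if (flags & DETACH_DEAD)
		it->pending_disable = true;

	it->state = STATE_OFF;	/* stand-in for event_sched_out() */
	/* ... detach from group/child and unlink from lists here ... */

	if (flags & DETACH_DEAD)
		it->state = STATE_DEAD;

	pthread_mutex_unlock(&ctx_lock);
}

int main(void)
{
	struct item it = { .state = STATE_ACTIVE };

	remove_from_context(&it, DETACH_GROUP | DETACH_DEAD);
	return it.state == STATE_DEAD ? 0 : 1;
}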
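The sigtrap rework keys the pending state on a 32-bit hash of the interrupted instruction pointer (hash32_ptr(), with "?: 1" reserving 0 for "nothing pending"): a second overflow at the same IP is treated as a re-fire without user-space progress (e.g. an hrtimer), while a different IP with a sigtrap still pending means progress without consumption and trips the new WARN_ON_ONCE(). A toy user-space model of that bookkeeping follows; overflow_at() and the addresses are made up, and the hash only mirrors the spirit of the kernel's hash32_ptr().

#include <stdint.h>
#include <stdio.h>

/* Fold a pointer to 32 bits, in the spirit of the kernel's hash32_ptr(). */
static uint32_t hash32_ptr(const void *ptr)
{
	uintptr_t val = (uintptr_t)ptr;

	if (sizeof(val) > 4)
		val ^= (uint64_t)val >> 32;
	return (uint32_t)val;
}

/* 0 means "no sigtrap pending"; hence the "?: 1" in the patch. */
static uint32_t pending_sigtrap;

/* Toy model of the bookkeeping added to __perf_event_overflow(). */
static void overflow_at(const void *ip)
{
	uint32_t pending_id = hash32_ptr(ip);

	if (!pending_id)
		pending_id = 1;

	if (!pending_sigtrap) {
		pending_sigtrap = pending_id;	/* first overflow: arm */
	} else if (pending_sigtrap != pending_id) {
		/*
		 * Different IP while one is still pending: user space made
		 * progress without consuming the sigtrap. This is the
		 * condition the new WARN_ON_ONCE() flags.
		 */
		printf("WARN: progress without consuming sigtrap (ip=%p)\n", ip);
	}
	/* Same IP: likely a re-fire without progress; tolerated. */
}

int main(void)
{
	overflow_at((void *)(uintptr_t)0x401000);	/* arms pending_sigtrap */
	overflow_at((void *)(uintptr_t)0x401000);	/* same IP: tolerated */
	overflow_at((void *)(uintptr_t)0x402000);	/* new IP: would WARN */
	return 0;
}

Note the heuristic has false negatives by design: if user space happens to be interrupted at the same IP twice, progress goes undetected and no WARN fires, which is the safe direction for a diagnostic.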
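On the hw_breakpoint_test.c side, kunit_skip() replaces the error returns so that an unmet environmental precondition (a single-CPU machine, or breakpoints already in use) reports the tests as SKIPPED instead of failing the suite. A sketch of the same idiom in a generic KUnit init hook; example_test_init() is a made-up name, while kunit_skip() and num_online_cpus() are the real kernel APIs.

#include <kunit/test.h>
#include <linux/cpumask.h>

/*
 * Runs before each test case; kunit_skip() marks the current test
 * SKIPPED and aborts it, rather than counting it as a failure the
 * way a plain error return from init does.
 */
static int example_test_init(struct kunit *test)
{
	if (num_online_cpus() < 2)
		kunit_skip(test, "not enough cpus");
	return 0;
}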