 kernel/events/core.c | 155 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 91 insertions(+), 64 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89b47050a2e8..c27e04655d86 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -126,6 +126,38 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
return data.ret;
}
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ * - removing the last event from a task ctx; this is relatively
+ * straightforward and is done in __perf_remove_from_context().
+ *
+ * - adding the first event to a task ctx; this is tricky because we cannot
+ * rely on ctx->is_active and therefore cannot use event_function_call().
+ * See perf_install_in_context().
+ *
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
+ * to reliably determine if a particular task/context is scheduled in. The
+ * task_curr() use in task_function_call() is racy in that a remote context
+ * switch is not a single atomic operation.
+ *
+ * As is, the situation is 'safe' because we set rq->curr before we do the
+ * actual context switch. This means that task_curr() will fail early, but
+ * we'll continue spinning on ctx->is_active until we've passed
+ * perf_event_task_sched_out().
+ *
+ * Without this ctx->lock serialized variable we could have a race where we
+ * find the task (and hence the context) not active while in fact they are.
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
static void event_function_call(struct perf_event *event,
int (*active)(void *),
void (*inactive)(void *),
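For orientation, the retry pattern the new comment describes can be sketched as follows. This is a simplified illustration of how a helper with the event_function_call() signature shown above might combine the racy task_function_call() IPI with the ctx->lock serialized ctx->is_active check; it is a sketch of the described design, not the in-tree body, and the helper name is marked accordingly.

static void event_function_call_sketch(struct perf_event *event,
				       int (*active)(void *),
				       void (*inactive)(void *),
				       void *data)
{
	struct perf_event_context *ctx = event->ctx;
	struct task_struct *task = ctx->task;

	if (!task) {
		/* CPU contexts don't go away; the IPI always hits. */
		cpu_function_call(event->cpu, active, data);
		return;
	}

again:
	/* Racy: task_curr() can miss a task mid context switch. */
	if (!task_function_call(task, active, data))
		return;

	raw_spin_lock_irq(&ctx->lock);
	if (ctx->is_active) {
		/*
		 * The context is (still) scheduled in, the IPI merely
		 * missed it; reload ctx->task and retry.
		 */
		task = ctx->task;
		raw_spin_unlock_irq(&ctx->lock);
		goto again;
	}
	/* Not scheduled in, and ctx->lock keeps it that way: do the work here. */
	inactive(data);
	raw_spin_unlock_irq(&ctx->lock);
}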
@@ -1686,9 +1718,13 @@ static int __perf_remove_from_context(void *info)
if (re->detach_group)
perf_group_detach(event);
list_del_event(event, ctx);
- if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+
+ if (!ctx->nr_events && ctx->is_active) {
ctx->is_active = 0;
- cpuctx->task_ctx = NULL;
+ if (ctx->task) {
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+ cpuctx->task_ctx = NULL;
+ }
}
raw_spin_unlock(&ctx->lock);
@@ -2056,18 +2092,6 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
}
-static void ___perf_install_in_context(void *info)
-{
- struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
-
- /*
- * Since the task isn't running, its safe to add the event, us holding
- * the ctx->lock ensures the task won't get scheduled in.
- */
- add_event_to_ctx(event, ctx);
-}
-
static void ctx_resched(struct perf_cpu_context *cpuctx,
struct perf_event_context *task_ctx)
{
@@ -2086,55 +2110,27 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
*/
static int __perf_install_in_context(void *info)
{
- struct perf_event *event = info;
- struct perf_event_context *ctx = event->ctx;
+ struct perf_event_context *ctx = info;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event_context *task_ctx = cpuctx->task_ctx;
- struct task_struct *task = current;
-
- perf_ctx_lock(cpuctx, task_ctx);
- perf_pmu_disable(cpuctx->ctx.pmu);
- /*
- * If there was an active task_ctx schedule it out.
- */
- if (task_ctx)
- task_ctx_sched_out(cpuctx, task_ctx);
+ if (ctx->task) {
+ /*
+ * If we hit the 'wrong' task, we've since scheduled and
+ * everything should be sorted, nothing to do!
+ */
+ if (ctx->task != current)
+ return 0;
- /*
- * If the context we're installing events in is not the
- * active task_ctx, flip them.
- */
- if (ctx->task && task_ctx != ctx) {
- if (task_ctx)
- raw_spin_unlock(&task_ctx->lock);
- raw_spin_lock(&ctx->lock);
+ /*
+ * If task_ctx is set, it had better be to us.
+ */
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
task_ctx = ctx;
}
- if (task_ctx) {
- cpuctx->task_ctx = task_ctx;
- task = task_ctx->task;
- }
-
- cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
- update_context_time(ctx);
- /*
- * update cgrp time only if current cgrp
- * matches event->cgrp. Must be done before
- * calling add_event_to_ctx()
- */
- update_cgrp_time_from_event(event);
-
- add_event_to_ctx(event, ctx);
-
- /*
- * Schedule everything back in
- */
- perf_event_sched_in(cpuctx, task_ctx, task);
-
- perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_lock(cpuctx, task_ctx);
+ ctx_resched(cpuctx, task_ctx);
perf_ctx_unlock(cpuctx, task_ctx);
return 0;
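Pieced together from the hunk above, the remote side now reduces to a sanity check plus a reschedule of the contexts; roughly (reassembled from the diff lines for readability, with the comments shortened):

static int __perf_install_in_context(void *info)
{
	struct perf_event_context *ctx = info;
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	struct perf_event_context *task_ctx = cpuctx->task_ctx;

	if (ctx->task) {
		/* If we hit the 'wrong' task, we've since scheduled; nothing to do. */
		if (ctx->task != current)
			return 0;

		/* If task_ctx is set, it had better be to us. */
		WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
		task_ctx = ctx;
	}

	perf_ctx_lock(cpuctx, task_ctx);
	ctx_resched(cpuctx, task_ctx);
	perf_ctx_unlock(cpuctx, task_ctx);

	return 0;
}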
@@ -2148,14 +2144,38 @@ perf_install_in_context(struct perf_event_context *ctx,
struct perf_event *event,
int cpu)
{
+ struct task_struct *task = NULL;
+
lockdep_assert_held(&ctx->mutex);
event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;
- event_function_call(event, __perf_install_in_context,
- ___perf_install_in_context, event);
+ /*
+ * Installing events is tricky because we cannot rely on ctx->is_active
+ * to be set in case this is the nr_events 0 -> 1 transition.
+ *
+ * So what we do is we add the event to the list here, which will allow
+ * a future context switch to DTRT and then send a racy IPI. If the IPI
+ * fails to hit the right task, this means a context switch must have
+ * happened and that will have taken care of business.
+ */
+ raw_spin_lock_irq(&ctx->lock);
+ update_context_time(ctx);
+ /*
+ * Update cgrp time only if current cgrp matches event->cgrp.
+ * Must be done before calling add_event_to_ctx().
+ */
+ update_cgrp_time_from_event(event);
+ add_event_to_ctx(event, ctx);
+ task = ctx->task;
+ raw_spin_unlock_irq(&ctx->lock);
+
+ if (task)
+ task_function_call(task, __perf_install_in_context, ctx);
+ else
+ cpu_function_call(cpu, __perf_install_in_context, ctx);
}
/*
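On the installer side, the event is now added to the context under ctx->lock first and the IPI is sent afterwards, so a context switch that beats the IPI already finds the event on the list. Reassembled from the hunk above for readability (comments condensed; the function's return type is not visible in this hunk and is assumed to be void):

static void
perf_install_in_context(struct perf_event_context *ctx,
			struct perf_event *event,
			int cpu)
{
	struct task_struct *task = NULL;

	lockdep_assert_held(&ctx->mutex);

	event->ctx = ctx;
	if (event->cpu != -1)
		event->cpu = cpu;

	/*
	 * Add the event under ctx->lock so a future context switch will
	 * schedule it in, then send the racy IPI.  If the IPI misses the
	 * task, a context switch must have happened and has already done
	 * the work.
	 */
	raw_spin_lock_irq(&ctx->lock);
	update_context_time(ctx);
	update_cgrp_time_from_event(event);
	add_event_to_ctx(event, ctx);
	task = ctx->task;
	raw_spin_unlock_irq(&ctx->lock);

	if (task)
		task_function_call(task, __perf_install_in_context, ctx);
	else
		cpu_function_call(cpu, __perf_install_in_context, ctx);
}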
@@ -2328,6 +2348,16 @@ static void ctx_sched_out(struct perf_event_context *ctx,
lockdep_assert_held(&ctx->lock);
+ if (likely(!ctx->nr_events)) {
+ /*
+ * See __perf_remove_from_context().
+ */
+ WARN_ON_ONCE(ctx->is_active);
+ if (ctx->task)
+ WARN_ON_ONCE(cpuctx->task_ctx);
+ return;
+ }
+
ctx->is_active &= ~event_type;
if (ctx->task) {
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
@@ -2335,9 +2365,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
cpuctx->task_ctx = NULL;
}
- if (likely(!ctx->nr_events))
- return;
-
update_context_time(ctx);
update_cgrp_time_from_cpuctx(cpuctx);
if (!ctx->nr_active)
@@ -2716,6 +2743,9 @@ ctx_sched_in(struct perf_event_context *ctx,
lockdep_assert_held(&ctx->lock);
+ if (likely(!ctx->nr_events))
+ return;
+
ctx->is_active |= event_type;
if (ctx->task) {
if (!is_active)
@@ -2724,9 +2754,6 @@ ctx_sched_in(struct perf_event_context *ctx,
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
}
- if (likely(!ctx->nr_events))
- return;
-
now = perf_clock();
ctx->timestamp = now;
perf_cgroup_set_timestamp(task, ctx);