diff options
author | Wanpeng Li <wanpeng.li@hotmail.com> | 2016-08-31 18:27:44 +0800 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-09-05 13:29:45 +0200 |
commit | 61c7aca695b6fabe85d0fc424fe8ae2f66f267dd (patch) | |
tree | 9229a25dcbae4bd98d3899989415d86b453b3370 /kernel/sched/deadline.c | |
parent | efca03ecbe29a46c2c5ae539563b6326af9dcba7 (diff) |
sched/deadline: Fix the intention to re-evalute tick dependency for offline CPU
The dl task will be replenished after dl task timer fire and start a
new period. It will be enqueued and to re-evaluate its dependency on
the tick in order to restart it. However, if the CPU is hot-unplugged,
irq_work_queue will splash since the target CPU is offline.
As a result we get:
WARNING: CPU: 2 PID: 0 at kernel/irq_work.c:69 irq_work_queue_on+0xad/0xe0
Call Trace:
dump_stack+0x99/0xd0
__warn+0xd1/0xf0
warn_slowpath_null+0x1d/0x20
irq_work_queue_on+0xad/0xe0
tick_nohz_full_kick_cpu+0x44/0x50
tick_nohz_dep_set_cpu+0x74/0xb0
enqueue_task_dl+0x226/0x480
activate_task+0x5c/0xa0
dl_task_timer+0x19b/0x2c0
? push_dl_task.part.31+0x190/0x190
This can be triggered by hot-unplugging the full dynticks CPU which dl
task is running on.
We enqueue the dl task on the offline CPU, because we need to do
replenish for start_dl_timer(). So, as Juri pointed out, we would
need to do is calling replenish_dl_entity() directly, instead of
enqueue_task_dl(). pi_se shouldn't be a problem as the task shouldn't
be boosted if it was throttled.
This patch fixes it by avoiding the whole enqueue+dequeue+enqueue story, by
first migrating (set_task_cpu()) and then doing 1 enqueue.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@unitn.it>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1472639264-3932-1-git-send-email-wanpeng.li@hotmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r-- | kernel/sched/deadline.c | 46 |
1 files changed, 18 insertions, 28 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 18fb0b8fc911..0c75bc656178 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -243,10 +243,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq); static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p) { struct rq *later_rq = NULL; - bool fallback = false; later_rq = find_lock_later_rq(p, rq); - if (!later_rq) { int cpu; @@ -254,7 +252,6 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online cpu. */ - fallback = true; cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); if (cpu >= nr_cpu_ids) { /* @@ -274,16 +271,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p double_lock_balance(rq, later_rq); } - /* - * By now the task is replenished and enqueued; migrate it. - */ - deactivate_task(rq, p, 0); set_task_cpu(p, later_rq->cpu); - activate_task(later_rq, p, 0); - - if (!fallback) - resched_curr(later_rq); - double_unlock_balance(later_rq, rq); return later_rq; @@ -641,29 +629,31 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) goto unlock; } - enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); - if (dl_task(rq->curr)) - check_preempt_curr_dl(rq, p, 0); - else - resched_curr(rq); - #ifdef CONFIG_SMP - /* - * Perform balancing operations here; after the replenishments. We - * cannot drop rq->lock before this, otherwise the assertion in - * start_dl_timer() about not missing updates is not true. - * - * If we find that the rq the task was on is no longer available, we - * need to select a new rq. - * - * XXX figure out if select_task_rq_dl() deals with offline cpus. - */ if (unlikely(!rq->online)) { + /* + * If the runqueue is no longer available, migrate the + * task elsewhere. This necessarily changes rq. + */ lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + + /* + * Now that the task has been migrated to the new RQ and we + * have that locked, proceed as normal and enqueue the task + * there. + */ } +#endif + enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); + if (dl_task(rq->curr)) + check_preempt_curr_dl(rq, p, 0); + else + resched_curr(rq); + +#ifdef CONFIG_SMP /* * Queueing this task back might have overloaded rq, check if we need * to kick someone away. |