diff options
-rw-r--r-- | include/linux/cpuhotplug.h | 1 | ||||
-rw-r--r-- | include/linux/sched/hotplug.h | 2 | ||||
-rw-r--r-- | kernel/cpu.c | 9 | ||||
-rw-r--r-- | kernel/sched/core.c | 154 |
4 files changed, 46 insertions, 120 deletions
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bc56287a1ed1..0042ef362511 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 9a62ffdd296f..412cdaba33eb 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_wait_empty(unsigned int cpu); extern int sched_cpu_dying(unsigned int cpu); #else +# define sched_cpu_wait_empty NULL # define sched_cpu_dying NULL #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 6ff2578ecf17..fa535eaa4826 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { .name = "ap:online", }, /* - * Handled on controll processor until the plugged processor manages + * Handled on control processor until the plugged processor manages * this itself. */ [CPUHP_TEARDOWN_CPU] = { @@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states[] = { .teardown.single = takedown_cpu, .cant_stop = true, }, + + [CPUHP_AP_SCHED_WAIT_EMPTY] = { + .name = "sched:waitempty", + .startup.single = NULL, + .teardown.single = sched_cpu_wait_empty, + }, + /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e1093c443ff9..6c89806c834b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6741,120 +6741,6 @@ void idle_task_exit(void) /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } -/* - * Since this CPU is going 'away' for a while, fold any nr_active delta - * we might have. Assumes we're called after migrate_tasks() so that the - * nr_active count is stable. We need to take the teardown thread which - * is calling this into account, so we hand in adjust = 1 to the load - * calculation. - * - * Also see the comment "Global load-average calculations". - */ -static void calc_load_migrate(struct rq *rq) -{ - long delta = calc_load_fold_active(rq, 1); - if (delta) - atomic_long_add(delta, &calc_load_tasks); -} - -static struct task_struct *__pick_migrate_task(struct rq *rq) -{ - const struct sched_class *class; - struct task_struct *next; - - for_each_class(class) { - next = class->pick_next_task(rq); - if (next) { - next->sched_class->put_prev_task(rq, next); - return next; - } - } - - /* The idle class should always have a runnable task */ - BUG(); -} - -/* - * Migrate all tasks from the rq, sleeping tasks will be migrated by - * try_to_wake_up()->select_task_rq(). - * - * Called with rq->lock held even though we'er in stop_machine() and - * there's no concurrency possible, we hold the required locks anyway - * because of lock validation efforts. - */ -static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) -{ - struct rq *rq = dead_rq; - struct task_struct *next, *stop = rq->stop; - struct rq_flags orf = *rf; - int dest_cpu; - - /* - * Fudge the rq selection such that the below task selection loop - * doesn't get stuck on the currently eligible stop task. - * - * We're currently inside stop_machine() and the rq is either stuck - * in the stop_machine_cpu_stop() loop, or we're executing this code, - * either way we should never end up calling schedule() until we're - * done here. - */ - rq->stop = NULL; - - /* - * put_prev_task() and pick_next_task() sched - * class method both need to have an up-to-date - * value of rq->clock[_task] - */ - update_rq_clock(rq); - - for (;;) { - /* - * There's this thread running, bail when that's the only - * remaining thread: - */ - if (rq->nr_running == 1) - break; - - next = __pick_migrate_task(rq); - - /* - * Rules for changing task_struct::cpus_mask are holding - * both pi_lock and rq->lock, such that holding either - * stabilizes the mask. - * - * Drop rq->lock is not quite as disastrous as it usually is - * because !cpu_active at this point, which means load-balance - * will not interfere. Also, stop-machine. - */ - rq_unlock(rq, rf); - raw_spin_lock(&next->pi_lock); - rq_relock(rq, rf); - - /* - * Since we're inside stop-machine, _nothing_ should have - * changed the task, WARN if weird stuff happened, because in - * that case the above rq->lock drop is a fail too. - */ - if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { - raw_spin_unlock(&next->pi_lock); - continue; - } - - /* Find suitable destination for @next, with force if needed. */ - dest_cpu = select_fallback_rq(dead_rq->cpu, next); - rq = __migrate_task(rq, rf, next, dest_cpu); - if (rq != dead_rq) { - rq_unlock(rq, rf); - rq = dead_rq; - *rf = orf; - rq_relock(rq, rf); - } - raw_spin_unlock(&next->pi_lock); - } - - rq->stop = stop; -} - static int __balance_push_cpu_stop(void *arg) { struct task_struct *p = arg; @@ -7123,10 +7009,6 @@ int sched_cpu_deactivate(unsigned int cpu) return ret; } sched_domains_numa_masks_clear(cpu); - - /* Wait for all non per CPU kernel threads to vanish. */ - balance_hotplug_wait(); - return 0; } @@ -7146,6 +7028,41 @@ int sched_cpu_starting(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU + +/* + * Invoked immediately before the stopper thread is invoked to bring the + * CPU down completely. At this point all per CPU kthreads except the + * hotplug thread (current) and the stopper thread (inactive) have been + * either parked or have been unbound from the outgoing CPU. Ensure that + * any of those which might be on the way out are gone. + * + * If after this point a bound task is being woken on this CPU then the + * responsible hotplug callback has failed to do it's job. + * sched_cpu_dying() will catch it with the appropriate fireworks. + */ +int sched_cpu_wait_empty(unsigned int cpu) +{ + balance_hotplug_wait(); + return 0; +} + +/* + * Since this CPU is going 'away' for a while, fold any nr_active delta we + * might have. Called from the CPU stopper task after ensuring that the + * stopper is the last running task on the CPU, so nr_active count is + * stable. We need to take the teardown thread which is calling this into + * account, so we hand in adjust = 1 to the load calculation. + * + * Also see the comment "Global load-average calculations". + */ +static void calc_load_migrate(struct rq *rq) +{ + long delta = calc_load_fold_active(rq, 1); + + if (delta) + atomic_long_add(delta, &calc_load_tasks); +} + int sched_cpu_dying(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); @@ -7159,7 +7076,6 @@ int sched_cpu_dying(unsigned int cpu) BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } - migrate_tasks(rq, &rf); BUG_ON(rq->nr_running != 1); rq_unlock_irqrestore(rq, &rf); |