| author | James Simmons <jsimmons@infradead.org> | 2012-11-14 18:15:32 +0000 |
|---|---|---|
| committer | James Simmons <jsimmons@infradead.org> | 2012-11-14 18:15:32 +0000 |
| commit | b44959f2817032bc4668434bbe1cf9ab28b8cb21 (patch) | |
| tree | de3bd861b36c836ac9b7b6e8dc179ca1985b6593 /kernel/sched/fair.c | |
| parent | e22956a2faa9cf34cec4a471131d50c6be75b0c2 (diff) | |
| parent | 2216c9e74fb3baac3cb73952158dbe38b703997e (diff) | |
Conflicts:
drivers/gpu/drm/tdfx/tdfx_drv.c
Diffstat (limited to 'kernel/sched/fair.c')
| -rw-r--r-- | kernel/sched/fair.c | 134 |

1 file changed, 107 insertions, 27 deletions
```diff
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c099cc6eebe..96e2b18b628 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -3068,18 +3068,24 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
+#define LBF_SOME_PINNED 0x04
 
 struct lb_env {
 	struct sched_domain	*sd;
 
-	int			src_cpu;
 	struct rq		*src_rq;
+	int			src_cpu;
 
 	int			dst_cpu;
 	struct rq		*dst_rq;
 
+	struct cpumask		*dst_grpmask;
+	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3145,9 +3151,31 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	 * 3) are cache-hot on their current CPU.
 	 */
 	if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
+		int new_dst_cpu;
+
 		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+
+		/*
+		 * Remember if this task can be migrated to any other cpu in
+		 * our sched_group. We may want to revisit it if we couldn't
+		 * meet load balance goals by pulling other tasks on src_cpu.
+		 *
+		 * Also avoid computing new_dst_cpu if we have already computed
+		 * one in current iteration.
+		 */
+		if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
+			return 0;
+
+		new_dst_cpu = cpumask_first_and(env->dst_grpmask,
+						tsk_cpus_allowed(p));
+		if (new_dst_cpu < nr_cpu_ids) {
+			env->flags |= LBF_SOME_PINNED;
+			env->new_dst_cpu = new_dst_cpu;
+		}
 		return 0;
 	}
+
+	/* Record that we found at least one task that could run on dst_cpu */
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
@@ -3373,6 +3401,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -3636,14 +3672,12 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3660,7 +3694,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;
 
-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		nr_running = rq->nr_running;
@@ -3784,13 +3818,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sds: variable to hold the statistics for this sched_domain.
 */
 static inline void update_sd_lb_stats(struct lb_env *env,
-				      const struct cpumask *cpus,
-				      int *balance, struct sd_lb_stats *sds)
+				      int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3807,8 +3839,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 		local_group = cpumask_test_cpu(env->dst_cpu,
 					       sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-				   cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
@@ -4044,7 +4075,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 * to restore balance.
 *
 * @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *	is the appropriate cpu to perform load balancing at this_level.
 *
@@ -4054,7 +4084,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 *	put to idle by rebalancing its tasks onto our group.
 */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -4064,7 +4094,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relevant for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);
 
 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4144,8 +4174,7 @@ ret:
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4160,7 +4189,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);
 
-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -4227,7 +4256,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int ld_moved, active_balance = 0;
+	int ld_moved, cur_ld_moved, active_balance = 0;
+	int lb_iterations, max_lb_iterations;
 	struct sched_group *group;
 	struct rq *busiest;
 	unsigned long flags;
@@ -4237,16 +4267,19 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.sd		= sd,
 		.dst_cpu	= this_cpu,
 		.dst_rq		= this_rq,
+		.dst_grpmask	= sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
+	max_lb_iterations = cpumask_weight(env.dst_grpmask);
 
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);
 
 	if (*balance == 0)
 		goto out_balanced;
@@ -4256,7 +4289,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4267,6 +4300,7 @@ redo:
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
 	ld_moved = 0;
+	lb_iterations = 1;
 	if (busiest->nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
@@ -4279,12 +4313,17 @@ redo:
 		env.src_rq	= busiest;
 		env.loop_max	= min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		if (!env.loop)
-			update_h_load(env.src_cpu);
-		ld_moved += move_tasks(&env);
+
+		/*
+		 * cur_ld_moved - load moved in current iteration
+		 * ld_moved    - cumulative load moved across iterations
+		 */
+		cur_ld_moved = move_tasks(&env);
+		ld_moved += cur_ld_moved;
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
 
@@ -4296,14 +4335,52 @@ more_balance:
 		/*
 		 * some other cpu did the load balance for us.
 		 */
-		if (ld_moved && this_cpu != smp_processor_id())
-			resched_cpu(this_cpu);
+		if (cur_ld_moved && env.dst_cpu != smp_processor_id())
+			resched_cpu(env.dst_cpu);
+
+		/*
+		 * Revisit (affine) tasks on src_cpu that couldn't be moved to
+		 * us and move them to an alternate dst_cpu in our sched_group
+		 * where they can run. The upper limit on how many times we
+		 * iterate on same src_cpu is dependent on number of cpus in our
+		 * sched_group.
+		 *
+		 * This changes load balance semantics a bit on who can move
+		 * load to a given_cpu. In addition to the given_cpu itself
+		 * (or a ilb_cpu acting on its behalf where given_cpu is
+		 * nohz-idle), we now have balance_cpu in a position to move
+		 * load to given_cpu. In rare situations, this may cause
+		 * conflicts (balance_cpu and given_cpu/ilb_cpu deciding
+		 * _independently_ and at _same_ time to move some load to
+		 * given_cpu) causing excess load to be moved to given_cpu.
+		 * This however should not happen so much in practice and
+		 * moreover subsequent load balance cycles should correct the
+		 * excess load moved.
+		 */
+		if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
+				lb_iterations++ < max_lb_iterations) {
+
+			this_rq		 = cpu_rq(env.new_dst_cpu);
+			env.dst_rq	 = this_rq;
+			env.dst_cpu	 = env.new_dst_cpu;
+			env.flags	&= ~LBF_SOME_PINNED;
+			env.loop	 = 0;
+			env.loop_break	 = sched_nr_migrate_break;
+			/*
+			 * Go back to "more_balance" rather than "redo" since we
+			 * need to continue with same src_cpu.
+			 */
+			goto more_balance;
+		}
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(env.flags & LBF_ALL_PINNED)) {
 			cpumask_clear_cpu(cpu_of(busiest), cpus);
-			if (!cpumask_empty(cpus))
+			if (!cpumask_empty(cpus)) {
+				env.loop = 0;
+				env.loop_break = sched_nr_migrate_break;
 				goto redo;
+			}
 			goto out_balanced;
 		}
 	}
@@ -4891,6 +4968,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */
```
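A few illustrative sketches of the patterns this patch leans on follow. First, the opening two hunks pair the real `unthrottle_offline_cfs_rqs()` with a `static inline` no-op stub under the `#else` branch, so callers like `rq_offline_fair()` need no `#ifdef` of their own. A minimal compilable sketch of that idiom, where `FEATURE_X` and the trivial `struct rq` are stand-ins (not kernel definitions) for `CONFIG_CFS_BANDWIDTH` and the real runqueue:

```c
#include <stdio.h>

struct rq { int cpu; };   /* trivial stand-in for the kernel's runqueue */

#ifdef FEATURE_X
/* Real implementation, compiled only when the feature is enabled. */
static void unthrottle_offline_cfs_rqs(struct rq *rq)
{
	printf("unthrottling CFS runqueues on cpu %d\n", rq->cpu);
}
#else
/* No-op stub: call sites stay unconditional and the call compiles away. */
static inline void unthrottle_offline_cfs_rqs(struct rq *rq) { (void)rq; }
#endif

int main(void)
{
	struct rq rq = { .cpu = 3 };

	/* Same call site either way; try building with and without -DFEATURE_X. */
	unthrottle_offline_cfs_rqs(&rq);
	return 0;
}
```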
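In `can_migrate_task()`, a task pinned away from `dst_cpu` is no longer a dead end: `cpumask_first_and()` finds the first CPU that is in both the destination group's mask and the task's allowed mask, and `LBF_SOME_PINNED` records that a retry with that CPU is worthwhile. A rough userspace model of just the selection step, with cpumasks reduced to 64-bit words and `first_cpu_and()` as a hypothetical stand-in for `cpumask_first_and()`:

```c
#include <stdio.h>

#define NR_CPUS 64

typedef unsigned long long cpumask_t;   /* one bit per CPU, CPU 0 = bit 0 */

/* First CPU present in both masks, or NR_CPUS if the masks are disjoint. */
static int first_cpu_and(cpumask_t a, cpumask_t b)
{
	cpumask_t both = a & b;

	return both ? __builtin_ctzll(both) : NR_CPUS;
}

int main(void)
{
	cpumask_t dst_grpmask  = 0x0f;  /* our sched_group: CPUs 0-3 */
	cpumask_t cpus_allowed = 0x0c;  /* task pinned to CPUs 2-3   */
	int new_dst_cpu = first_cpu_and(dst_grpmask, cpus_allowed);

	if (new_dst_cpu < NR_CPUS)
		printf("pinned task could still run on cpu %d\n", new_dst_cpu);
	else
		printf("task cannot run anywhere in this group\n");
	return 0;
}
```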
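`update_h_load()` now stamps `rq->h_load_throttle` with the current `jiffies` value and returns early if it already ran during this tick, so the hierarchical `walk_tg_tree()` pass runs at most once per jiffy per runqueue even when repeated `more_balance` iterations hit the same source CPU. The same timestamp-throttle pattern sketched in userspace, with `time()` as a coarse stand-in for `jiffies` and all names illustrative:

```c
#include <stdio.h>
#include <time.h>

static unsigned long h_load_throttle;   /* tick of the last recomputation */

static void walk_tg_tree(void)
{
	puts("expensive hierarchical load recomputation");
}

static void update_h_load(void)
{
	unsigned long now = (unsigned long)time(NULL);   /* ~jiffies */

	if (h_load_throttle == now)
		return;                  /* already refreshed during this tick */

	h_load_throttle = now;
	walk_tg_tree();
}

int main(void)
{
	/* Only the first call within a given second does the real work. */
	update_h_load();
	update_h_load();
	update_h_load();
	return 0;
}
```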
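These pieces meet in `load_balance()`: when a pass leaves `env.imbalance > 0` and `LBF_SOME_PINNED` set, control jumps back to `more_balance` with `dst_cpu` swapped for the recorded alternate, bounded by the weight of the destination group so the loop cannot spin forever. A compilable toy of that control flow only; `move_tasks()` here is a fake that pretends every task is pinned away from CPU 0, and the struct is a pared-down imitation of `struct lb_env`:

```c
#include <stdio.h>

#define LBF_SOME_PINNED 0x04

struct lb_env {
	unsigned int flags;
	long         imbalance;
	int          dst_cpu;
	int          new_dst_cpu;
};

/* Fake balancer: nothing can move to CPU 0, everything moves elsewhere. */
static long move_tasks(struct lb_env *env)
{
	if (env->dst_cpu == 0) {
		env->flags |= LBF_SOME_PINNED;
		env->new_dst_cpu = 1;        /* alternate destination in our group */
		return 0;
	}
	env->imbalance = 0;
	return 1;
}

int main(void)
{
	struct lb_env env = { .imbalance = 4, .dst_cpu = 0 };
	int lb_iterations = 1;
	int max_lb_iterations = 4;           /* ~cpumask_weight(dst_grpmask) */

more_balance:
	move_tasks(&env);
	if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
	    lb_iterations++ < max_lb_iterations) {
		env.dst_cpu = env.new_dst_cpu;   /* same src_cpu, new destination */
		env.flags &= ~LBF_SOME_PINNED;
		goto more_balance;
	}

	printf("settled after %d iteration(s), imbalance=%ld\n",
	       lb_iterations, env.imbalance);
	return 0;
}
```

Retrying at `more_balance` rather than `redo` is the key design choice: the source runqueue stays fixed, so the retry only re-targets tasks that the previous pass proved were movable somewhere else in the group.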