summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorRicardo Neri <ricardo.neri-calderon@linux.intel.com>2023-04-06 13:31:42 -0700
committerPeter Zijlstra <peterz@infradead.org>2023-05-08 10:58:35 +0200
commit43726bdedd29797d8e1fee2e7300a6d2b9a74ba8 (patch)
tree17a46aa138e80a9f0b600096cd00ba86999230ea /kernel/sched
parent5fd6d7f43958cb62da105c8413eac3e78480f09a (diff)
sched/fair: Use the busiest group to set prefer_sibling
The prefer_sibling setting acts on the busiest group to move excess tasks to the local group. This should be done as per request of the child of the busiest group's sched domain, not the local group's. Using the flags of the child domain of the local group works fortuitously if both groups have child domains. There are cases, however, in which the busiest group's sched domain has child but the local group's does not. Consider, for instance a non-SMT core (or an SMT core with only one online sibling) doing load balance with an SMT core at the MC level. SD_PREFER_SIBLING of the busiest group's child domain will not be honored. We are left with a fully busy SMT core and an idle non-SMT core. Suggested-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Zhang Rui <rui.zhang@intel.com> Link: https://lore.kernel.org/r/20230406203148.19182-7-ricardo.neri-calderon@linux.intel.com
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/fair.c15
1 files changed, 11 insertions, 4 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a9f04095742..3bb89346dbb3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10109,7 +10109,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
{
- struct sched_domain *child = env->sd->child;
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats *local = &sds->local_stat;
struct sg_lb_stats tmp_sgs;
@@ -10150,8 +10149,13 @@ next_group:
sg = sg->next;
} while (sg != env->sd->groups);
- /* Tag domain that child domain prefers tasks go to siblings first */
- sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
+ /*
+ * Indicate that the child domain of the busiest group prefers tasks
+ * go to a child's sibling domains first. NB the flags of a sched group
+ * are those of the child domain.
+ */
+ if (sds->busiest)
+ sds->prefer_sibling = !!(sds->busiest->flags & SD_PREFER_SIBLING);
if (env->sd->flags & SD_NUMA)
@@ -10461,7 +10465,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto out_balanced;
}
- /* Try to move all excess tasks to child's sibling domain */
+ /*
+ * Try to move all excess tasks to a sibling domain of the busiest
+ * group's child domain.
+ */
if (sds.prefer_sibling && local->group_type == group_has_spare &&
busiest->sum_nr_running > local->sum_nr_running + 1)
goto force_balance;