summaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2024-08-20 08:55:03 -1000
committer Tejun Heo <tj@kernel.org> 2024-08-20 08:55:26 -1000
commit 5ac998574f93ac042cb84b4f1d919e2b20966afe (patch)
tree 4ee83c519b85793071813b10c2dcbbe63d44ab2e /kernel/sched/core.c
parent 89909296a51e792f296e52e104a04aed0cb7a9e9 (diff)
parent aef6987d89544d63a47753cf3741cabff0b5574c (diff)
Merge branch 'tip/sched/core' into for-6.12
To receive 863ccdbb918a ("sched: Allow sched_class::dequeue_task() to fail") which makes sched_class.dequeue_task() return bool instead of void. This leads to compile breakage and will be fixed by a follow-up patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- kernel/sched/core.c 71
1 files changed, 55 insertions, 16 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0e64a9362aa1..b0cec06bb1fa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -166,7 +166,7 @@ static inline int __task_prio(const struct task_struct *p)
if (p->dl_server)
return -1; /* deadline */
- if (rt_prio(p->prio)) /* includes deadline */
+ if (rt_or_dl_prio(p->prio))
return p->prio; /* [-1, 99] */
if (p->sched_class == &idle_sched_class)
@@ -1702,6 +1702,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_inc_id(rq, p, clamp_id);
@@ -1726,6 +1729,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_dec_id(rq, p, clamp_id);
}
@@ -2005,14 +2011,21 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
}
- uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
+ /*
+ * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
+ * ->sched_delayed.
+ */
+ uclamp_rq_inc(rq, p);
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
}
-void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+/*
+ * Must only return false when DEQUEUE_SLEEP.
+ */
+inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (sched_core_enabled(rq))
sched_core_dequeue(rq, p, flags);
@@ -2025,8 +2038,12 @@ void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
psi_dequeue(p, flags & DEQUEUE_SLEEP);
}
+ /*
+ * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
+ * and mark the task ->sched_delayed.
+ */
uclamp_rq_dec(rq, p);
- p->sched_class->dequeue_task(rq, p, flags);
+ return p->sched_class->dequeue_task(rq, p, flags);
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2044,12 +2061,25 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
- WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+ SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
+
+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+ /*
+ * Code explicitly relies on TASK_ON_RQ_MIGRATING being set *before*
+ * dequeue_task() and cleared *after* enqueue_task().
+ */
+
dequeue_task(rq, p, flags);
}
+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+ if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+ __block_task(rq, p);
+}
+
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
@@ -3697,12 +3727,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
+ update_rq_clock(rq);
+ if (p->se.sched_delayed)
+ enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
if (!task_on_cpu(rq, p)) {
/*
* When on_rq && !on_cpu the task is preempted, see if
* it should preempt the task that is current now.
*/
- update_rq_clock(rq);
wakeup_preempt(rq, p, wake_flags);
}
ttwu_do_wakeup(p);
@@ -4091,11 +4123,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* case the whole 'p->on_rq && ttwu_runnable()' case below
* without taking any locks.
*
+ * Specifically, given current runs ttwu() we must be before
+ * schedule()'s block_task(), as such this must not observe
+ * sched_delayed.
+ *
* In particular:
* - we rely on Program-Order guarantees for all the ordering,
* - we're serialized against set_special_state() by virtue of
* it disabling IRQs (this allows not taking ->pi_lock).
*/
+ SCHED_WARN_ON(p->se.sched_delayed);
if (!ttwu_state_match(p, state, &success))
goto out;
@@ -4384,9 +4421,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.nr_migrations = 0;
p->se.vruntime = 0;
p->se.vlag = 0;
- p->se.slice = sysctl_sched_base_slice;
INIT_LIST_HEAD(&p->se.group_node);
+ /* A delayed task cannot be in clone(). */
+ SCHED_WARN_ON(p->se.sched_delayed);
+
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
#endif
@@ -4638,6 +4677,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false);
+ p->se.custom_slice = 0;
+ p->se.slice = sysctl_sched_base_slice;
/*
* We don't need the reset flag anymore after the fork. It has
@@ -6562,13 +6603,15 @@ static void __sched notrace __schedule(unsigned int sched_mode)
if (signal_pending_state(prev_state, prev)) {
WRITE_ONCE(prev->__state, TASK_RUNNING);
} else {
+ int flags = DEQUEUE_NOCLOCK;
+
prev->sched_contributes_to_load =
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev_state & TASK_FROZEN);
- if (prev->sched_contributes_to_load)
- rq->nr_uninterruptible++;
+ if (unlikely(is_special_task_state(prev_state)))
+ flags |= DEQUEUE_SPECIAL;
/*
* __schedule() ttwu()
@@ -6581,12 +6624,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
*
* After this, schedule() must not care about p->state any more.
*/
- deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-
- if (prev->in_iowait) {
- atomic_inc(&rq->nr_iowait);
- delayacct_blkio_start();
- }
+ block_task(rq, prev, flags);
}
switch_count = &prev->nvcsw;
}
@@ -8461,6 +8499,7 @@ void __init sched_init(void)
}
set_load_weight(&init_task, false);
+ init_task.se.slice = sysctl_sched_base_slice,
/*
* The boot idle thread does lazy MMU switching as well:
@@ -8677,7 +8716,7 @@ void normalize_rt_tasks(void)
schedstat_set(p->stats.sleep_start, 0);
schedstat_set(p->stats.block_start, 0);
- if (!dl_task(p) && !rt_task(p)) {
+ if (!rt_or_dl_task(p)) {
/*
* Renice negative nice level userspace
* tasks back to 0: