author     Tejun Heo <tj@kernel.org>    2024-09-11 08:43:26 -1000
committer  Tejun Heo <tj@kernel.org>    2024-09-11 08:43:26 -1000
commit     0b1777f0fa045c561fd26c8fda61f5eb7a930ed3 (patch)
tree       7f61b85ef4ab6ba1f4d5b6ee477c9f5b36a92690 /kernel/sched/core.c
parent     513ed0c7ccc103c2ff668154854ec410729a3170 (diff)
parent     bc9057da1a220ff2cb6c8885fd5352558aceba2c (diff)
Merge branch 'tip/sched/core' into sched_ext/for-6.12
Pull in tip/sched/core to resolve two merge conflicts:

- 96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
  5d871a63997f ("sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c")

  A simple context conflict. The former added __update_blocked_others() in the
  same #ifdef CONFIG_SMP block that effective_cpu_util() and sched_cpu_util()
  are in and the latter moved those functions to fair.c. This makes
  __update_blocked_others() more out of place. Will follow up with a patch to
  relocate.

- 96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
  84d265281d6c ("sched/pelt: Use rq_clock_task() for hw_pressure")

  The former factored out the body of __update_blocked_others() into
  update_other_load_avgs(). The latter changed how update_hw_load_avg() is
  called in the body. Resolved by applying the change to
  update_other_load_avgs() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
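For readers following the second resolution above, here is a hedged sketch of
roughly what the factored-out helper looks like once the rq_clock_task()
change from 84d265281d6c is applied to it instead of to
__update_blocked_others(). The helper names mirror the kernel's PELT API, but
the exact body is an approximation rather than the verbatim upstream function:

/*
 * Sketch of the merge resolution: the hw_pressure clock change lands in
 * update_other_load_avgs() rather than __update_blocked_others().
 * Approximate body, not the verbatim upstream code.
 */
bool update_other_load_avgs(struct rq *rq)
{
	u64 now = rq_clock_pelt(rq);
	const struct sched_class *curr_class = rq->curr->sched_class;
	unsigned long hw_pressure = arch_scale_hw_pressure(cpu_of(rq));

	lockdep_assert_rq_held(rq);

	/* hw_pressure is sampled with rq_clock_task(), per 84d265281d6c */
	return update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
	       update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
	       update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure) |
	       update_irq_load_avg(rq, 0);
}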
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  51
1 file changed, 32 insertions, 19 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b01b63e78d5e..c415d61a646b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -267,6 +267,9 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
void sched_core_enqueue(struct rq *rq, struct task_struct *p)
{
+ if (p->se.sched_delayed)
+ return;
+
rq->core->core_task_seq++;
if (!p->core_cookie)
@@ -277,6 +280,9 @@ void sched_core_enqueue(struct rq *rq, struct task_struct *p)
void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
{
+ if (p->se.sched_delayed)
+ return;
+
rq->core->core_task_seq++;
if (sched_core_enqueued(p)) {
@@ -6477,19 +6483,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* Constants for the sched_mode argument of __schedule().
*
* The mode argument allows RT enabled kernels to differentiate a
- * preemption from blocking on an 'sleeping' spin/rwlock. Note that
- * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
- * optimize the AND operation out and just check for zero.
+ * preemption from blocking on an 'sleeping' spin/rwlock.
*/
-#define SM_NONE 0x0
-#define SM_PREEMPT 0x1
-#define SM_RTLOCK_WAIT 0x2
-
-#ifndef CONFIG_PREEMPT_RT
-# define SM_MASK_PREEMPT (~0U)
-#else
-# define SM_MASK_PREEMPT SM_PREEMPT
-#endif
+#define SM_IDLE (-1)
+#define SM_NONE 0
+#define SM_PREEMPT 1
+#define SM_RTLOCK_WAIT 2
/*
* __schedule() is the main scheduler function.
@@ -6530,9 +6529,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
*
* WARNING: must be called with preemption disabled!
*/
-static void __sched notrace __schedule(unsigned int sched_mode)
+static void __sched notrace __schedule(int sched_mode)
{
struct task_struct *prev, *next;
+ /*
+ * On PREEMPT_RT kernel, SM_RTLOCK_WAIT is noted
+ * as a preemption by schedule_debug() and RCU.
+ */
+ bool preempt = sched_mode > SM_NONE;
unsigned long *switch_count;
unsigned long prev_state;
struct rq_flags rf;
@@ -6543,13 +6547,13 @@ static void __sched notrace __schedule(unsigned int sched_mode)
rq = cpu_rq(cpu);
prev = rq->curr;
- schedule_debug(prev, !!sched_mode);
+ schedule_debug(prev, preempt);
if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
hrtick_clear(rq);
local_irq_disable();
- rcu_note_context_switch(!!sched_mode);
+ rcu_note_context_switch(preempt);
/*
* Make sure that signal_pending_state()->signal_pending() below
@@ -6578,12 +6582,20 @@ static void __sched notrace __schedule(unsigned int sched_mode)
switch_count = &prev->nivcsw;
+ /* Task state changes only considers SM_PREEMPT as preemption */
+ preempt = sched_mode == SM_PREEMPT;
+
/*
* We must load prev->state once (task_struct::state is volatile), such
* that we form a control dependency vs deactivate_task() below.
*/
prev_state = READ_ONCE(prev->__state);
- if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
+ if (sched_mode == SM_IDLE) {
+ if (!rq->nr_running) {
+ next = prev;
+ goto picked;
+ }
+ } else if (!preempt && prev_state) {
if (signal_pending_state(prev_state, prev)) {
WRITE_ONCE(prev->__state, TASK_RUNNING);
} else {
@@ -6614,6 +6626,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
}
next = pick_next_task(rq, prev, &rf);
+picked:
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
@@ -6655,7 +6668,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
psi_account_irqtime(rq, prev, next);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
+ trace_sched_switch(preempt, prev, next, prev_state);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
@@ -6731,7 +6744,7 @@ static void sched_update_worker(struct task_struct *tsk)
}
}
-static __always_inline void __schedule_loop(unsigned int sched_mode)
+static __always_inline void __schedule_loop(int sched_mode)
{
do {
preempt_disable();
@@ -6776,7 +6789,7 @@ void __sched schedule_idle(void)
*/
WARN_ON_ONCE(current->__state);
do {
- __schedule(SM_NONE);
+ __schedule(SM_IDLE);
} while (need_resched());
}