summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2024-10-17 09:58:07 +0200
committerIngo Molnar <mingo@kernel.org>2024-10-17 09:58:07 +0200
commitbe602cde657ee43d23adbf309be6d700d0106dc9 (patch)
treed27775738d0deefe7c3071be7d667912dc3ee932 /kernel
parentcd9626e9ebc77edec33023fe95dab4b04ffc819d (diff)
parentc964ced7726294d40913f2127c3f185a92cb4a41 (diff)
Merge branch 'linus' into sched/urgent, to resolve conflict
Conflicts: kernel/sched/ext.c There's a context conflict between this upstream commit: 3fdb9ebcec10 sched_ext: Start schedulers with consistent p->scx.slice values ... and this fix in sched/urgent: 98442f0ccd82 sched: Fix delayed_dequeue vs switched_from_fair() Resolve it. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/debug/gdbstub.c2
-rw-r--r--kernel/fork.c32
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/rcu/tree.c9
-rw-r--r--kernel/rcu/tree_nocb.h8
-rw-r--r--kernel/resource_kunit.c18
-rw-r--r--kernel/sched/core.c21
-rw-r--r--kernel/sched/ext.c218
-rw-r--r--kernel/sched/psi.c26
-rw-r--r--kernel/sched/sched.h3
-rw-r--r--kernel/trace/ring_buffer.c53
-rw-r--r--kernel/trace/trace.c21
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/trace/trace_osnoise.c22
15 files changed, 257 insertions, 182 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e07cc057d6f..5e77c58e0601 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -40,7 +40,7 @@
#include <linux/execmem.h>
#include <asm/barrier.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* Registers */
#define BPF_R0 regs[BPF_REG_0]
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 9d34d2364b5a..f625172d4b67 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -33,7 +33,7 @@
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "debug_core.h"
#define KGDB_MAX_THREAD_QUERY 17
diff --git a/kernel/fork.c b/kernel/fork.c
index 60c0b4868fd4..89ceb4a68af2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
int no_files)
{
struct files_struct *oldf, *newf;
- int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
- goto out;
+ return 0;
if (no_files) {
tsk->files = NULL;
- goto out;
+ return 0;
}
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count);
- goto out;
+ return 0;
}
- newf = dup_fd(oldf, NR_OPEN_MAX, &error);
- if (!newf)
- goto out;
+ newf = dup_fd(oldf, NULL);
+ if (IS_ERR(newf))
+ return PTR_ERR(newf);
tsk->files = newf;
- error = 0;
-out:
- return error;
+ return 0;
}
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
/*
* Unshare file descriptor table if it is being shared
*/
-int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
- struct files_struct **new_fdp)
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
{
struct files_struct *fd = current->files;
- int error = 0;
if ((unshare_flags & CLONE_FILES) &&
(fd && atomic_read(&fd->count) > 1)) {
- *new_fdp = dup_fd(fd, max_fds, &error);
- if (!*new_fdp)
- return error;
+ fd = dup_fd(fd, NULL);
+ if (IS_ERR(fd))
+ return PTR_ERR(fd);
+ *new_fdp = fd;
}
return 0;
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
err = unshare_fs(unshare_flags, &new_fs);
if (err)
goto bad_unshare_out;
- err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
+ err = unshare_fd(unshare_flags, &new_fd);
if (err)
goto bad_unshare_cleanup_fs;
err = unshare_userns(unshare_flags, &new_cred);
@@ -3398,7 +3394,7 @@ int unshare_files(void)
struct files_struct *old, *copy = NULL;
int error;
- error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
+ error = unshare_fd(CLONE_FILES, &copy);
if (error || !copy)
return error;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index db4ceb0f503c..9bb36897b6c6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -623,6 +623,8 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
+ if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
+ return;
/*
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a60616e69b66..b1f883fcd918 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3607,11 +3607,12 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
}
// One work is per one batch, so there are three
- // "free channels", the batch can handle. It can
- // be that the work is in the pending state when
- // channels have been detached following by each
- // other.
+ // "free channels", the batch can handle. Break
+ // the loop since it is done with this CPU thus
+ // queuing an RCU work is _always_ success here.
queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
+ WARN_ON_ONCE(!queued);
+ break;
}
}
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 97b99cd06923..16865475120b 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -554,13 +554,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
- } else if (!irqs_disabled_flags(flags)) {
+ } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
/* ... if queue was empty ... */
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
+ /*
+ * Don't do the wake-up upfront on fragile paths.
+ * Also offline CPUs can't call swake_up_one_online() from
+ * (soft-)IRQs. Rely on the final deferred wake-up from
+ * rcutree_report_cpu_dead()
+ */
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
diff --git a/kernel/resource_kunit.c b/kernel/resource_kunit.c
index 42d2d8d20f5d..b8ef75b99eb2 100644
--- a/kernel/resource_kunit.c
+++ b/kernel/resource_kunit.c
@@ -169,6 +169,8 @@ static void resource_test_intersection(struct kunit *test)
#define RES_TEST_RAM3_SIZE SZ_1M
#define RES_TEST_TOTAL_SIZE ((RES_TEST_WIN1_OFFSET + RES_TEST_WIN1_SIZE))
+KUNIT_DEFINE_ACTION_WRAPPER(kfree_wrapper, kfree, const void *);
+
static void remove_free_resource(void *ctx)
{
struct resource *res = (struct resource *)ctx;
@@ -177,6 +179,14 @@ static void remove_free_resource(void *ctx)
kfree(res);
}
+static void resource_test_add_action_or_abort(
+ struct kunit *test, void (*action)(void *), void *ctx)
+{
+ KUNIT_ASSERT_EQ_MSG(test, 0,
+ kunit_add_action_or_reset(test, action, ctx),
+ "Fail to add action");
+}
+
static void resource_test_request_region(struct kunit *test, struct resource *parent,
resource_size_t start, resource_size_t size,
const char *name, unsigned long flags)
@@ -185,7 +195,7 @@ static void resource_test_request_region(struct kunit *test, struct resource *pa
res = __request_region(parent, start, size, name, flags);
KUNIT_ASSERT_NOT_NULL(test, res);
- kunit_add_action_or_reset(test, remove_free_resource, res);
+ resource_test_add_action_or_abort(test, remove_free_resource, res);
}
static void resource_test_insert_resource(struct kunit *test, struct resource *parent,
@@ -202,11 +212,11 @@ static void resource_test_insert_resource(struct kunit *test, struct resource *p
res->end = start + size - 1;
res->flags = flags;
if (insert_resource(parent, res)) {
- kfree(res);
+ resource_test_add_action_or_abort(test, kfree_wrapper, res);
KUNIT_FAIL_AND_ABORT(test, "Fail to insert resource %pR\n", res);
}
- kunit_add_action_or_reset(test, remove_free_resource, res);
+ resource_test_add_action_or_abort(test, remove_free_resource, res);
}
static void resource_test_region_intersects(struct kunit *test)
@@ -220,7 +230,7 @@ static void resource_test_region_intersects(struct kunit *test)
"test resources");
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, parent);
start = parent->start;
- kunit_add_action_or_reset(test, remove_free_resource, parent);
+ resource_test_add_action_or_abort(test, remove_free_resource, parent);
resource_test_request_region(test, parent, start + RES_TEST_RAM0_OFFSET,
RES_TEST_RAM0_SIZE, "Test System RAM 0", flags);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7db711ba6d12..dbfb5717d6af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3523,14 +3523,16 @@ out:
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
{
lockdep_assert_held(&p->pi_lock);
- if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
- cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
- else
+ if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+ cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
+ *wake_flags |= WF_RQ_SELECTED;
+ } else {
cpu = cpumask_any(p->cpus_ptr);
+ }
/*
* In order not to call set_task_cpu() on a blocking task we need
@@ -3664,6 +3666,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
rq->nr_uninterruptible--;
#ifdef CONFIG_SMP
+ if (wake_flags & WF_RQ_SELECTED)
+ en_flags |= ENQUEUE_RQ_SELECTED;
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
else
@@ -4125,6 +4129,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
guard(preempt)();
int cpu, success = 0;
+ wake_flags |= WF_TTWU;
+
if (p == current) {
/*
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4257,7 +4263,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_cond_load_acquire(&p->on_cpu, !VAL);
- cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+ cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
if (task_cpu(p) != cpu) {
if (p->in_iowait) {
delayacct_blkio_end(p);
@@ -4799,6 +4805,7 @@ void wake_up_new_task(struct task_struct *p)
{
struct rq_flags rf;
struct rq *rq;
+ int wake_flags = WF_FORK;
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4813,7 +4820,7 @@ void wake_up_new_task(struct task_struct *p)
*/
p->recent_used_cpu = task_cpu(p);
rseq_migrate(p);
- __set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
+ __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
#endif
rq = __task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -4821,7 +4828,7 @@ void wake_up_new_task(struct task_struct *p)
activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
trace_sched_wakeup_new(p);
- wakeup_preempt(rq, p, WF_FORK);
+ wakeup_preempt(rq, p, wake_flags);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
/*
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 6f9de573ee93..5900b06fd036 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9,7 +9,6 @@
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
enum scx_consts {
- SCX_SLICE_BYPASS = SCX_SLICE_DFL / 4,
SCX_DSP_DFL_MAX_BATCH = 32,
SCX_DSP_MAX_LOOPS = 32,
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
@@ -19,6 +18,12 @@ enum scx_consts {
SCX_EXIT_DUMP_DFL_LEN = 32768,
SCX_CPUPERF_ONE = SCHED_CAPACITY_SCALE,
+
+ /*
+ * Iterating all tasks may take a while. Periodically drop
+ * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
+ */
+ SCX_OPS_TASK_ITER_BATCH = 32,
};
enum scx_exit_kind {
@@ -625,6 +630,10 @@ struct sched_ext_ops {
/**
* exit - Clean up after the BPF scheduler
* @info: Exit info
+ *
+ * ops.exit() is also called on ops.init() failure, which is a bit
+ * unusual. This is to allow rich reporting through @info on how
+ * ops.init() failed.
*/
void (*exit)(struct scx_exit_info *info);
@@ -692,6 +701,7 @@ enum scx_enq_flags {
/* expose select ENQUEUE_* flags as enums */
SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
SCX_ENQ_HEAD = ENQUEUE_HEAD,
+ SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
/* high 32bits are SCX specific */
@@ -1269,86 +1279,104 @@ struct scx_task_iter {
struct task_struct *locked;
struct rq *rq;
struct rq_flags rf;
+ u32 cnt;
};
/**
- * scx_task_iter_init - Initialize a task iterator
+ * scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
* @iter: iterator to init
*
- * Initialize @iter. Must be called with scx_tasks_lock held. Once initialized,
- * @iter must eventually be exited with scx_task_iter_exit().
+ * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
+ * must eventually be stopped with scx_task_iter_stop().
*
- * scx_tasks_lock may be released between this and the first next() call or
- * between any two next() calls. If scx_tasks_lock is released between two
- * next() calls, the caller is responsible for ensuring that the task being
- * iterated remains accessible either through RCU read lock or obtaining a
- * reference count.
+ * scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
+ * between this and the first next() call or between any two next() calls. If
+ * the locks are released between two next() calls, the caller is responsible
+ * for ensuring that the task being iterated remains accessible either through
+ * RCU read lock or obtaining a reference count.
*
* All tasks which existed when the iteration started are guaranteed to be
* visited as long as they still exist.
*/
-static void scx_task_iter_init(struct scx_task_iter *iter)
+static void scx_task_iter_start(struct scx_task_iter *iter)
{
- lockdep_assert_held(&scx_tasks_lock);
-
BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
+ spin_lock_irq(&scx_tasks_lock);
+
iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
list_add(&iter->cursor.tasks_node, &scx_tasks);
iter->locked = NULL;
+ iter->cnt = 0;
+}
+
+static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+{
+ if (iter->locked) {
+ task_rq_unlock(iter->rq, iter->locked, &iter->rf);
+ iter->locked = NULL;
+ }
}
/**
- * scx_task_iter_rq_unlock - Unlock rq locked by a task iterator
- * @iter: iterator to unlock rq for
+ * scx_task_iter_unlock - Unlock rq and scx_tasks_lock held by a task iterator
+ * @iter: iterator to unlock
*
* If @iter is in the middle of a locked iteration, it may be locking the rq of
- * the task currently being visited. Unlock the rq if so. This function can be
- * safely called anytime during an iteration.
+ * the task currently being visited in addition to scx_tasks_lock. Unlock both.
+ * This function can be safely called anytime during an iteration.
+ */
+static void scx_task_iter_unlock(struct scx_task_iter *iter)
+{
+ __scx_task_iter_rq_unlock(iter);
+ spin_unlock_irq(&scx_tasks_lock);
+}
+
+/**
+ * scx_task_iter_relock - Lock scx_tasks_lock released by scx_task_iter_unlock()
+ * @iter: iterator to re-lock
*
- * Returns %true if the rq @iter was locking is unlocked. %false if @iter was
- * not locking an rq.
+ * Re-lock scx_tasks_lock unlocked by scx_task_iter_unlock(). Note that it
+ * doesn't re-lock the rq lock. Must be called before other iterator operations.
*/
-static bool scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+static void scx_task_iter_relock(struct scx_task_iter *iter)
{
- if (iter->locked) {
- task_rq_unlock(iter->rq, iter->locked, &iter->rf);
- iter->locked = NULL;
- return true;
- } else {
- return false;
- }
+ spin_lock_irq(&scx_tasks_lock);
}
/**
- * scx_task_iter_exit - Exit a task iterator
+ * scx_task_iter_stop - Stop a task iteration and unlock scx_tasks_lock
* @iter: iterator to exit
*
- * Exit a previously initialized @iter. Must be called with scx_tasks_lock held.
- * If the iterator holds a task's rq lock, that rq lock is released. See
- * scx_task_iter_init() for details.
+ * Exit a previously initialized @iter. Must be called with scx_tasks_lock held
+ * which is released on return. If the iterator holds a task's rq lock, that rq
+ * lock is also released. See scx_task_iter_start() for details.
*/
-static void scx_task_iter_exit(struct scx_task_iter *iter)
+static void scx_task_iter_stop(struct scx_task_iter *iter)
{
- lockdep_assert_held(&scx_tasks_lock);
-
- scx_task_iter_rq_unlock(iter);
list_del_init(&iter->cursor.tasks_node);
+ scx_task_iter_unlock(iter);
}
/**
* scx_task_iter_next - Next task
* @iter: iterator to walk
*
- * Visit the next task. See scx_task_iter_init() for details.
+ * Visit the next task. See scx_task_iter_start() for details. Locks are dropped
+ * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
+ * stalls by holding scx_tasks_lock for too long.
*/
static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
{
struct list_head *cursor = &iter->cursor.tasks_node;
struct sched_ext_entity *pos;
- lockdep_assert_held(&scx_tasks_lock);
+ if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
+ scx_task_iter_unlock(iter);
+ cond_resched();
+ scx_task_iter_relock(iter);
+ }
list_for_each_entry(pos, cursor, tasks_node) {
if (&pos->tasks_node == &scx_tasks)
@@ -1369,14 +1397,14 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
* @include_dead: Whether we should include dead tasks in the iteration
*
* Visit the non-idle task with its rq lock held. Allows callers to specify
- * whether they would like to filter out dead tasks. See scx_task_iter_init()
+ * whether they would like to filter out dead tasks. See scx_task_iter_start()
* for details.
*/
static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
{
struct task_struct *p;
- scx_task_iter_rq_unlock(iter);
+ __scx_task_iter_rq_unlock(iter);
while ((p = scx_task_iter_next(iter))) {
/*
@@ -1944,7 +1972,6 @@ static bool scx_rq_online(struct rq *rq)
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
int sticky_cpu)
{
- bool bypassing = scx_rq_bypassing(rq);
struct task_struct **ddsp_taskp;
unsigned long qseq;
@@ -1962,7 +1989,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
if (!scx_rq_online(rq))
goto local;
- if (bypassing)
+ if (scx_rq_bypassing(rq))
goto global;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2017,7 +2044,7 @@ local_norefill:
global:
touch_core_sched(rq, p); /* see the comment in local: */
- p->scx.slice = bypassing ? SCX_SLICE_BYPASS : SCX_SLICE_DFL;
+ p->scx.slice = SCX_SLICE_DFL;
dispatch_enqueue(find_global_dsq(p), p, enq_flags);
}
@@ -2953,8 +2980,8 @@ static struct task_struct *pick_task_scx(struct rq *rq)
if (unlikely(!p->scx.slice)) {
if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
- printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
- p->comm, p->pid);
+ printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
+ p->comm, p->pid, __func__);
scx_warned_zero_slice = true;
}
p->scx.slice = SCX_SLICE_DFL;
@@ -3059,11 +3086,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
*found = false;
- if (!static_branch_likely(&scx_builtin_idle_enabled)) {
- scx_ops_error("built-in idle tracking is disabled");
- return prev_cpu;
- }
-
/*
* If WAKE_SYNC, the waker's local DSQ is empty, and the system is
* under utilized, wake up @p to the local DSQ of the waker. Checking
@@ -3128,7 +3150,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
if (unlikely(wake_flags & WF_EXEC))
return prev_cpu;
- if (SCX_HAS_OP(select_cpu)) {
+ if (SCX_HAS_OP(select_cpu) && !scx_rq_bypassing(task_rq(p))) {
s32 cpu;
struct task_struct **ddsp_taskp;
@@ -3193,7 +3215,7 @@ void __scx_update_idle(struct rq *rq, bool idle)
{
int cpu = cpu_of(rq);
- if (SCX_HAS_OP(update_idle)) {
+ if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
if (!static_branch_unlikely(&scx_builtin_idle_enabled))
return;
@@ -4048,7 +4070,6 @@ static void scx_cgroup_exit(void)
percpu_rwsem_assert_held(&scx_cgroup_rwsem);
- WARN_ON_ONCE(!scx_cgroup_enabled);
scx_cgroup_enabled = false;
/*
@@ -4117,6 +4138,7 @@ static int scx_cgroup_init(void)
css->cgroup, &args);
if (ret) {
css_put(css);
+ scx_ops_error("ops.cgroup_init() failed (%d)", ret);
return ret;
}
tg->scx_flags |= SCX_TG_INITED;
@@ -4256,21 +4278,23 @@ bool task_should_scx(struct task_struct *p)
* the DISABLING state and then cycling the queued tasks through dequeue/enqueue
* to force global FIFO scheduling.
*
- * a. ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
- * %SCX_OPS_ENQ_LAST is also ignored.
+ * - ops.select_cpu() is ignored and the default select_cpu() is used.
+ *
+ * - ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
+ * %SCX_OPS_ENQ_LAST is also ignored.
*
- * b. ops.dispatch() is ignored.
+ * - ops.dispatch() is ignored.
*
- * c. balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
- * can't be trusted. Whenever a tick triggers, the running task is rotated to
- * the tail of the queue with core_sched_at touched.
+ * - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
+ * can't be trusted. Whenever a tick triggers, the running task is rotated to
+ * the tail of the queue with core_sched_at touched.
*
- * d. pick_next_task() suppresses zero slice warning.
+ * - pick_next_task() suppresses zero slice warning.
*
- * e. scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
- * operations.
+ * - scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
+ * operations.
*
- * f. scx_prio_less() reverts to the default core_sched_at order.
+ * - scx_prio_less() reverts to the default core_sched_at order.
*/
static void scx_ops_bypass(bool bypass)
{
@@ -4340,7 +4364,7 @@ static void scx_ops_bypass(bool bypass)
rq_unlock_irqrestore(rq, &rf);
- /* kick to restore ticks */
+ /* resched to restore ticks and idle state */
resched_cpu(cpu);
}
}
@@ -4462,15 +4486,13 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
scx_ops_init_task_enabled = false;
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
p->sched_class = __setscheduler_class(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
@@ -4479,8 +4501,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
check_class_changed(task_rq(p), p, old_class, p->prio);
scx_ops_exit_task(p);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
/* no task is on scx, turn off all the switches and flush in-progress calls */
@@ -5041,6 +5062,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret) {
ret = ops_sanitize_err("init", ret);
cpus_read_unlock();
+ scx_ops_error("ops.init() failed (%d)", ret);
goto err_disable;
}
}
@@ -5130,8 +5152,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret)
goto err_disable_unlock_all;
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
/*
* @p may already be dead, have lost all its usages counts and
@@ -5141,27 +5162,24 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (!tryget_task_struct(p))
continue;
- scx_task_iter_rq_unlock(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_unlock(&sti);
ret = scx_ops_init_task(p, task_group(p), false);
if (ret) {
put_task_struct(p);
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
- pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
- ret, p->comm, p->pid);
+ scx_task_iter_relock(&sti);
+ scx_task_iter_stop(&sti);
+ scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
+ ret, p->comm, p->pid);
goto err_disable_unlock_all;
}
scx_set_task_state(p, SCX_TASK_READY);
put_task_struct(p);
- spin_lock_irq(&scx_tasks_lock);
+ scx_task_iter_relock(&sti);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
scx_cgroup_unlock();
percpu_up_write(&scx_fork_rwsem);
@@ -5178,14 +5196,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* scx_tasks_lock.
*/
percpu_down_write(&scx_fork_rwsem);
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
+ p->scx.slice = SCX_SLICE_DFL;
p->sched_class = __setscheduler_class(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
@@ -5193,20 +5211,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
check_class_changed(task_rq(p), p, old_class, p->prio);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
scx_ops_bypass(false);
- /*
- * Returning an error code here would lose the recorded error
- * information. Exit indicating success so that the error is notified
- * through ops.exit() with all the details.
- */
if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
- ret = 0;
goto err_disable;
}
@@ -5241,10 +5252,18 @@ err_disable_unlock_all:
scx_ops_bypass(false);
err_disable:
mutex_unlock(&scx_ops_enable_mutex);
- /* must be fully disabled before returning */
- scx_ops_disable(SCX_EXIT_ERROR);
+ /*
+ * Returning an error code here would not pass all the error information
+ * to userspace. Record errno using scx_ops_error() for cases
+ * scx_ops_error() wasn't already invoked and exit indicating success so
+ * that the error is notified through ops.exit() with all the details.
+ *
+ * Flush scx_ops_disable_work to ensure that error is reported before
+ * init completion.
+ */
+ scx_ops_error("scx_ops_enable() failed (%d)", ret);
kthread_flush_work(&scx_ops_disable_work);
- return ret;
+ return 0;
}
@@ -5864,16 +5883,21 @@ __bpf_kfunc_start_defs();
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
- if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) {
- *is_idle = false;
- return prev_cpu;
+ if (!static_branch_likely(&scx_builtin_idle_enabled)) {
+ scx_ops_error("built-in idle tracking is disabled");
+ goto prev_cpu;
}
+
+ if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
+ goto prev_cpu;
+
#ifdef CONFIG_SMP
return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
-#else
+#endif
+
+prev_cpu:
*is_idle = false;
return prev_cpu;
-#endif
}
__bpf_kfunc_end_defs();
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 020d58967d4e..84dad1511d1e 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -769,12 +769,13 @@ static void record_times(struct psi_group_cpu *groupc, u64 now)
}
static void psi_group_change(struct psi_group *group, int cpu,
- unsigned int clear, unsigned int set, u64 now,
+ unsigned int clear, unsigned int set,
bool wake_clock)
{
struct psi_group_cpu *groupc;
unsigned int t, m;
u32 state_mask;
+ u64 now;
lockdep_assert_rq_held(cpu_rq(cpu));
groupc = per_cpu_ptr(group->pcpu, cpu);
@@ -789,6 +790,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
* SOME and FULL time these may have resulted in.
*/
write_seqcount_begin(&groupc->seq);
+ now = cpu_clock(cpu);
/*
* Start with TSK_ONCPU, which doesn't have a corresponding
@@ -899,18 +901,15 @@ void psi_task_change(struct task_struct *task, int clear, int set)
{
int cpu = task_cpu(task);
struct psi_group *group;
- u64 now;
if (!task->pid)
return;
psi_flags_change(task, clear, set);
- now = cpu_clock(cpu);
-
group = task_psi_group(task);
do {
- psi_group_change(group, cpu, clear, set, now, true);
+ psi_group_change(group, cpu, clear, set, true);
} while ((group = group->parent));
}
@@ -919,7 +918,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
{
struct psi_group *group, *common = NULL;
int cpu = task_cpu(prev);
- u64 now = cpu_clock(cpu);
if (next->pid) {
psi_flags_change(next, 0, TSK_ONCPU);
@@ -936,7 +934,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
break;
}
- psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
+ psi_group_change(group, cpu, 0, TSK_ONCPU, true);
} while ((group = group->parent));
}
@@ -974,7 +972,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
do {
if (group == common)
break;
- psi_group_change(group, cpu, clear, set, now, wake_clock);
+ psi_group_change(group, cpu, clear, set, wake_clock);
} while ((group = group->parent));
/*
@@ -986,7 +984,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
if ((prev->psi_flags ^ next->psi_flags) & ~TSK_ONCPU) {
clear &= ~TSK_ONCPU;
for (; group; group = group->parent)
- psi_group_change(group, cpu, clear, set, now, wake_clock);
+ psi_group_change(group, cpu, clear, set, wake_clock);
}
}
}
@@ -997,8 +995,8 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
int cpu = task_cpu(curr);
struct psi_group *group;
struct psi_group_cpu *groupc;
- u64 now, irq;
s64 delta;
+ u64 irq;
if (static_branch_likely(&psi_disabled))
return;
@@ -1011,7 +1009,6 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
if (prev && task_psi_group(prev) == group)
return;
- now = cpu_clock(cpu);
irq = irq_time_read(cpu);
delta = (s64)(irq - rq->psi_irq_time);
if (delta < 0)
@@ -1019,12 +1016,15 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
rq->psi_irq_time = irq;
do {
+ u64 now;
+
if (!group->enabled)
continue;
groupc = per_cpu_ptr(group->pcpu, cpu);
write_seqcount_begin(&groupc->seq);
+ now = cpu_clock(cpu);
record_times(groupc, now);
groupc->times[PSI_IRQ_FULL] += delta;
@@ -1223,11 +1223,9 @@ void psi_cgroup_restart(struct psi_group *group)
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
- u64 now;
rq_lock_irq(rq, &rf);
- now = cpu_clock(cpu);
- psi_group_change(group, cpu, 0, 0, now, true);
+ psi_group_change(group, cpu, 0, 0, true);
rq_unlock_irq(rq, &rf);
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fba524c81c63..081519ffab46 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
+#define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_MIGRATED - the task was migrated during wakeup
+ * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
*
*/
@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_INITIAL 0x80
#define ENQUEUE_MIGRATING 0x100
#define ENQUEUE_DELAYED 0x200
+#define ENQUEUE_RQ_SELECTED 0x400
#define RETRY_TASK ((void *)-1UL)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 77dc0b25140e..3ea4f7bb1837 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2337,9 +2337,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
if (!buffer->buffers[cpu])
goto fail_free_buffers;
- ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
- if (ret < 0)
- goto fail_free_buffers;
+ /* If already mapped, do not hook to CPU hotplug */
+ if (!start) {
+ ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
+ if (ret < 0)
+ goto fail_free_buffers;
+ }
mutex_init(&buffer->mutex);
@@ -6725,39 +6728,38 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
}
for_each_buffer_cpu(buffer, cpu) {
+ struct buffer_data_page *old_free_data_page;
+ struct list_head old_pages;
+ unsigned long flags;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
continue;
cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
/* Clear the head bit to make the link list normal to read */
rb_head_page_deactivate(cpu_buffer);
- /* Now walk the list and free all the old sub buffers */
- list_for_each_entry_safe(bpage, tmp, cpu_buffer->pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
- }
- /* The above loop stopped an the last page needing to be freed */
- bpage = list_entry(cpu_buffer->pages, struct buffer_page, list);
- free_buffer_page(bpage);
-
- /* Free the current reader page */
- free_buffer_page(cpu_buffer->reader_page);
+ /*
+ * Collect buffers from the cpu_buffer pages list and the
+ * reader_page on old_pages, so they can be freed later when not
+ * under a spinlock. The pages list is a linked list with no
+ * head, adding old_pages turns it into a regular list with
+ * old_pages being the head.
+ */
+ list_add(&old_pages, cpu_buffer->pages);
+ list_add(&cpu_buffer->reader_page->list, &old_pages);
/* One page was allocated for the reader page */
cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next,
struct buffer_page, list);
list_del_init(&cpu_buffer->reader_page->list);
- /* The cpu_buffer pages are a link list with no head */
+ /* Install the new pages, remove the head from the list */
cpu_buffer->pages = cpu_buffer->new_pages.next;
- cpu_buffer->new_pages.next->prev = cpu_buffer->new_pages.prev;
- cpu_buffer->new_pages.prev->next = cpu_buffer->new_pages.next;
-
- /* Clear the new_pages list */
- INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ list_del_init(&cpu_buffer->new_pages);
cpu_buffer->head_page
= list_entry(cpu_buffer->pages, struct buffer_page, list);
@@ -6766,11 +6768,20 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update;
cpu_buffer->nr_pages_to_update = 0;
- free_pages((unsigned long)cpu_buffer->free_page, old_order);
+ old_free_data_page = cpu_buffer->free_page;
cpu_buffer->free_page = NULL;
rb_head_page_activate(cpu_buffer);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ /* Free old sub buffers */
+ list_for_each_entry_safe(bpage, tmp, &old_pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ free_pages((unsigned long)old_free_data_page, old_order);
+
rb_check_pages(cpu_buffer);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c01375adc471..a8f52b6527ca 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3697,8 +3697,8 @@ static void test_can_verify(void)
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
va_list ap)
{
- long text_delta = iter->tr->text_delta;
- long data_delta = iter->tr->data_delta;
+ long text_delta = 0;
+ long data_delta = 0;
const char *p = fmt;
const char *str;
bool good;
@@ -3710,6 +3710,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
if (static_branch_unlikely(&trace_no_verify))
goto print;
+ /*
+ * When the kernel is booted with the tp_printk command line
+ * parameter, trace events go directly through to printk().
+ * It also is checked by this function, but it does not
+ * have an associated trace_array (tr) for it.
+ */
+ if (iter->tr) {
+ text_delta = iter->tr->text_delta;
+ data_delta = iter->tr->data_delta;
+ }
+
/* Don't bother checking when doing a ftrace_dump() */
if (iter->fmt == static_fmt_buf)
goto print;
@@ -10610,10 +10621,10 @@ __init static void enable_instances(void)
* cannot be deleted by user space, so keep the reference
* to it.
*/
- if (start)
+ if (start) {
tr->flags |= TRACE_ARRAY_FL_BOOT;
- else
- trace_array_put(tr);
+ tr->ref++;
+ }
while ((tok = strsep(&curr_str, ","))) {
early_enable_events(tr, tok, true);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index b791524a6536..3bd6071441ad 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -520,6 +520,8 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
goto out_unlock;
+ if (!cpu_online(cpu))
+ goto out_unlock;
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
goto out_unlock;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 1439064f65d6..a50ed23bee77 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1953,12 +1953,8 @@ static void stop_kthread(unsigned int cpu)
{
struct task_struct *kthread;
- mutex_lock(&interface_lock);
- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
+ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
if (kthread) {
- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
- mutex_unlock(&interface_lock);
-
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
kthread_stop(kthread);
@@ -1972,7 +1968,6 @@ static void stop_kthread(unsigned int cpu)
put_task_struct(kthread);
}
} else {
- mutex_unlock(&interface_lock);
/* if no workload, just return */
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
/*
@@ -1994,8 +1989,12 @@ static void stop_per_cpu_kthreads(void)
{
int cpu;
- for_each_possible_cpu(cpu)
+ cpus_read_lock();
+
+ for_each_online_cpu(cpu)
stop_kthread(cpu);
+
+ cpus_read_unlock();
}
/*
@@ -2007,6 +2006,10 @@ static int start_kthread(unsigned int cpu)
void *main = osnoise_main;
char comm[24];
+ /* Do not start a new thread if it is already running */
+ if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
+ return 0;
+
if (timerlat_enabled()) {
snprintf(comm, 24, "timerlat/%d", cpu);
main = timerlat_main;
@@ -2061,11 +2064,10 @@ static int start_per_cpu_kthreads(void)
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
struct task_struct *kthread;
- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
+ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
if (!WARN_ON(!kthread))
kthread_stop(kthread);
}
- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
}
for_each_cpu(cpu, current_mask) {
@@ -2095,6 +2097,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
mutex_lock(&interface_lock);
cpus_read_lock();
+ if (!cpu_online(cpu))
+ goto out_unlock;
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
goto out_unlock;