From 6e33cad0af49336952e5541464bd02f5b5fd433e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Mar 2021 18:55:06 +0100 Subject: sched: Trivial core scheduling cookie management In order to not have to use pid_struct, create a new, smaller, structure to manage task cookies for core scheduling. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.919768100@infradead.org --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index dc06afd725cb..d16c60c9daca 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -742,6 +742,7 @@ void __put_task_struct(struct task_struct *tsk) exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); + sched_core_free(tsk); if (!profile_handoff_task(tsk)) free_task(tsk); -- cgit v1.2.3 From 85dd3f61203c5cfa72b308ff327b5fbf3fc1ce5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Mar 2021 15:18:35 +0200 Subject: sched: Inherit task cookie on fork() Note that sched_core_fork() is called from under tasklist_lock, and not from sched_fork() earlier. This avoids a few races later. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.980003687@infradead.org --- include/linux/sched.h | 2 ++ kernel/fork.c | 3 +++ kernel/sched/core_sched.c | 6 ++++++ 3 files changed, 11 insertions(+) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index eab3f7c4251b..fba47e52e482 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2181,8 +2181,10 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd); #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); +extern void sched_core_fork(struct task_struct *p); #else static inline void sched_core_free(struct task_struct *tsk) { } +static inline void sched_core_fork(struct task_struct *p) { } #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index d16c60c9daca..e7fd928fcafe 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2251,6 +2251,8 @@ static __latent_entropy struct task_struct *copy_process( klp_copy_process(p); + sched_core_fork(p); + spin_lock(¤t->sighand->siglock); /* @@ -2338,6 +2340,7 @@ static __latent_entropy struct task_struct *copy_process( return p; bad_fork_cancel_cgroup: + sched_core_free(p); spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p, args); diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 8d0869a9eb8c..dcbbeaefaaa3 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -103,6 +103,12 @@ static unsigned long sched_core_clone_cookie(struct task_struct *p) return cookie; } +void sched_core_fork(struct task_struct *p) +{ + RB_CLEAR_NODE(&p->core_node); + p->core_cookie = sched_core_clone_cookie(current); +} + void sched_core_free(struct task_struct *p) { sched_core_put_cookie(p->core_cookie); -- cgit v1.2.3 From f1a0a376ca0c4ef1fc3d24e3e502acbb5b795674 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 12 May 2021 10:46:36 +0100 Subject: sched/core: Initialize the idle task with preemption disabled As pointed out by commit de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread") init_idle() can and will be invoked more than once on the same idle task. At boot time, it is invoked for the boot CPU thread by sched_init(). Then smp_init() creates the threads for all the secondary CPUs and invokes init_idle() on them. As the hotplug machinery brings the secondaries to life, it will issue calls to idle_thread_get(), which itself invokes init_idle() yet again. In this case it's invoked twice more per secondary: at _cpu_up(), and at bringup_cpu(). Given smp_init() already initializes the idle tasks for all *possible* CPUs, no further initialization should be required. Now, removing init_idle() from idle_thread_get() exposes some interesting expectations with regards to the idle task's preempt_count: the secondary startup always issues a preempt_disable(), requiring some reset of the preempt count to 0 between hot-unplug and hotplug, which is currently served by idle_thread_get() -> idle_init(). Given the idle task is supposed to have preemption disabled once and never see it re-enabled, it seems that what we actually want is to initialize its preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove init_idle() from idle_thread_get(). Secondary startups were patched via coccinelle: @begone@ @@ -preempt_disable(); ... cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.com --- arch/alpha/kernel/smp.c | 1 - arch/arc/kernel/smp.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm64/include/asm/preempt.h | 2 +- arch/arm64/kernel/smp.c | 1 - arch/csky/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/mips/kernel/smp.c | 1 - arch/openrisc/kernel/smp.c | 2 -- arch/parisc/kernel/smp.c | 1 - arch/powerpc/kernel/smp.c | 1 - arch/riscv/kernel/smpboot.c | 1 - arch/s390/include/asm/preempt.h | 4 ++-- arch/s390/kernel/smp.c | 1 - arch/sh/kernel/smp.c | 2 -- arch/sparc/kernel/smp_32.c | 1 - arch/sparc/kernel/smp_64.c | 3 --- arch/x86/include/asm/preempt.h | 2 +- arch/x86/kernel/smpboot.c | 1 - arch/xtensa/kernel/smp.c | 1 - include/asm-generic/preempt.h | 2 +- init/main.c | 6 +----- kernel/fork.c | 2 +- kernel/sched/core.c | 2 +- kernel/smpboot.c | 1 - 25 files changed, 8 insertions(+), 34 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index f4dd9f3f3001..4b2575f936d4 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,7 +166,6 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 52906d314537..db0e104d6835 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -189,7 +189,6 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 74679240a9d8..c7bb168b0d97 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -432,7 +432,6 @@ asmlinkage void secondary_start_kernel(void) #endif pr_debug("CPU%u: Booted secondary processor\n", cpu); - preempt_disable(); trace_hardirqs_off(); /* diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index 80e946b2abee..e83f0982b99c 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..6671000a8b7d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -224,7 +224,6 @@ asmlinkage notrace void secondary_start_kernel(void) init_gic_priority_masking(); rcu_cpu_starting(cpu); - preempt_disable(); trace_hardirqs_off(); /* diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 0f9f5eef9338..e2993539af8e 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -281,7 +281,6 @@ void csky_start_secondary(void) pr_info("CPU%u Online: %s...\n", cpu, __func__); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 49b488580939..d10f780c13b9 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -441,7 +441,6 @@ start_secondary (void *unused) #endif efi_map_pal_code(); cpu_init(); - preempt_disable(); smp_callin(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index ef86fbad8546..d542fb7af3ba 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -348,7 +348,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - preempt_disable(); cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 48e1092a64de..415e209732a3 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -145,8 +145,6 @@ asmlinkage __init void secondary_start_kernel(void) set_cpu_online(cpu, true); local_irq_enable(); - - preempt_disable(); /* * OK, it's off to the idle thread for us */ diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 10227f667c8a..1405b603b91b 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -302,7 +302,6 @@ void __init smp_callin(unsigned long pdce_proc) #endif smp_cpu_init(slave_id); - preempt_disable(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2e05c783440a..6c6e4d934d86 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1547,7 +1547,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); rcu_cpu_starting(cpu); - preempt_disable(); cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 9a408e2942ac..bd82375db51a 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -180,7 +180,6 @@ asmlinkage __visible void smp_callin(void) * Disable preemption before enabling interrupts, so we don't try to * schedule a CPU that hasn't actually started yet. */ - preempt_disable(); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b49e0492842c..23ff51be7e29 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -32,7 +32,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) @@ -91,7 +91,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2fec2b80d35d..111909aeb8d2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -878,7 +878,6 @@ static void smp_init_secondary(void) restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); - preempt_disable(); init_cpu_timer(); vtime_init(); vdso_getcpu_init(); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 372acdc9033e..65924d9ec245 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -186,8 +186,6 @@ asmlinkage void start_secondary(void) per_cpu_trap_init(); - preempt_disable(); - notify_cpu_starting(cpu); local_irq_enable(); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 50c127ab46d5..22b148e5a5f8 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg) */ arch_cpu_pre_starting(arg); - preempt_disable(); cpu = smp_processor_id(); notify_cpu_starting(cpu); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e38d8bf454e8..ae5faa1d989d 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -138,9 +138,6 @@ void smp_callin(void) set_cpu_online(cpuid, true); - /* idle thread is expected to have preempt disabled */ - preempt_disable(); - local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index f8cb8af4de5c..fe5efbcba824 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad5214f598a..0936f5ba3222 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -236,7 +236,6 @@ static void notrace start_secondary(void *unused) cpu_init(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); smp_callin(); enable_start_cpu0 = 0; diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index cd85a7a2722b..1254da07ead1 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -145,7 +145,6 @@ void secondary_start_kernel(void) cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current); - preempt_disable(); trace_hardirqs_off(); calibrate_delay(); diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d791..b4d43a4af5f7 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static __always_inline void set_preempt_need_resched(void) diff --git a/init/main.c b/init/main.c index eb01e121d2f1..7b027d9c5c89 100644 --- a/init/main.c +++ b/init/main.c @@ -941,11 +941,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); - /* - * Disable preemption - early bootup scheduling is extremely - * fragile until we cpu_idle() for the first time. - */ - preempt_disable(); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index e7fd928fcafe..ace4631b5b54 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2412,7 +2412,7 @@ static inline void init_idle_pids(struct task_struct *idle) } } -struct task_struct *fork_idle(int cpu) +struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 55b2d9399e12..9d00f4958bde 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8227,7 +8227,7 @@ void show_state_filter(unsigned long state_filter) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void init_idle(struct task_struct *idle, int cpu) +void __init init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f25208e8df83..e4163042c4d6 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -33,7 +33,6 @@ struct task_struct *idle_thread_get(unsigned int cpu) if (!tsk) return ERR_PTR(-ENOMEM); - init_idle(tsk, cpu); return tsk; } -- cgit v1.2.3 From a8ea6fc9b089156d9230bfeef964dd9be101a4a9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 May 2021 01:58:49 +0200 Subject: sched: Stop PF_NO_SETAFFINITY from being inherited by various init system threads Commit: 00b89fe0197f ("sched: Make the idle task quack like a per-CPU kthread") ... added PF_KTHREAD | PF_NO_SETAFFINITY to the idle kernel threads. Unfortunately these properties are inherited to the init/0 children through kernel_thread() calls: init/1 and kthreadd. There are several side effects to that: 1) kthreadd affinity can not be reset anymore from userspace. Also PF_NO_SETAFFINITY propagates to all kthreadd children, including the unbound kthreads Therefore it's not possible anymore to overwrite the affinity of any of them. Here is an example of warning reported by rcutorture: WARNING: CPU: 0 PID: 116 at kernel/rcu/tree_nocb.h:1306 rcu_bind_current_to_nocb+0x31/0x40 Call Trace: rcu_torture_fwd_prog+0x62/0x730 kthread+0x122/0x140 ret_from_fork+0x22/0x30 2) init/1 does an exec() in the end which clears both PF_KTHREAD and PF_NO_SETAFFINITY so we are fine once kernel_init() escapes to userspace. But until then, no initcall or init code can successfully call sched_setaffinity() to init/1. Also PF_KTHREAD looks legit on init/1 before it calls exec() but we better be careful with unknown introduced side effects. One way to solve the PF_NO_SETAFFINITY issue is to not inherit this flag on copy_process() at all. The cases where it matters are: * fork_idle(): explicitly set the flag already. * fork() syscalls: userspace tasks that shouldn't be concerned by that. * create_io_thread(): the callers explicitly attribute the flag to the newly created tasks. * kernel_thread(): - Fix the issues on init/1 and kthreadd - Fix the issues on kthreadd children. - Usermode helper created by an unbound workqueue. This shouldn't matter. In the worst case it gives more control to userspace on setting affinity to these short living tasks although this can be tuned with inherited unbound workqueues affinity already. Fixes: 00b89fe0197f ("sched: Make the idle task quack like a per-CPU kthread") Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20210525235849.441842-1-frederic@kernel.org --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index ace4631b5b54..e595e77913eb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2000,7 +2000,7 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cleanup_count; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ - p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); + p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY); p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); -- cgit v1.2.3 From 2f064a59a11ff9bc22e52e9678bc601404c7cb34 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2021 10:28:17 +0200 Subject: sched: Change task_struct::state Change the type and name of task_struct::state. Drop the volatile and shrink it to an 'unsigned int'. Rename it in order to find all uses such that we can use READ_ONCE/WRITE_ONCE as appropriate. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Acked-by: Will Deacon Acked-by: Daniel Thompson Link: https://lore.kernel.org/r/20210611082838.550736351@infradead.org --- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/ptrace.c | 8 +++---- arch/powerpc/xmon/xmon.c | 13 ++++++----- block/blk-mq.c | 2 +- drivers/md/dm.c | 6 ++--- fs/binfmt_elf.c | 8 ++++--- fs/binfmt_elf_fdpic.c | 4 +++- fs/userfaultfd.c | 4 ++-- include/linux/sched.h | 31 ++++++++++++------------ include/linux/sched/debug.h | 2 +- include/linux/sched/signal.h | 2 +- init/init_task.c | 2 +- kernel/cgroup/cgroup-v1.c | 2 +- kernel/debug/kdb/kdb_support.c | 18 +++++++------- kernel/fork.c | 4 ++-- kernel/hung_task.c | 2 +- kernel/kthread.c | 4 ++-- kernel/locking/mutex.c | 6 ++--- kernel/locking/rtmutex.c | 4 ++-- kernel/locking/rwsem.c | 2 +- kernel/ptrace.c | 12 +++++----- kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/tree_stall.h | 12 +++++----- kernel/sched/core.c | 53 ++++++++++++++++++++++-------------------- kernel/sched/deadline.c | 10 ++++---- kernel/sched/fair.c | 11 +++++---- lib/syscall.c | 4 ++-- net/core/dev.c | 2 +- 28 files changed, 123 insertions(+), 111 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index cdbac4b52f30..e628a88607bb 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1788,7 +1788,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->task = p; ti->cpu = cpu; p->stack = ti; - p->state = TASK_UNINTERRUPTIBLE; + p->__state = TASK_UNINTERRUPTIBLE; cpumask_set_cpu(cpu, &p->cpus_mask); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index e14f5653393a..df28c7dd164f 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -641,11 +641,11 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) read_lock(&tasklist_lock); if (child->sighand) { spin_lock_irq(&child->sighand->siglock); - if (child->state == TASK_STOPPED && + if (READ_ONCE(child->__state) == TASK_STOPPED && !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { set_notify_resume(child); - child->state = TASK_TRACED; + WRITE_ONCE(child->__state, TASK_TRACED); stopped = 1; } spin_unlock_irq(&child->sighand->siglock); @@ -665,9 +665,9 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) read_lock(&tasklist_lock); if (child->sighand) { spin_lock_irq(&child->sighand->siglock); - if (child->state == TASK_TRACED && + if (READ_ONCE(child->__state) == TASK_TRACED && (child->signal->flags & SIGNAL_STOP_STOPPED)) { - child->state = TASK_STOPPED; + WRITE_ONCE(child->__state, TASK_STOPPED); } spin_unlock_irq(&child->sighand->siglock); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index c8173e92f19d..84de2d7c2f40 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3162,6 +3162,7 @@ memzcan(void) static void show_task(struct task_struct *tsk) { + unsigned int p_state = READ_ONCE(tsk->__state); char state; /* @@ -3169,14 +3170,14 @@ static void show_task(struct task_struct *tsk) * appropriate for calling from xmon. This could be moved * to a common, generic, routine used by both. */ - state = (tsk->state == 0) ? 'R' : - (tsk->state < 0) ? 'U' : - (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' : - (tsk->state & TASK_STOPPED) ? 'T' : - (tsk->state & TASK_TRACED) ? 'C' : + state = (p_state == 0) ? 'R' : + (p_state < 0) ? 'U' : + (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p_state & TASK_STOPPED) ? 'T' : + (p_state & TASK_TRACED) ? 'C' : (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' : (tsk->exit_state & EXIT_DEAD) ? 'E' : - (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; + (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk, tsk->thread.ksp, tsk->thread.regs, diff --git a/block/blk-mq.c b/block/blk-mq.c index 56270bb06365..e41edae97487 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3886,7 +3886,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) { struct blk_mq_hw_ctx *hctx; - long state; + unsigned int state; if (!blk_qc_t_valid(cookie) || !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ca2aedd8ee7d..190e714cb565 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2328,7 +2328,7 @@ static bool md_in_flight_bios(struct mapped_device *md) return sum != 0; } -static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state) +static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state) { int r = 0; DEFINE_WAIT(wait); @@ -2351,7 +2351,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state return r; } -static int dm_wait_for_completion(struct mapped_device *md, long task_state) +static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state) { int r = 0; @@ -2478,7 +2478,7 @@ static void unlock_fs(struct mapped_device *md) * are being added to md->deferred list. */ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, - unsigned suspend_flags, long task_state, + unsigned suspend_flags, unsigned int task_state, int dmf_suspended_flag) { bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 187b3f2b9202..3d73cbb439fa 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1537,7 +1537,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, { const struct cred *cred; unsigned int i, len; - + unsigned int state; + /* first copy the parameters from user space */ memset(psinfo, 0, sizeof(struct elf_prpsinfo)); @@ -1559,7 +1560,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); - i = p->state ? ffz(~p->state) + 1 : 0; + state = READ_ONCE(p->__state); + i = state ? ffz(~state) + 1 : 0; psinfo->pr_state = i; psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; psinfo->pr_zomb = psinfo->pr_sname == 'Z'; @@ -1571,7 +1573,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); - + return 0; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2c99b102c860..ab9c31ddffda 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1331,6 +1331,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, { const struct cred *cred; unsigned int i, len; + unsigned int state; /* first copy the parameters from user space */ memset(psinfo, 0, sizeof(struct elf_prpsinfo)); @@ -1353,7 +1354,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_pgrp = task_pgrp_vnr(p); psinfo->pr_sid = task_session_vnr(p); - i = p->state ? ffz(~p->state) + 1 : 0; + state = READ_ONCE(p->__state); + i = state ? ffz(~state) + 1 : 0; psinfo->pr_state = i; psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; psinfo->pr_zomb = psinfo->pr_sname == 'Z'; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 14f92285d04f..dd7a6c62b56f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -337,7 +337,7 @@ out: return ret; } -static inline long userfaultfd_get_blocking_state(unsigned int flags) +static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags) { if (flags & FAULT_FLAG_INTERRUPTIBLE) return TASK_INTERRUPTIBLE; @@ -370,7 +370,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) struct userfaultfd_wait_queue uwq; vm_fault_t ret = VM_FAULT_SIGBUS; bool must_wait; - long blocking_state; + unsigned int blocking_state; /* * We don't do userfault handling for the final child pid update. diff --git a/include/linux/sched.h b/include/linux/sched.h index 395c8906f502..50db9496c99d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -113,13 +113,13 @@ struct task_group; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) -#define task_is_running(task) (READ_ONCE((task)->state) == TASK_RUNNING) +#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) +#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) +#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) +#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -134,14 +134,14 @@ struct task_group; do { \ WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->state, (state_value)); \ + smp_store_mb(current->__state, (state_value)); \ } while (0) #define set_special_state(state_value) \ @@ -150,7 +150,7 @@ struct task_group; WARN_ON_ONCE(!is_special_task_state(state_value)); \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ current->task_state_change = _THIS_IP_; \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) #else @@ -192,10 +192,10 @@ struct task_group; * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ - current->state = (state_value) + WRITE_ONCE(current->__state, (state_value)) #define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) + smp_store_mb(current->__state, (state_value)) /* * set_special_state() should be used for those states when the blocking task @@ -207,13 +207,13 @@ struct task_group; do { \ unsigned long flags; /* may shadow */ \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ - current->state = (state_value); \ + WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) #endif -#define get_current_state() READ_ONCE(current->state) +#define get_current_state() READ_ONCE(current->__state) /* Task command name length: */ #define TASK_COMM_LEN 16 @@ -666,8 +666,7 @@ struct task_struct { */ struct thread_info thread_info; #endif - /* -1 unrunnable, 0 runnable, >0 stopped: */ - volatile long state; + unsigned int __state; /* * This begins the randomizable portion of task_struct. Only @@ -1532,7 +1531,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) static inline unsigned int task_state_index(struct task_struct *tsk) { - unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int tsk_state = READ_ONCE(tsk->__state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); @@ -1840,10 +1839,10 @@ static __always_inline void scheduler_ipi(void) */ preempt_fold_need_resched(); } -extern unsigned long wait_task_inactive(struct task_struct *, long match_state); +extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { return 1; } diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index ae51f4529fc9..b5035afa2396 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -14,7 +14,7 @@ extern void dump_cpu_task(int cpu); /* * Only dump TASK_* tasks. (0 for all tasks) */ -extern void show_state_filter(unsigned long state_filter); +extern void show_state_filter(unsigned int state_filter); static inline void show_state(void) { diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7f4278fa21fe..c9cf678c347d 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -382,7 +382,7 @@ static inline int fatal_signal_pending(struct task_struct *p) return task_sigpending(p) && __fatal_signal_pending(p); } -static inline int signal_pending_state(long state, struct task_struct *p) +static inline int signal_pending_state(unsigned int state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; diff --git a/init/init_task.c b/init/init_task.c index 8b08c2e19cbb..562f2ef8d157 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -71,7 +71,7 @@ struct task_struct init_task .thread_info = INIT_THREAD_INFO(init_task), .stack_refcount = REFCOUNT_INIT(1), #endif - .state = 0, + .__state = 0, .stack = init_stack, .usage = REFCOUNT_INIT(2), .flags = PF_KTHREAD, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 1f274d7fc934..ee93b6e89587 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -713,7 +713,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) css_task_iter_start(&cgrp->self, 0, &it); while ((tsk = css_task_iter_next(&it))) { - switch (tsk->state) { + switch (READ_ONCE(tsk->__state)) { case TASK_RUNNING: stats->nr_running++; break; diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 91bb666d7c03..9f50d22d68e6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -609,23 +609,25 @@ unsigned long kdb_task_state_string(const char *s) */ char kdb_task_state_char (const struct task_struct *p) { - int cpu; - char state; + unsigned int p_state; unsigned long tmp; + char state; + int cpu; if (!p || copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return 'E'; cpu = kdb_process_cpu(p); - state = (p->state == 0) ? 'R' : - (p->state < 0) ? 'U' : - (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : - (p->state & TASK_STOPPED) ? 'T' : - (p->state & TASK_TRACED) ? 'C' : + p_state = READ_ONCE(p->__state); + state = (p_state == 0) ? 'R' : + (p_state < 0) ? 'U' : + (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p_state & TASK_STOPPED) ? 'T' : + (p_state & TASK_TRACED) ? 'C' : (p->exit_state & EXIT_ZOMBIE) ? 'Z' : (p->exit_state & EXIT_DEAD) ? 'E' : - (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; + (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; if (is_idle_task(p)) { /* Idle task. Is it really idle, apart from the kdb * interrupt? */ diff --git a/kernel/fork.c b/kernel/fork.c index e595e77913eb..1a9af73b47c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -425,7 +425,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) static void release_task_stack(struct task_struct *tsk) { - if (WARN_ON(tsk->state != TASK_DEAD)) + if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) return; /* Better to leak the stack than to free prematurely */ account_kernel_stack(tsk, -1); @@ -2392,7 +2392,7 @@ bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: - p->state = TASK_DEAD; + WRITE_ONCE(p->__state, TASK_DEAD); put_task_stack(p); delayed_free_task(p); fork_out: diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 396ebaebea3f..b0ce8b3f3822 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -196,7 +196,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) last_break = jiffies; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ - if (t->state == TASK_UNINTERRUPTIBLE) + if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); } unlock: diff --git a/kernel/kthread.c b/kernel/kthread.c index 3d326833092b..7bbfeeb0e956 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -457,7 +457,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); -static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state) +static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state) { unsigned long flags; @@ -473,7 +473,7 @@ static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mas raw_spin_unlock_irqrestore(&p->pi_lock, flags); } -static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) +static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state) { __kthread_bind_mask(p, cpumask_of(cpu), state); } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 013e1b08a1bf..d2df5e68b503 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -923,7 +923,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter, * Lock a mutex (possibly interruptible), slowpath: */ static __always_inline int __sched -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, +__mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { @@ -1098,14 +1098,14 @@ err_early_kill: } static int __sched -__mutex_lock(struct mutex *lock, long state, unsigned int subclass, +__mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip) { return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); } static int __sched -__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, +__ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx) { diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 406818196a9f..b5d9bb5202c6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1135,7 +1135,7 @@ void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) * * Must be called with lock->wait_lock held and interrupts disabled */ -static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, +static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, struct hrtimer_sleeper *timeout, struct rt_mutex_waiter *waiter) { @@ -1190,7 +1190,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, /* * Slow path lock function: */ -static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, +static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk) { diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 809b0016d344..16bfbb10c74d 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -889,7 +889,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) * Wait for the read lock to be granted */ static struct rw_semaphore __sched * -rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, int state) +rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state) { long adjustment = -RWSEM_READER_BIAS; long rcnt = (count >> RWSEM_READER_SHIFT); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 2997ca600d18..f8589bf8d7dc 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -197,7 +197,7 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) { - task->state = __TASK_TRACED; + WRITE_ONCE(task->__state, __TASK_TRACED); ret = true; } spin_unlock_irq(&task->sighand->siglock); @@ -207,7 +207,7 @@ static bool ptrace_freeze_traced(struct task_struct *task) static void ptrace_unfreeze_traced(struct task_struct *task) { - if (task->state != __TASK_TRACED) + if (READ_ONCE(task->__state) != __TASK_TRACED) return; WARN_ON(!task->ptrace || task->parent != current); @@ -217,11 +217,11 @@ static void ptrace_unfreeze_traced(struct task_struct *task) * Recheck state under the lock to close this race. */ spin_lock_irq(&task->sighand->siglock); - if (task->state == __TASK_TRACED) { + if (READ_ONCE(task->__state) == __TASK_TRACED) { if (__fatal_signal_pending(task)) wake_up_state(task, __TASK_TRACED); else - task->state = TASK_TRACED; + WRITE_ONCE(task->__state, TASK_TRACED); } spin_unlock_irq(&task->sighand->siglock); } @@ -256,7 +256,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) */ read_lock(&tasklist_lock); if (child->ptrace && child->parent == current) { - WARN_ON(child->state == __TASK_TRACED); + WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). @@ -273,7 +273,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) * ptrace_stop() changes ->state back to TASK_RUNNING, * so we should not worry about leaking __TASK_TRACED. */ - WARN_ON(child->state == __TASK_TRACED); + WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); ret = -ESRCH; } } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 29d2f4c647d3..194b9c145c40 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1831,10 +1831,10 @@ rcu_torture_stats_print(void) srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, &flags, &gp_seq); wtp = READ_ONCE(writer_task); - pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#lx cpu %d\n", + pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n", rcu_torture_writer_state_getname(), rcu_torture_writer_state, gp_seq, flags, - wtp == NULL ? ~0UL : wtp->state, + wtp == NULL ? ~0U : wtp->__state, wtp == NULL ? -1 : (int)task_cpu(wtp)); if (!splatted && wtp) { sched_show_task(wtp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 59b95cc5cbdf..acb2288063b5 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -460,12 +460,12 @@ static void rcu_check_gp_kthread_starvation(void) if (rcu_is_gp_kthread_starving(&j)) { cpu = gpk ? task_cpu(gpk) : -1; - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n", rcu_state.name, j, (long)rcu_seq_current(&rcu_state.gp_seq), data_race(rcu_state.gp_flags), gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, cpu); + gpk ? gpk->__state : ~0, cpu); if (gpk) { pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); @@ -503,12 +503,12 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && gpk && !READ_ONCE(gpk->on_rq)) { cpu = task_cpu(gpk); - pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n", + pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x\n", rcu_state.name, (jiffies - jiffies_fqs), (long)rcu_seq_current(&rcu_state.gp_seq), data_race(rcu_state.gp_flags), gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, - gpk->state); + gpk->__state); pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); } @@ -735,9 +735,9 @@ void show_rcu_gp_kthreads(void) ja = j - data_race(rcu_state.gp_activity); jr = j - data_race(rcu_state.gp_req_activity); jw = j - data_race(rcu_state.gp_wake_time); - pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", + pr_info("%s: wait state: %s(%d) ->state: %#x delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, t ? t->state : 0x1ffffL, + rcu_state.gp_state, t ? t->__state : 0x1ffff, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq), (long)data_race(rcu_state.gp_seq), (long)data_race(rcu_get_root()->gp_seq_needed), diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 45ebb3cfe86c..309745a7ec51 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2638,7 +2638,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag return -EINVAL; } - if (task_running(rq, p) || p->state == TASK_WAKING) { + if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) { /* * MIGRATE_ENABLE gets here because 'p == current', but for * anything else we cannot do is_migration_disabled(), punt @@ -2781,19 +2781,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { #ifdef CONFIG_SCHED_DEBUG + unsigned int state = READ_ONCE(p->__state); + /* * We should never call set_task_cpu() on a blocked task, * ttwu() will sort out the placement. */ - WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && - !p->on_rq); + WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); /* * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING, * because schedstat_wait_{start,end} rebase migrating task's wait_start * time relying on p->on_rq. */ - WARN_ON_ONCE(p->state == TASK_RUNNING && + WARN_ON_ONCE(state == TASK_RUNNING && p->sched_class == &fair_sched_class && (p->on_rq && !task_on_rq_migrating(p))); @@ -2965,7 +2966,7 @@ out: * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -unsigned long wait_task_inactive(struct task_struct *p, long match_state) +unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { int running, queued; struct rq_flags rf; @@ -2993,7 +2994,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * is actually now running somewhere else! */ while (task_running(rq, p)) { - if (match_state && unlikely(p->state != match_state)) + if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) return 0; cpu_relax(); } @@ -3008,7 +3009,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; - if (!match_state || p->state == match_state) + if (!match_state || READ_ONCE(p->__state) == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); @@ -3317,7 +3318,7 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, struct rq_flags *rf) { check_preempt_curr(rq, p, wake_flags); - p->state = TASK_RUNNING; + WRITE_ONCE(p->__state, TASK_RUNNING); trace_sched_wakeup(p); #ifdef CONFIG_SMP @@ -3709,12 +3710,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * - we're serialized against set_special_state() by virtue of * it disabling IRQs (this allows not taking ->pi_lock). */ - if (!(p->state & state)) + if (!(READ_ONCE(p->__state) & state)) goto out; success = 1; trace_sched_waking(p); - p->state = TASK_RUNNING; + WRITE_ONCE(p->__state, TASK_RUNNING); trace_sched_wakeup(p); goto out; } @@ -3727,7 +3728,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!(p->state & state)) + if (!(READ_ONCE(p->__state) & state)) goto unlock; trace_sched_waking(p); @@ -3793,7 +3794,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * TASK_WAKING such that we can unlock p->pi_lock before doing the * enqueue, such as ttwu_queue_wakelist(). */ - p->state = TASK_WAKING; + WRITE_ONCE(p->__state, TASK_WAKING); /* * If the owning (remote) CPU is still in the middle of schedule() with @@ -3886,7 +3887,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t ret = func(p, arg); rq_unlock(rq, &rf); } else { - switch (p->state) { + switch (READ_ONCE(p->__state)) { case TASK_RUNNING: case TASK_WAKING: break; @@ -4086,7 +4087,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ - p->state = TASK_NEW; + p->__state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. @@ -4192,7 +4193,7 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - p->state = TASK_RUNNING; + WRITE_ONCE(p->__state, TASK_RUNNING); #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: @@ -4554,7 +4555,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) * running on another CPU and we could rave with its RUNNING -> DEAD * transition, resulting in a double drop. */ - prev_state = prev->state; + prev_state = READ_ONCE(prev->__state); vtime_task_switch(prev); perf_event_task_sched_in(prev, current); finish_task(prev); @@ -5248,7 +5249,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - if (!preempt && prev->state && prev->non_block_count) { + if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", prev->comm, prev->pid, prev->non_block_count); dump_stack(); @@ -5874,10 +5875,10 @@ static void __sched notrace __schedule(bool preempt) * - we form a control dependency vs deactivate_task() below. * - ptrace_{,un}freeze_traced() can change ->state underneath us. */ - prev_state = prev->state; + prev_state = READ_ONCE(prev->__state); if (!preempt && prev_state) { if (signal_pending_state(prev_state, prev)) { - prev->state = TASK_RUNNING; + WRITE_ONCE(prev->__state, TASK_RUNNING); } else { prev->sched_contributes_to_load = (prev_state & TASK_UNINTERRUPTIBLE) && @@ -6049,7 +6050,7 @@ void __sched schedule_idle(void) * current task can be in any other state. Note, idle is always in the * TASK_RUNNING state. */ - WARN_ON_ONCE(current->state); + WARN_ON_ONCE(current->__state); do { __schedule(false); } while (need_resched()); @@ -8176,26 +8177,28 @@ EXPORT_SYMBOL_GPL(sched_show_task); static inline bool state_filter_match(unsigned long state_filter, struct task_struct *p) { + unsigned int state = READ_ONCE(p->__state); + /* no filter, everything matches */ if (!state_filter) return true; /* filter, but doesn't match */ - if (!(p->state & state_filter)) + if (!(state & state_filter)) return false; /* * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows * TASK_KILLABLE). */ - if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) + if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) return false; return true; } -void show_state_filter(unsigned long state_filter) +void show_state_filter(unsigned int state_filter) { struct task_struct *g, *p; @@ -8252,7 +8255,7 @@ void __init init_idle(struct task_struct *idle, int cpu) raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_rq_lock(rq); - idle->state = TASK_RUNNING; + idle->__state = TASK_RUNNING; idle->se.exec_start = sched_clock(); /* * PF_KTHREAD should already be set at this point; regardless, make it @@ -9567,7 +9570,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) * has happened. This would lead to problems with PELT, due to * move wanting to detach+attach while we're not attached yet. */ - if (task->state == TASK_NEW) + if (READ_ONCE(task->__state) == TASK_NEW) ret = -EINVAL; raw_spin_unlock_irq(&task->pi_lock); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 3829c5a1b936..22878cd5bd70 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -348,10 +348,10 @@ static void task_non_contending(struct task_struct *p) if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { if (dl_task(p)) sub_running_bw(dl_se, dl_rq); - if (!dl_task(p) || p->state == TASK_DEAD) { + if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); - if (p->state == TASK_DEAD) + if (READ_ONCE(p->__state) == TASK_DEAD) sub_rq_bw(&p->dl, &rq->dl); raw_spin_lock(&dl_b->lock); __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); @@ -1355,10 +1355,10 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); - if (!dl_task(p) || p->state == TASK_DEAD) { + if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); - if (p->state == TASK_DEAD && dl_se->dl_non_contending) { + if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) { sub_running_bw(&p->dl, dl_rq_of_se(&p->dl)); sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl)); dl_se->dl_non_contending = 0; @@ -1722,7 +1722,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused { struct rq *rq; - if (p->state != TASK_WAKING) + if (READ_ONCE(p->__state) != TASK_WAKING) return; rq = task_rq(p); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5d1a6aace138..7b8990fd4896 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -993,11 +993,14 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) { struct task_struct *tsk = task_of(se); + unsigned int state; - if (tsk->state & TASK_INTERRUPTIBLE) + /* XXX racy against TTWU */ + state = READ_ONCE(tsk->__state); + if (state & TASK_INTERRUPTIBLE) __schedstat_set(se->statistics.sleep_start, rq_clock(rq_of(cfs_rq))); - if (tsk->state & TASK_UNINTERRUPTIBLE) + if (state & TASK_UNINTERRUPTIBLE) __schedstat_set(se->statistics.block_start, rq_clock(rq_of(cfs_rq))); } @@ -6888,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) * min_vruntime -- the latter is done by enqueue_entity() when placing * the task on the new runqueue. */ - if (p->state == TASK_WAKING) { + if (READ_ONCE(p->__state) == TASK_WAKING) { struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 min_vruntime; @@ -11053,7 +11056,7 @@ static inline bool vruntime_normalized(struct task_struct *p) * waiting for actually being woken up by sched_ttwu_pending(). */ if (!se->sum_exec_runtime || - (p->state == TASK_WAKING && p->sched_remote_wakeup)) + (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup)) return true; return false; diff --git a/lib/syscall.c b/lib/syscall.c index ba13e924c430..006e256d2264 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -68,13 +68,13 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info */ int task_current_syscall(struct task_struct *target, struct syscall_info *info) { - long state; unsigned long ncsw; + unsigned int state; if (target == current) return collect_syscall(target, info); - state = target->state; + state = READ_ONCE(target->__state); if (unlikely(!state)) return -EAGAIN; diff --git a/net/core/dev.c b/net/core/dev.c index ef8cf7619baf..2512f672bf8a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4363,7 +4363,7 @@ static inline void ____napi_schedule(struct softnet_data *sd, * makes sure to proceed with napi polling * if the thread is explicitly woken from here. */ - if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE) + if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE) set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); wake_up_process(thread); return; -- cgit v1.2.3