diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-04-11 23:38:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-04-11 23:38:16 -0700 |
commit | 06ea4c38bce80f3eb99b01d7c17419eb1a49fec2 (patch) | |
tree | 2af3797bce33f38ecfe39aa34ecec01e8ccd4b9c | |
parent | af94bdfa798b7da92cc82609acca61a287a004a4 (diff) | |
parent | bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e (diff) |
Merge branch 'for-4.11-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"This contains fixes for two long standing subtle bugs:
- kthread_bind() on a new kthread binds it to specific CPUs and
prevents userland from messing with the affinity or cgroup
membership. Unfortunately, for cgroup membership, there's a window
between kthread creation and kthread_bind*() invocation where the
kthread can be moved into a non-root cgroup by userland.
Depending on what controllers are in effect, this can assign the
kthread unexpected attributes. For example, in the reported case,
workqueue workers ended up in a non-root cpuset cgroups and had
their CPU affinities overridden. This broke workqueue invariants
and led to workqueue stalls.
Fixed by closing the window between kthread creation and
kthread_bind() as suggested by Oleg.
- There was a bug in cgroup mount path which could allow two
competing mount attempts to attach the same cgroup_root to two
different superblocks.
This was caused by mishandling return value from kernfs_pin_sb().
Fixed"
* 'for-4.11-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: avoid attaching a cgroup root to two different superblocks
cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups
-rw-r--r-- | include/linux/cgroup.h | 21 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 9 | ||||
-rw-r--r-- | kernel/kthread.c | 3 |
5 files changed, 34 insertions, 5 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b43fbb141c..af9c86e958bd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +static inline void cgroup_init_kthreadd(void) +{ + /* + * kthreadd is inherited by all kthreads, keep it in the root so + * that the new kthreads are guaranteed to stay in the root until + * initialization is finished. + */ + current->no_cgroup_migration = 1; +} + +static inline void cgroup_kthread_ready(void) +{ + /* + * This kthread finished initialization. The creator should have + * set PF_NO_SETAFFINITY if this kthread should stay in the root. + */ + current->no_cgroup_migration = 0; +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_kthreadd(void) {} +static inline void cgroup_kthread_ready(void) {} static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..4cf9a59a4d08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -604,6 +604,10 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif +#ifdef CONFIG_CGROUPS + /* disallow userland-initiated cgroup migration */ + unsigned no_cgroup_migration:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 1dc22f6b49f5..12e19f0636ea 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * path is super cold. Let's just sleep a bit and retry. */ pinned_sb = kernfs_pin_sb(root->kf_root, NULL); - if (IS_ERR(pinned_sb) || + if (IS_ERR_OR_NULL(pinned_sb) || !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); if (!IS_ERR_OR_NULL(pinned_sb)) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 48851327a15e..687f5e0194ef 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, tsk = tsk->group_leader; /* - * Workqueue threads may acquire PF_NO_SETAFFINITY and become - * trapped in a cpuset, or RT worker may be born in a cgroup - * with no rt_runtime allocated. Just say no. + * kthreads may acquire PF_NO_SETAFFINITY during initialization. + * If userland migrates such a kthread to a non-root cgroup, it can + * become trapped in a cpuset, or RT kthread may be born in a + * cgroup with no rt_runtime allocated. Just say no. */ - if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { + if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; goto out_unlock_rcu; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 2f26adea0f84..26db528c1d88 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -20,6 +20,7 @@ #include <linux/freezer.h> #include <linux/ptrace.h> #include <linux/uaccess.h> +#include <linux/cgroup.h> #include <trace/events/sched.h> static DEFINE_SPINLOCK(kthread_create_lock); @@ -225,6 +226,7 @@ static int kthread(void *_create) ret = -EINTR; if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) { + cgroup_kthread_ready(); __kthread_parkme(self); ret = threadfn(data); } @@ -538,6 +540,7 @@ int kthreadd(void *unused) set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; + cgroup_init_kthreadd(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); |