From f440d98f8ebab02a768c1de17395e4239af9a97d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 1 Mar 2013 15:02:15 +0800 Subject: cpuset: use cgroup_name() in cpuset_print_task_mems_allowed() Use cgroup_name() instead of cgrp->dentry->name. This makes the code a bit simpler. While at it, remove cpuset_name and make cpuset_nodelist a local variable to cpuset_print_task_mems_allowed(). Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4f9dfe43ecb..ace5bfcdcb3 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -264,17 +264,6 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(cpuset_mutex); static DEFINE_MUTEX(callback_mutex); -/* - * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist - * buffers. They are statically allocated to prevent using excess stack - * when calling cpuset_print_task_mems_allowed(). - */ -#define CPUSET_NAME_LEN (128) -#define CPUSET_NODELIST_LEN (256) -static char cpuset_name[CPUSET_NAME_LEN]; -static char cpuset_nodelist[CPUSET_NODELIST_LEN]; -static DEFINE_SPINLOCK(cpuset_buffer_lock); - /* * CPU / memory hotplug is handled asynchronously. */ @@ -2592,6 +2581,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); } +#define CPUSET_NODELIST_LEN (256) + /** * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed * @task: pointer to task_struct of some task. @@ -2602,24 +2593,19 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, */ void cpuset_print_task_mems_allowed(struct task_struct *tsk) { - struct dentry *dentry; + /* Statically allocated to prevent using excess stack. */ + static char cpuset_nodelist[CPUSET_NODELIST_LEN]; + static DEFINE_SPINLOCK(cpuset_buffer_lock); - dentry = task_cs(tsk)->css.cgroup->dentry; - spin_lock(&cpuset_buffer_lock); + struct cgroup *cgrp = task_cs(tsk)->css.cgroup; - if (!dentry) { - strcpy(cpuset_name, "/"); - } else { - spin_lock(&dentry->d_lock); - strlcpy(cpuset_name, (const char *)dentry->d_name.name, - CPUSET_NAME_LEN); - spin_unlock(&dentry->d_lock); - } + spin_lock(&cpuset_buffer_lock); nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, tsk->mems_allowed); printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", - tsk->comm, cpuset_name, cpuset_nodelist); + tsk->comm, cgroup_name(cgrp), cpuset_nodelist); + spin_unlock(&cpuset_buffer_lock); } -- cgit v1.2.3 From cfb5966bef85412dab9c93553db10b3e99ac32e8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 12 Mar 2013 10:28:39 +0800 Subject: cpuset: fix RCU lockdep splat in cpuset_print_task_mems_allowed() Sasha reported a lockdep warning when OOM was triggered. The reason is cgroup_name() should be called with rcu_read_lock() held. Reported-by: Sasha Levin Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ace5bfcdcb3..efbfca7a33e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2599,6 +2599,7 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) struct cgroup *cgrp = task_cs(tsk)->css.cgroup; + rcu_read_lock(); spin_lock(&cpuset_buffer_lock); nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, @@ -2607,6 +2608,7 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) tsk->comm, cgroup_name(cgrp), cpuset_nodelist); spin_unlock(&cpuset_buffer_lock); + rcu_read_unlock(); } /* -- cgit v1.2.3 From 081aa458c38ba576bdd4265fc807fa95b48b9e79 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 13 Mar 2013 09:17:09 +0800 Subject: cgroup: consolidate cgroup_attach_task() and cgroup_attach_proc() These two functions share most of the code. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 +- kernel/cgroup.c | 109 +++++++++---------------------------------------- kernel/cpuset.c | 2 +- 3 files changed, 23 insertions(+), 91 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7e818a3ef60..01c48c6806d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -693,7 +693,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, struct cgroup_iter *it); void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); -int cgroup_attach_task(struct cgroup *, struct task_struct *); +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, + bool threadgroup); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54689fc008f..04fa2abf94b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -59,7 +59,7 @@ #include /* TODO: replace with more sophisticated array */ #include #include -#include /* used in cgroup_attach_proc */ +#include /* used in cgroup_attach_task */ #include #include @@ -1943,82 +1943,6 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, put_css_set(oldcg); } -/** - * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' - * @cgrp: the cgroup the task is attaching to - * @tsk: the task to be attached - * - * Call with cgroup_mutex and threadgroup locked. May take task_lock of - * @tsk during call. - */ -int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) -{ - int retval = 0; - struct cgroup_subsys *ss, *failed_ss = NULL; - struct cgroup *oldcgrp; - struct cgroupfs_root *root = cgrp->root; - struct cgroup_taskset tset = { }; - struct css_set *newcg; - - /* @tsk either already exited or can't exit until the end */ - if (tsk->flags & PF_EXITING) - return -ESRCH; - - /* Nothing to do if the task is already in that cgroup */ - oldcgrp = task_cgroup_from_root(tsk, root); - if (cgrp == oldcgrp) - return 0; - - tset.single.task = tsk; - tset.single.cgrp = oldcgrp; - - for_each_subsys(root, ss) { - if (ss->can_attach) { - retval = ss->can_attach(cgrp, &tset); - if (retval) { - /* - * Remember on which subsystem the can_attach() - * failed, so that we only call cancel_attach() - * against the subsystems whose can_attach() - * succeeded. (See below) - */ - failed_ss = ss; - goto out; - } - } - } - - newcg = find_css_set(tsk->cgroups, cgrp); - if (!newcg) { - retval = -ENOMEM; - goto out; - } - - cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); - - for_each_subsys(root, ss) { - if (ss->attach) - ss->attach(cgrp, &tset); - } - -out: - if (retval) { - for_each_subsys(root, ss) { - if (ss == failed_ss) - /* - * This subsystem was the one that failed the - * can_attach() check earlier, so we don't need - * to call cancel_attach() against it or any - * remaining subsystems. - */ - break; - if (ss->cancel_attach) - ss->cancel_attach(cgrp, &tset); - } - } - return retval; -} - /** * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' * @from: attach to all cgroups of a given task @@ -2033,7 +1957,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) for_each_active_root(root) { struct cgroup *from_cg = task_cgroup_from_root(from, root); - retval = cgroup_attach_task(from_cg, tsk); + retval = cgroup_attach_task(from_cg, tsk, false); if (retval) break; } @@ -2044,21 +1968,22 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /** - * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup + * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup * @cgrp: the cgroup to attach to - * @leader: the threadgroup leader task_struct of the group to be attached + * @tsk: the task or the leader of the threadgroup to be attached + * @threadgroup: attach the whole threadgroup? * * Call holding cgroup_mutex and the group_rwsem of the leader. Will take - * task_lock of each thread in leader's threadgroup individually in turn. + * task_lock of @tsk or each thread in the threadgroup individually in turn. */ -static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) +int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, + bool threadgroup) { int retval, i, group_size; struct cgroup_subsys *ss, *failed_ss = NULL; - /* guaranteed to be initialized later, but the compiler needs this */ struct cgroupfs_root *root = cgrp->root; /* threadgroup list cursor and array */ - struct task_struct *tsk; + struct task_struct *leader = tsk; struct task_and_cgroup *tc; struct flex_array *group; struct cgroup_taskset tset = { }; @@ -2070,7 +1995,10 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * group - group_rwsem prevents new threads from appearing, and if * threads exit, this will just be an over-estimate. */ - group_size = get_nr_threads(leader); + if (threadgroup) + group_size = get_nr_threads(tsk); + else + group_size = 1; /* flex_array supports very large thread-groups better than kmalloc. */ group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL); if (!group) @@ -2080,7 +2008,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) if (retval) goto out_free_group_list; - tsk = leader; i = 0; /* * Prevent freeing of tasks while we take a snapshot. Tasks that are @@ -2109,6 +2036,9 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; + + if (!threadgroup) + break; } while_each_thread(leader, tsk); rcu_read_unlock(); /* remember the number of threads in the array for later. */ @@ -2262,9 +2192,10 @@ retry_find_task: put_task_struct(tsk); goto retry_find_task; } - ret = cgroup_attach_proc(cgrp, tsk); - } else - ret = cgroup_attach_task(cgrp, tsk); + } + + ret = cgroup_attach_task(cgrp, tsk, threadgroup); + threadgroup_unlock(tsk); put_task_struct(tsk); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index efbfca7a33e..98d458aad78 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2008,7 +2008,7 @@ static void cpuset_do_move_task(struct task_struct *tsk, struct cgroup *new_cgroup = scan->data; cgroup_lock(); - cgroup_attach_task(new_cgroup, tsk); + cgroup_attach_task(new_cgroup, tsk, false); cgroup_unlock(); } -- cgit v1.2.3 From 8cc9934520e7f752fe45d5199664d741ba24a932 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 7 Apr 2013 09:29:50 -0700 Subject: cgroup, cpuset: replace move_member_tasks_to_cpuset() with cgroup_transfer_tasks() When a cpuset becomes empty (no CPU or memory), its tasks are transferred with the nearest ancestor with execution resources. This is implemented using cgroup_scan_tasks() with a callback which grabs cgroup_mutex and invokes cgroup_attach_task() on each task. Both cgroup_mutex and cgroup_attach_task() are scheduled to be unexported. Implement cgroup_transfer_tasks() in cgroup proper which is essentially the same as move_member_tasks_to_cpuset() except that it takes cgroups instead of cpusets and @to comes before @from like normal functions with those arguments, and replace move_member_tasks_to_cpuset() with it. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 28 +++++++++++++++++++++++++++ kernel/cpuset.c | 51 ++++++-------------------------------------------- 3 files changed, 35 insertions(+), 45 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 01c48c6806d..f8eb01d75dd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -696,6 +696,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, bool threadgroup); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4aee5bdd66c..147d7ccb3b0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3269,6 +3269,34 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) return 0; } +static void cgroup_transfer_one_task(struct task_struct *task, + struct cgroup_scanner *scan) +{ + struct cgroup *new_cgroup = scan->data; + + cgroup_lock(); + cgroup_attach_task(new_cgroup, task, false); + cgroup_unlock(); +} + +/** + * cgroup_trasnsfer_tasks - move tasks from one cgroup to another + * @to: cgroup to which the tasks will be moved + * @from: cgroup in which the tasks currently reside + */ +int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) +{ + struct cgroup_scanner scan; + + scan.cg = from; + scan.test_task = NULL; /* select all tasks in cgroup */ + scan.process_task = cgroup_transfer_one_task; + scan.heap = NULL; + scan.data = to; + + return cgroup_scan_tasks(&scan); +} + /* * Stuff for reading the 'tasks'/'procs' files. * diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 98d458aad78..866d78ee793 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1994,50 +1994,6 @@ int __init cpuset_init(void) return 0; } -/** - * cpuset_do_move_task - move a given task to another cpuset - * @tsk: pointer to task_struct the task to move - * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner - * - * Called by cgroup_scan_tasks() for each task in a cgroup. - * Return nonzero to stop the walk through the tasks. - */ -static void cpuset_do_move_task(struct task_struct *tsk, - struct cgroup_scanner *scan) -{ - struct cgroup *new_cgroup = scan->data; - - cgroup_lock(); - cgroup_attach_task(new_cgroup, tsk, false); - cgroup_unlock(); -} - -/** - * move_member_tasks_to_cpuset - move tasks from one cpuset to another - * @from: cpuset in which the tasks currently reside - * @to: cpuset to which the tasks will be moved - * - * Called with cpuset_mutex held - * callback_mutex must not be held, as cpuset_attach() will take it. - * - * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, - * calling callback functions for each. - */ -static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) -{ - struct cgroup_scanner scan; - - scan.cg = from->css.cgroup; - scan.test_task = NULL; /* select all tasks in cgroup */ - scan.process_task = cpuset_do_move_task; - scan.heap = NULL; - scan.data = to->css.cgroup; - - if (cgroup_scan_tasks(&scan)) - printk(KERN_ERR "move_member_tasks_to_cpuset: " - "cgroup_scan_tasks failed\n"); -} - /* * If CPU and/or memory hotplug handlers, below, unplug any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, @@ -2058,7 +2014,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) nodes_empty(parent->mems_allowed)) parent = parent_cs(parent); - move_member_tasks_to_cpuset(cs, parent); + if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { + rcu_read_lock(); + printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n", + cgroup_name(cs->css.cgroup)); + rcu_read_unlock(); + } } /** -- cgit v1.2.3 From e0e80a02e5701c8790bd348ab59edb154fbda60b Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Sat, 27 Apr 2013 06:52:43 -0700 Subject: cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn() In cpuset_hotplug_workfn(), partition_sched_domains() is called without hotplug lock held, which is actually needed (stated in the function header of partition_sched_domains()). This patch tries to use rebuild_sched_domains() to solve the above issue, and makes the code looks a little simpler. Signed-off-by: Li Zhong Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 866d78ee793..8f0f45e002f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2172,17 +2172,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) flush_workqueue(cpuset_propagate_hotplug_wq); /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated) { - struct sched_domain_attr *attr; - cpumask_var_t *doms; - int ndoms; - - mutex_lock(&cpuset_mutex); - ndoms = generate_sched_domains(&doms, &attr); - mutex_unlock(&cpuset_mutex); - - partition_sched_domains(ndoms, doms, attr); - } + if (cpus_updated) + rebuild_sched_domains(); } void cpuset_update_active_cpus(bool cpu_online) -- cgit v1.2.3 From 5b16c2a493fe1e439c4e7ad51f58153968ca6cf3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 27 Apr 2013 06:52:43 -0700 Subject: cpuset: fix cpu hotplug vs rebuild_sched_domains() race rebuild_sched_domains() might pass doms with offlined cpu to partition_sched_domains(), which results in an oops: general protection fault: 0000 [#1] SMP ... RIP: 0010:[] [] get_group+0x6e/0x90 ... Call Trace: [] build_sched_domains+0x70c/0xcb0 [] ? build_sched_domains+0x937/0xcb0 [] ? kfree+0xe4/0x1b0 [] ? partition_sched_domains+0xc0/0x470 [] partition_sched_domains+0x2e5/0x470 [] ? partition_sched_domains+0xc0/0x470 [] ? generate_sched_domains+0xc7/0x530 [] rebuild_sched_domains_locked+0x38/0x70 [] cpuset_write_resmask+0x1a4/0x500 [] ? cpuset_mount+0xe0/0xe0 [] ? cpuset_read_u64+0x100/0x100 [] ? cgroup_iter_next+0x90/0x90 [] ? cpuset_css_offline+0x70/0x70 [] cgroup_file_write+0x133/0x2e0 [] vfs_write+0xcb/0x130 [] sys_write+0x64/0xa0 Reported-by: Li Zhong Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8f0f45e002f..93d140f159c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -769,12 +769,20 @@ static void rebuild_sched_domains_locked(void) lockdep_assert_held(&cpuset_mutex); get_online_cpus(); + /* + * We have raced with CPU hotplug. Don't do anything to avoid + * passing doms with offlined cpu to partition_sched_domains(). + * Anyways, hotplug work item will rebuild sched domains. + */ + if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask)) + goto out; + /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); - +out: put_online_cpus(); } #else /* !CONFIG_SMP */ -- cgit v1.2.3 From 2a0010af17b1739ef8ea8cf02647a127241ee674 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 28 Apr 2013 09:46:57 +0800 Subject: cpuset: fix compile warning when CONFIG_SMP=n Reported by Fengguang's kbuild test robot: kernel/cpuset.c:787: warning: 'generate_sched_domains' defined but not used Introduced by commit e0e80a02e5701c8790bd348ab59edb154fbda60b ("cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn()), which removed generate_sched_domains() from cpuset_hotplug_workfn(). Reported-by: Fengguang Wu Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 93d140f159c..9bd30b9c430 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -789,13 +789,6 @@ out: static void rebuild_sched_domains_locked(void) { } - -static int generate_sched_domains(cpumask_var_t **domains, - struct sched_domain_attr **attributes) -{ - *domains = NULL; - return 1; -} #endif /* CONFIG_SMP */ void rebuild_sched_domains(void) -- cgit v1.2.3