diff options
Diffstat (limited to 'kernel/cgroup/cpuset.c')
-rw-r--r-- | kernel/cgroup/cpuset.c | 267 |
1 files changed, 158 insertions, 109 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e4ca2dd2b764..58e6f18f01c1 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -25,45 +25,22 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/cpuset.h> -#include <linux/err.h> -#include <linux/errno.h> -#include <linux/file.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/kmod.h> -#include <linux/kthread.h> -#include <linux/list.h> #include <linux/mempolicy.h> #include <linux/mm.h> #include <linux/memory.h> #include <linux/export.h> -#include <linux/mount.h> -#include <linux/fs_context.h> -#include <linux/namei.h> -#include <linux/pagemap.h> -#include <linux/proc_fs.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/deadline.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> -#include <linux/seq_file.h> #include <linux/security.h> -#include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/time.h> -#include <linux/time64.h> -#include <linux/backing-dev.h> -#include <linux/sort.h> #include <linux/oom.h> #include <linux/sched/isolation.h> -#include <linux/uaccess.h> -#include <linux/atomic.h> -#include <linux/mutex.h> #include <linux/cgroup.h> #include <linux/wait.h> @@ -193,6 +170,14 @@ struct cpuset { int use_parent_ecpus; int child_ecpus_count; + /* + * number of SCHED_DEADLINE tasks attached to this cpuset, so that we + * know when to rebuild associated root domain bandwidth information. + */ + int nr_deadline_tasks; + int nr_migrate_dl_tasks; + u64 sum_migrate_dl_bw; + /* Invalid partition error code, not lock protected */ enum prs_errcode prs_err; @@ -245,6 +230,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) return css_cs(cs->css.parent); } +void inc_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks++; +} + +void dec_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks--; +} + /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, @@ -366,22 +365,23 @@ static struct cpuset top_cpuset = { if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* - * There are two global locks guarding cpuset structures - cpuset_rwsem and + * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this - * comment. The cpuset code uses only cpuset_rwsem write lock. Other - * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to - * prevent change to cpuset structures. + * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems + * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset + * structures. Note that cpuset_mutex needs to be a mutex as it is used in + * paths that rely on priority inheritance (e.g. scheduler - on RT) for + * correctness. * * A task must hold both locks to modify cpusets. If a task holds - * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it - * is the only task able to also acquire callback_lock and be able to - * modify cpusets. It can perform various checks on the cpuset structure - * first, knowing nothing will change. It can also allocate memory while - * just holding cpuset_rwsem. While it is performing these checks, various - * callback routines can briefly acquire callback_lock to query cpusets. - * Once it is ready to make the changes, it takes callback_lock, blocking - * everyone else. + * cpuset_mutex, it blocks others, ensuring that it is the only task able to + * also acquire callback_lock and be able to modify cpusets. It can perform + * various checks on the cpuset structure first, knowing nothing will change. + * It can also allocate memory while just holding cpuset_mutex. While it is + * performing these checks, various callback routines can briefly acquire + * callback_lock to query cpusets. Once it is ready to make the changes, it + * takes callback_lock, blocking everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock @@ -403,16 +403,16 @@ static struct cpuset top_cpuset = { * guidelines for accessing subsystem state in kernel/cgroup.c */ -DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); +static DEFINE_MUTEX(cpuset_mutex); -void cpuset_read_lock(void) +void cpuset_lock(void) { - percpu_down_read(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } -void cpuset_read_unlock(void) +void cpuset_unlock(void) { - percpu_up_read(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } static DEFINE_SPINLOCK(callback_lock); @@ -496,7 +496,7 @@ static inline bool partition_is_populated(struct cpuset *cs, * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_cpus(struct task_struct *tsk, struct cpumask *pmask) @@ -538,7 +538,7 @@ out_unlock: * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { @@ -550,7 +550,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) /* * update task's spread flag if cpuset's page/slab spread flag is set * - * Call with callback_lock or cpuset_rwsem held. The check can be skipped + * Call with callback_lock or cpuset_mutex held. The check can be skipped * if on default hierarchy. */ static void cpuset_update_task_spread_flags(struct cpuset *cs, @@ -575,7 +575,7 @@ static void cpuset_update_task_spread_flags(struct cpuset *cs, * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags - * are only set if the other's are set. Call holding cpuset_rwsem. + * are only set if the other's are set. Call holding cpuset_mutex. */ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) @@ -713,7 +713,7 @@ out: * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes - * cpuset_rwsem held. + * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the @@ -829,7 +829,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_unlock(); } -/* Must be called with cpuset_rwsem held. */ +/* Must be called with cpuset_mutex held. */ static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ @@ -855,7 +855,7 @@ static inline int nr_cpusets(void) * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * - * Must be called with cpuset_rwsem held. + * Must be called with cpuset_mutex held. * * The three key local variables below are: * cp - cpuset pointer, used (together with pos_css) to perform a @@ -1066,11 +1066,14 @@ done: return ndoms; } -static void update_tasks_root_domain(struct cpuset *cs) +static void dl_update_tasks_root_domain(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; + if (cs->nr_deadline_tasks == 0) + return; + css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) @@ -1079,12 +1082,12 @@ static void update_tasks_root_domain(struct cpuset *cs) css_task_iter_end(&it); } -static void rebuild_root_domains(void) +static void dl_rebuild_rd_accounting(void) { struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); lockdep_assert_cpus_held(); lockdep_assert_held(&sched_domains_mutex); @@ -1107,7 +1110,7 @@ static void rebuild_root_domains(void) rcu_read_unlock(); - update_tasks_root_domain(cs); + dl_update_tasks_root_domain(cs); rcu_read_lock(); css_put(&cs->css); @@ -1121,7 +1124,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { mutex_lock(&sched_domains_mutex); partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - rebuild_root_domains(); + dl_rebuild_rd_accounting(); mutex_unlock(&sched_domains_mutex); } @@ -1134,7 +1137,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_rwsem held. Takes cpus_read_lock(). + * Call with cpuset_mutex held. Takes cpus_read_lock(). */ static void rebuild_sched_domains_locked(void) { @@ -1145,7 +1148,7 @@ static void rebuild_sched_domains_locked(void) int ndoms; lockdep_assert_cpus_held(); - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * If we have raced with CPU hotplug, return early to avoid @@ -1196,9 +1199,9 @@ static void rebuild_sched_domains_locked(void) void rebuild_sched_domains(void) { cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); rebuild_sched_domains_locked(); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -1208,7 +1211,7 @@ void rebuild_sched_domains(void) * @new_cpus: the temp variable for the new effective_cpus mask * * Iterate through each task of @cs updating its cpus_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask() * is used instead of effective_cpus to make sure all offline CPUs are also * included as hotplug code won't update cpumasks for tasks in top_cpuset. @@ -1322,7 +1325,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? */ - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * The parent must be a partition root. @@ -1545,7 +1548,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, bool force) @@ -1705,7 +1708,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * Check all its siblings and call update_cpumasks_hier() @@ -1955,12 +1958,12 @@ static void *cpuset_being_rebound; * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */ static void update_tasks_nodemask(struct cpuset *cs) { - static nodemask_t newmems; /* protected by cpuset_rwsem */ + static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; @@ -1973,7 +1976,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold - * the global cpuset_rwsem, we know that no other rebind effort + * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. @@ -2019,7 +2022,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * * On legacy hierarchy, effective_mems will be the same with mems_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { @@ -2072,7 +2075,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * - * Call with cpuset_rwsem held. May take callback_lock during call. + * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_lock, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. @@ -2164,7 +2167,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this - * function is called with cpuset_rwsem held, cpuset membership stays + * function is called with cpuset_mutex held, cpuset membership stays * stable. */ static void update_tasks_flags(struct cpuset *cs) @@ -2184,7 +2187,7 @@ static void update_tasks_flags(struct cpuset *cs) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, @@ -2234,7 +2237,7 @@ out: * @new_prs: new partition root state * Return: 0 if successful, != 0 if error * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_prstate(struct cpuset *cs, int new_prs) { @@ -2472,19 +2475,26 @@ static int cpuset_can_attach_check(struct cpuset *cs) return 0; } -/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ +static void reset_migrate_dl_data(struct cpuset *cs) +{ + cs->nr_migrate_dl_tasks = 0; + cs->sum_migrate_dl_bw = 0; +} + +/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; - struct cpuset *cs; + struct cpuset *cs, *oldcs; struct task_struct *task; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); + oldcs = cpuset_attach_old_cs; cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); @@ -2492,21 +2502,46 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; ret = security_task_setscheduler(task); if (ret) goto out_unlock; + + if (dl_task(task)) { + cs->nr_migrate_dl_tasks++; + cs->sum_migrate_dl_bw += task->dl.dl_bw; + } } + if (!cs->nr_migrate_dl_tasks) + goto out_success; + + if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { + int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); + + if (unlikely(cpu >= nr_cpu_ids)) { + reset_migrate_dl_data(cs); + ret = -EINVAL; + goto out_unlock; + } + + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); + if (ret) { + reset_migrate_dl_data(cs); + goto out_unlock; + } + } + +out_success: /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -2518,15 +2553,23 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + + if (cs->nr_migrate_dl_tasks) { + int cpu = cpumask_any(cs->effective_cpus); + + dl_bw_free(cpu, cs->sum_migrate_dl_bw); + reset_migrate_dl_data(cs); + } + + mutex_unlock(&cpuset_mutex); } /* - * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() + * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ @@ -2535,7 +2578,7 @@ static nodemask_t cpuset_attach_nodemask_to; static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) { - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); if (cs != &top_cpuset) guarantee_online_cpus(task, cpus_attach); @@ -2565,7 +2608,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cs = css_cs(css); lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); @@ -2622,11 +2665,17 @@ static void cpuset_attach(struct cgroup_taskset *tset) out: cs->old_mems_allowed = cpuset_attach_nodemask_to; + if (cs->nr_migrate_dl_tasks) { + cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; + oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; + reset_migrate_dl_data(cs); + } + cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* The various types of files and directories in a cpuset file system */ @@ -2658,7 +2707,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -2694,7 +2743,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2707,7 +2756,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = -ENODEV; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2720,7 +2769,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2753,7 +2802,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. Losing the * protection is okay as we check whether @cs is online after - * grabbing cpuset_rwsem anyway. This only happens on the legacy + * grabbing cpuset_mutex anyway. This only happens on the legacy * hierarchies. */ css_get(&cs->css); @@ -2761,7 +2810,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, flush_work(&cpuset_hotplug_work); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2785,7 +2834,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_cpuset(trialcs); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); @@ -2933,13 +2982,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; retval = update_prstate(cs, val); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); return retval ?: nbytes; @@ -3156,7 +3205,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) return 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) @@ -3207,7 +3256,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return 0; } @@ -3228,7 +3277,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) struct cpuset *cs = css_cs(css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (is_partition_valid(cs)) update_prstate(cs, 0); @@ -3247,7 +3296,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -3260,7 +3309,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { @@ -3273,7 +3322,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) } spin_unlock_irq(&callback_lock); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3294,14 +3343,14 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) return 0; lockdep_assert_held(&cgroup_mutex); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); if (ret) goto out_unlock; - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; @@ -3315,7 +3364,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -3331,11 +3380,11 @@ static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) if (same_cs) return; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3363,7 +3412,7 @@ static void cpuset_fork(struct task_struct *task) } /* CLONE_INTO_CGROUP */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); @@ -3371,7 +3420,7 @@ static void cpuset_fork(struct task_struct *task) if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } struct cgroup_subsys cpuset_cgrp_subsys = { @@ -3472,7 +3521,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* * Move tasks to the nearest ancestor with execution resources, @@ -3482,7 +3531,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, if (is_empty) remove_tasks_in_empty_cpuset(cs); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } static void @@ -3533,14 +3582,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. */ if (cs->attach_in_progress) { - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); goto retry; } @@ -3637,7 +3686,7 @@ update_tasks: cpus_updated, mems_updated); unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /** @@ -3667,7 +3716,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (on_dfl && !alloc_cpumasks(NULL, &tmp)) ptmp = &tmp; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); @@ -3724,7 +3773,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(&top_cpuset); } - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { @@ -4155,7 +4204,7 @@ void __cpuset_memory_pressure_bump(void) * - Used for /proc/<pid>/cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, - * and we take cpuset_rwsem, keeping cpuset_attach() from changing it + * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, |