diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/workqueue.c | 171 |
1 file changed, 110 insertions, 61 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 37eab7a1587d..6c4d7b1fdf9a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -326,7 +326,18 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; -static cpumask_var_t *wq_pod_cpus; /* possible CPUs of each node */ +/* + * Each pod type describes how CPUs should be grouped for unbound workqueues. + * See the comment above workqueue_attrs->affn_scope. + */ +struct wq_pod_type { + int nr_pods; /* number of pods */ + cpumask_var_t *pod_cpus; /* pod -> cpus */ + int *pod_node; /* pod -> node */ + int *cpu_pod; /* cpu -> pod */ +}; + +static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES]; /* * Per-cpu work items which run for longer than the following threshold are @@ -344,8 +355,6 @@ module_param_named(power_efficient, wq_power_efficient, bool, 0444); static bool wq_online; /* can kworkers be created yet? */ -static bool wq_pod_enabled; /* unbound CPU pod affinity enabled */ - /* buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion */ static struct workqueue_attrs *wq_update_pod_attrs_buf; static cpumask_var_t wq_update_pod_cpumask_buf; @@ -1774,10 +1783,6 @@ static int select_numa_node_cpu(int node) { int cpu; - /* No point in doing this if NUMA isn't enabled for workqueues */ - if (!wq_pod_enabled) - return WORK_CPU_UNBOUND; - /* Delay binding to CPU if node is not valid or online */ if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) return WORK_CPU_UNBOUND; @@ -3659,6 +3664,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void) goto fail; cpumask_copy(attrs->cpumask, cpu_possible_mask); + attrs->affn_scope = WQ_AFFN_DFL; return attrs; fail: free_workqueue_attrs(attrs); @@ -3670,11 +3676,13 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, { to->nice = from->nice; cpumask_copy(to->cpumask, from->cpumask); + /* - * Unlike hash and equality test, this function doesn't ignore - * ->ordered as it is used for both pool and wq attrs. 
Instead, - * get_unbound_pool() explicitly clears ->ordered after copying. + * Unlike hash and equality test, copying shouldn't ignore wq-only + * fields as copying is used for both pool and wq attrs. Instead, + * get_unbound_pool() explicitly clears the fields. */ + to->affn_scope = from->affn_scope; to->ordered = from->ordered; } @@ -3684,6 +3692,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, */ static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs) { + attrs->affn_scope = WQ_AFFN_NR_TYPES; attrs->ordered = false; } @@ -3723,6 +3732,25 @@ static void wqattrs_actualize_cpumask(struct workqueue_attrs *attrs, cpumask_copy(attrs->cpumask, unbound_cpumask); } +/* find wq_pod_type to use for @attrs */ +static const struct wq_pod_type * +wqattrs_pod_type(const struct workqueue_attrs *attrs) +{ + struct wq_pod_type *pt = &wq_pod_types[attrs->affn_scope]; + + if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) && + likely(pt->nr_pods)) + return pt; + + /* + * Before workqueue_init_topology(), only SYSTEM is available which is + * initialized in workqueue_init_early(). 
+ */ + pt = &wq_pod_types[WQ_AFFN_SYSTEM]; + BUG_ON(!pt->nr_pods); + return pt; +} + /** * init_worker_pool - initialize a newly zalloc'd worker_pool * @pool: worker_pool to initialize @@ -3924,10 +3952,10 @@ static void put_unbound_pool(struct worker_pool *pool) */ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { + struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_NUMA]; u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; - int pod; - int target_pod = NUMA_NO_NODE; + int pod, node = NUMA_NO_NODE; lockdep_assert_held(&wq_pool_mutex); @@ -3939,23 +3967,20 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) } } - /* if cpumask is contained inside a pod, we belong to that pod */ - if (wq_pod_enabled) { - for_each_node(pod) { - if (cpumask_subset(attrs->cpumask, wq_pod_cpus[pod])) { - target_pod = pod; - break; - } + /* If cpumask is contained inside a NUMA pod, that's our NUMA node */ + for (pod = 0; pod < pt->nr_pods; pod++) { + if (cpumask_subset(attrs->cpumask, pt->pod_cpus[pod])) { + node = pt->pod_node[pod]; + break; } } /* nope, create a new one */ - pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_pod); + pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, node); if (!pool || init_worker_pool(pool) < 0) goto fail; - pool->node = target_pod; - + pool->node = node; copy_workqueue_attrs(pool->attrs, attrs); wqattrs_clear_for_pool(pool->attrs); @@ -4143,7 +4168,7 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, /** * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod * @attrs: the wq_attrs of the default pwq of the target workqueue - * @pod: the target CPU pod + * @cpu: the target CPU * @cpu_going_down: if >= 0, the CPU to consider as offline * @cpumask: outarg, the resulting cpumask * @@ -4157,30 +4182,29 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, * * The caller is responsible for ensuring that the cpumask of @pod 
stays stable. */ -static void wq_calc_pod_cpumask(const struct workqueue_attrs *attrs, int pod, - int cpu_going_down, cpumask_t *cpumask) +static void wq_calc_pod_cpumask(const struct workqueue_attrs *attrs, int cpu, + int cpu_going_down, cpumask_t *cpumask) { - if (!wq_pod_enabled || attrs->ordered) - goto use_dfl; + const struct wq_pod_type *pt = wqattrs_pod_type(attrs); + int pod = pt->cpu_pod[cpu]; /* does @pod have any online CPUs @attrs wants? */ - cpumask_and(cpumask, cpumask_of_node(pod), attrs->cpumask); + cpumask_and(cpumask, pt->pod_cpus[pod], attrs->cpumask); + cpumask_and(cpumask, cpumask, cpu_online_mask); if (cpu_going_down >= 0) cpumask_clear_cpu(cpu_going_down, cpumask); - if (cpumask_empty(cpumask)) - goto use_dfl; + if (cpumask_empty(cpumask)) { + cpumask_copy(cpumask, attrs->cpumask); + return; + } /* yeap, return possible CPUs in @pod that @attrs wants */ - cpumask_and(cpumask, attrs->cpumask, wq_pod_cpus[pod]); + cpumask_and(cpumask, attrs->cpumask, pt->pod_cpus[pod]); if (cpumask_empty(cpumask)) pr_warn_once("WARNING: workqueue cpumask: online intersect > " "possible intersect\n"); - return; - -use_dfl: - cpumask_copy(cpumask, attrs->cpumask); } /* install @pwq into @wq's cpu_pwq and return the old pwq */ @@ -4237,6 +4261,10 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, lockdep_assert_held(&wq_pool_mutex); + if (WARN_ON(attrs->affn_scope < 0 || + attrs->affn_scope >= WQ_AFFN_NR_TYPES)) + return ERR_PTR(-EINVAL); + ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_cpu_ids), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(); @@ -4266,8 +4294,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, ctx->dfl_pwq->refcnt++; ctx->pwq_tbl[cpu] = ctx->dfl_pwq; } else { - wq_calc_pod_cpumask(new_attrs, cpu_to_node(cpu), -1, - tmp_attrs->cpumask); + wq_calc_pod_cpumask(new_attrs, cpu, -1, tmp_attrs->cpumask); ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, tmp_attrs); if (!ctx->pwq_tbl[cpu]) goto out_free; @@ -4287,7 +4314,7 @@ out_free: 
free_workqueue_attrs(tmp_attrs); free_workqueue_attrs(new_attrs); apply_wqattrs_cleanup(ctx); - return NULL; + return ERR_PTR(-ENOMEM); } /* set attrs and install prepared pwqs, @ctx points to old pwqs on return */ @@ -4343,8 +4370,8 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, } ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* the ctx has been prepared successfully, let's commit it */ apply_wqattrs_commit(ctx); @@ -4409,7 +4436,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, static void wq_update_pod(struct workqueue_struct *wq, int cpu, int hotplug_cpu, bool online) { - int pod = cpu_to_node(cpu); int off_cpu = online ? -1 : hotplug_cpu; struct pool_workqueue *old_pwq = NULL, *pwq; struct workqueue_attrs *target_attrs; @@ -4417,8 +4443,7 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu, lockdep_assert_held(&wq_pool_mutex); - if (!wq_pod_enabled || !(wq->flags & WQ_UNBOUND) || - wq->unbound_attrs->ordered) + if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) return; /* @@ -4433,7 +4458,7 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu, wqattrs_actualize_cpumask(target_attrs, wq_unbound_cpumask); /* nothing to do if the target cpumask matches the current pwq */ - wq_calc_pod_cpumask(target_attrs, pod, off_cpu, cpumask); + wq_calc_pod_cpumask(target_attrs, cpu, off_cpu, cpumask); pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu), lockdep_is_held(&wq_pool_mutex)); if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) @@ -5460,12 +5485,14 @@ int workqueue_online_cpu(unsigned int cpu) /* update pod affinity of unbound workqueues */ list_for_each_entry(wq, &workqueues, list) { - int tcpu; + struct workqueue_attrs *attrs = wq->unbound_attrs; + + if (attrs) { + const struct wq_pod_type *pt = wqattrs_pod_type(attrs); + int tcpu; - for_each_possible_cpu(tcpu) { - if (cpu_to_node(tcpu) == 
cpu_to_node(cpu)) { + for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, true); - } } } @@ -5486,12 +5513,14 @@ int workqueue_offline_cpu(unsigned int cpu) /* update pod affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { - int tcpu; + struct workqueue_attrs *attrs = wq->unbound_attrs; + + if (attrs) { + const struct wq_pod_type *pt = wqattrs_pod_type(attrs); + int tcpu; - for_each_possible_cpu(tcpu) { - if (cpu_to_node(tcpu) == cpu_to_node(cpu)) { + for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, false); - } } } mutex_unlock(&wq_pool_mutex); @@ -5689,8 +5718,8 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) continue; ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); - if (!ctx) { - ret = -ENOMEM; + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); break; } @@ -6283,6 +6312,7 @@ static inline void wq_watchdog_init(void) { } */ void __init workqueue_init_early(void) { + struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM]; int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; int i, cpu; @@ -6302,6 +6332,22 @@ void __init workqueue_init_early(void) BUG_ON(!alloc_cpumask_var(&wq_update_pod_cpumask_buf, GFP_KERNEL)); + /* initialize WQ_AFFN_SYSTEM pods */ + pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); + pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); + pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); + BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); + + BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); + + wq_update_pod_attrs_buf = alloc_workqueue_attrs(); + BUG_ON(!wq_update_pod_attrs_buf); + + pt->nr_pods = 1; + cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); + pt->pod_node[0] = NUMA_NO_NODE; + pt->cpu_pod[0] = 0; + /* initialize CPU pools */ for_each_possible_cpu(cpu) { struct worker_pool *pool; @@ 
-6457,8 +6503,8 @@ void __init workqueue_init(void) */ void __init workqueue_init_topology(void) { + struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_NUMA]; struct workqueue_struct *wq; - cpumask_var_t *tbl; int node, cpu; if (num_possible_nodes() <= 1) @@ -6478,20 +6524,23 @@ void __init workqueue_init_topology(void) * available. Build one from cpu_to_node() which should have been * fully initialized by now. */ - tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL); - BUG_ON(!tbl); + pt->pod_cpus = kcalloc(nr_node_ids, sizeof(pt->pod_cpus[0]), GFP_KERNEL); + pt->pod_node = kcalloc(nr_node_ids, sizeof(pt->pod_node[0]), GFP_KERNEL); + pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); + BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); for_each_node(node) - BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL, + BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[node], GFP_KERNEL, node_online(node) ? node : NUMA_NO_NODE)); for_each_possible_cpu(cpu) { node = cpu_to_node(cpu); - cpumask_set_cpu(cpu, tbl[node]); + cpumask_set_cpu(cpu, pt->pod_cpus[node]); + pt->pod_node[node] = node; + pt->cpu_pod[cpu] = node; } - wq_pod_cpus = tbl; - wq_pod_enabled = true; + pt->nr_pods = nr_node_ids; /* * Workqueues allocated earlier would have all CPUs sharing the default |