diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2021-10-21 19:44:27 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2021-10-21 19:44:27 +0200 |
commit | 3b4bd495131e9832b3b3425a958c03cda6f1d972 (patch) | |
tree | a3ea446aad0bb79ff10251b191a8197c1114c19f | |
parent | 519d81956ee277b4419c723adfb154603c2565ba (diff) | |
parent | 5d8cb8db9f791252ef5b7951b6d8cc6281de60a6 (diff) |
Merge tag 'dtpm-v5.16' of https://git.linaro.org/people/daniel.lezcano/linux
Pull Dynamic Thermal Power Management (DTPM) framework changes for v5.16
from Daniel Lezcano:
- Simplify and make the code more self-encapsulate by dealing with the
dtpm structure only (Daniel Lezcano)
- Fix power intialization (Daniel Lezcano)
- Add the CPU load consideration when estimating the instaneous power
consumption (Daniel Lezcano)
* tag 'dtpm-v5.16' of https://git.linaro.org/people/daniel.lezcano/linux:
powercap/drivers/dtpm: Fix power limit initialization
powercap/drivers/dtpm: Scale the power with the load
powercap/drivers/dtpm: Use container_of instead of a private data field
powercap/drivers/dtpm: Simplify the dtpm table
powercap/drivers/dtpm: Encapsulate even more the code
-rw-r--r-- | drivers/powercap/dtpm.c | 78 | ||||
-rw-r--r-- | drivers/powercap/dtpm_cpu.c | 228 | ||||
-rw-r--r-- | include/linux/cpuhotplug.h | 2 | ||||
-rw-r--r-- | include/linux/dtpm.h | 26 |
4 files changed, 173 insertions, 161 deletions
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index c2185ec5f887..b9fac786246a 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm) parent->power_limit -= dtpm->power_limit; parent = parent->parent; } - - __dtpm_rebalance_weight(root); } static void __dtpm_add_power(struct dtpm *dtpm) @@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm) parent->power_limit += dtpm->power_limit; parent = parent->parent; } +} + +static int __dtpm_update_power(struct dtpm *dtpm) +{ + int ret; + + __dtpm_sub_power(dtpm); + + ret = dtpm->ops->update_power_uw(dtpm); + if (ret) + pr_err("Failed to update power for '%s': %d\n", + dtpm->zone.name, ret); - __dtpm_rebalance_weight(root); + if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags)) + dtpm->power_limit = dtpm->power_max; + + __dtpm_add_power(dtpm); + + if (root) + __dtpm_rebalance_weight(root); + + return ret; } /** * dtpm_update_power - Update the power on the dtpm * @dtpm: a pointer to a dtpm structure to update - * @power_min: a u64 representing the new power_min value - * @power_max: a u64 representing the new power_max value * * Function to update the power values of the dtpm node specified in * parameter. These new values will be propagated to the tree. * * Return: zero on success, -EINVAL if the values are inconsistent */ -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max) +int dtpm_update_power(struct dtpm *dtpm) { - int ret = 0; + int ret; mutex_lock(&dtpm_lock); - - if (power_min == dtpm->power_min && power_max == dtpm->power_max) - goto unlock; - - if (power_max < power_min) { - ret = -EINVAL; - goto unlock; - } - - __dtpm_sub_power(dtpm); - - dtpm->power_min = power_min; - dtpm->power_max = power_max; - if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags)) - dtpm->power_limit = power_max; - - __dtpm_add_power(dtpm); - -unlock: + ret = __dtpm_update_power(dtpm); mutex_unlock(&dtpm_lock); return ret; @@ -359,24 +357,18 @@ static struct powercap_zone_ops zone_ops = { }; /** - * dtpm_alloc - Allocate and initialize a dtpm struct - * @name: a string specifying the name of the node - * - * Return: a struct dtpm pointer, NULL in case of error + * dtpm_init - Allocate and initialize a dtpm struct + * @dtpm: The dtpm struct pointer to be initialized + * @ops: The dtpm device specific ops, NULL for a virtual node */ -struct dtpm *dtpm_alloc(struct dtpm_ops *ops) +void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops) { - struct dtpm *dtpm; - - dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL); if (dtpm) { INIT_LIST_HEAD(&dtpm->children); INIT_LIST_HEAD(&dtpm->sibling); dtpm->weight = 1024; dtpm->ops = ops; } - - return dtpm; } /** @@ -436,6 +428,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) if (dtpm->ops && !(dtpm->ops->set_power_uw && dtpm->ops->get_power_uw && + dtpm->ops->update_power_uw && dtpm->ops->release)) return -EINVAL; @@ -455,7 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) root = dtpm; } - __dtpm_add_power(dtpm); + if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) { + __dtpm_add_power(dtpm); + dtpm->power_limit = dtpm->power_max; + } pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n", dtpm->zone.name, dtpm->power_min, dtpm->power_max); @@ -465,9 +461,9 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) return 0; } -static int __init dtpm_init(void) +static int __init init_dtpm(void) { - struct dtpm_descr **dtpm_descr; + struct dtpm_descr *dtpm_descr; pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { @@ -476,8 +472,8 @@ static int __init dtpm_init(void) } for_each_dtpm_table(dtpm_descr) - (*dtpm_descr)->init(*dtpm_descr); + dtpm_descr->init(); return 0; } -late_initcall(dtpm_init); +late_initcall(init_dtpm); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 51c366938acd..44faa3a74db6 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -14,6 +14,8 @@ * The CPU hotplug is supported and the power numbers will be updated * if a CPU is hot plugged / unplugged. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/cpumask.h> #include <linux/cpufreq.h> #include <linux/cpuhotplug.h> @@ -23,66 +25,29 @@ #include <linux/slab.h> #include <linux/units.h> -static struct dtpm *__parent; - -static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu); - struct dtpm_cpu { + struct dtpm dtpm; struct freq_qos_request qos_req; int cpu; }; -/* - * When a new CPU is inserted at hotplug or boot time, add the power - * contribution and update the dtpm tree. - */ -static int power_add(struct dtpm *dtpm, struct em_perf_domain *em) -{ - u64 power_min, power_max; - - power_min = em->table[0].power; - power_min *= MICROWATT_PER_MILLIWATT; - power_min += dtpm->power_min; +static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu); - power_max = em->table[em->nr_perf_states - 1].power; - power_max *= MICROWATT_PER_MILLIWATT; - power_max += dtpm->power_max; - - return dtpm_update_power(dtpm, power_min, power_max); -} - -/* - * When a CPU is unplugged, remove its power contribution from the - * dtpm tree. - */ -static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em) +static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm) { - u64 power_min, power_max; - - power_min = em->table[0].power; - power_min *= MICROWATT_PER_MILLIWATT; - power_min = dtpm->power_min - power_min; - - power_max = em->table[em->nr_perf_states - 1].power; - power_max *= MICROWATT_PER_MILLIWATT; - power_max = dtpm->power_max - power_max; - - return dtpm_update_power(dtpm, power_min, power_max); + return container_of(dtpm, struct dtpm_cpu, dtpm); } static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; - struct em_perf_domain *pd; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu); struct cpumask cpus; unsigned long freq; u64 power; int i, nr_cpus; - pd = em_cpu_get(dtpm_cpu->cpu); - cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); - nr_cpus = cpumask_weight(&cpus); for (i = 0; i < pd->nr_perf_states; i++) { @@ -103,34 +68,88 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) return power_limit; } +static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power) +{ + unsigned long max = 0, sum_util = 0; + int cpu; + + for_each_cpu_and(cpu, pd_mask, cpu_online_mask) { + + /* + * The capacity is the same for all CPUs belonging to + * the same perf domain, so a single call to + * arch_scale_cpu_capacity() is enough. However, we + * need the CPU parameter to be initialized by the + * loop, so the call ends up in this block. + * + * We can initialize 'max' with a cpumask_first() call + * before the loop but the bits computation is not + * worth given the arch_scale_cpu_capacity() just + * returns a value where the resulting assembly code + * will be optimized by the compiler. + */ + max = arch_scale_cpu_capacity(cpu); + sum_util += sched_cpu_util(cpu, max); + } + + /* + * In the improbable case where all the CPUs of the perf + * domain are offline, 'max' will be zero and will lead to an + * illegal operation with a zero division. + */ + return max ? (power * ((sum_util << 10) / max)) >> 10 : 0; +} + static u64 get_pd_power_uw(struct dtpm *dtpm) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *pd; - struct cpumask cpus; + struct cpumask *pd_mask; unsigned long freq; - int i, nr_cpus; + int i; pd = em_cpu_get(dtpm_cpu->cpu); + + pd_mask = em_span_cpus(pd); + freq = cpufreq_quick_get(dtpm_cpu->cpu); - cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); - nr_cpus = cpumask_weight(&cpus); for (i = 0; i < pd->nr_perf_states; i++) { if (pd->table[i].frequency < freq) continue; - return pd->table[i].power * - MICROWATT_PER_MILLIWATT * nr_cpus; + return scale_pd_power_uw(pd_mask, pd->table[i].power * + MICROWATT_PER_MILLIWATT); } return 0; } +static int update_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu); + struct cpumask cpus; + int nr_cpus; + + cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus)); + nr_cpus = cpumask_weight(&cpus); + + dtpm->power_min = em->table[0].power; + dtpm->power_min *= MICROWATT_PER_MILLIWATT; + dtpm->power_min *= nr_cpus; + + dtpm->power_max = em->table[em->nr_perf_states - 1].power; + dtpm->power_max *= MICROWATT_PER_MILLIWATT; + dtpm->power_max *= nr_cpus; + + return 0; +} + static void pd_release(struct dtpm *dtpm) { - struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); if (freq_qos_request_active(&dtpm_cpu->qos_req)) freq_qos_remove_request(&dtpm_cpu->qos_req); @@ -139,44 +158,28 @@ static void pd_release(struct dtpm *dtpm) } static struct dtpm_ops dtpm_ops = { - .set_power_uw = set_pd_power_limit, - .get_power_uw = get_pd_power_uw, - .release = pd_release, + .set_power_uw = set_pd_power_limit, + .get_power_uw = get_pd_power_uw, + .update_power_uw = update_pd_power_uw, + .release = pd_release, }; static int cpuhp_dtpm_cpu_offline(unsigned int cpu) { - struct cpufreq_policy *policy; struct em_perf_domain *pd; - struct dtpm *dtpm; - - policy = cpufreq_cpu_get(cpu); - - if (!policy) - return 0; + struct dtpm_cpu *dtpm_cpu; pd = em_cpu_get(cpu); if (!pd) return -EINVAL; - dtpm = per_cpu(dtpm_per_cpu, cpu); + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); - power_sub(dtpm, pd); - - if (cpumask_weight(policy->cpus) != 1) - return 0; - - for_each_cpu(cpu, policy->related_cpus) - per_cpu(dtpm_per_cpu, cpu) = NULL; - - dtpm_unregister(dtpm); - - return 0; + return dtpm_update_power(&dtpm_cpu->dtpm); } static int cpuhp_dtpm_cpu_online(unsigned int cpu) { - struct dtpm *dtpm; struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; struct em_perf_domain *pd; @@ -184,7 +187,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) int ret = -ENOMEM; policy = cpufreq_cpu_get(cpu); - if (!policy) return 0; @@ -192,66 +194,82 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) if (!pd) return -EINVAL; - dtpm = per_cpu(dtpm_per_cpu, cpu); - if (dtpm) - return power_add(dtpm, pd); - - dtpm = dtpm_alloc(&dtpm_ops); - if (!dtpm) - return -EINVAL; + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return dtpm_update_power(&dtpm_cpu->dtpm); dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); if (!dtpm_cpu) - goto out_kfree_dtpm; + return -ENOMEM; - dtpm->private = dtpm_cpu; + dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops); dtpm_cpu->cpu = cpu; for_each_cpu(cpu, policy->related_cpus) - per_cpu(dtpm_per_cpu, cpu) = dtpm; + per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu; - sprintf(name, "cpu%d", dtpm_cpu->cpu); + snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu); - ret = dtpm_register(name, dtpm, __parent); + ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL); if (ret) goto out_kfree_dtpm_cpu; - ret = power_add(dtpm, pd); - if (ret) - goto out_dtpm_unregister; - ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, pd->table[pd->nr_perf_states - 1].frequency); if (ret) - goto out_power_sub; + goto out_dtpm_unregister; return 0; -out_power_sub: - power_sub(dtpm, pd); - out_dtpm_unregister: - dtpm_unregister(dtpm); + dtpm_unregister(&dtpm_cpu->dtpm); dtpm_cpu = NULL; - dtpm = NULL; out_kfree_dtpm_cpu: for_each_cpu(cpu, policy->related_cpus) per_cpu(dtpm_per_cpu, cpu) = NULL; kfree(dtpm_cpu); -out_kfree_dtpm: - kfree(dtpm); return ret; } -int dtpm_register_cpu(struct dtpm *parent) +static int __init dtpm_cpu_init(void) { - __parent = parent; + int ret; + + /* + * The callbacks at CPU hotplug time are calling + * dtpm_update_power() which in turns calls update_pd_power(). + * + * The function update_pd_power() uses the online mask to + * figure out the power consumption limits. + * + * At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU + * online mask when the cpuhp_dtpm_cpu_online function is + * called, but the CPU is still in the online mask for the + * tear down callback. So the power can not be updated when + * the CPU is unplugged. + * + * At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as + * above. The CPU online mask is not up to date when the CPU + * is plugged in. + * + * For this reason, we need to call the online and offline + * callbacks at different moments when the CPU online mask is + * consistent with the power numbers we want to update. + */ + ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline", + NULL, cpuhp_dtpm_cpu_offline); + if (ret < 0) + return ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online", + cpuhp_dtpm_cpu_online, NULL); + if (ret < 0) + return ret; - return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE, - "dtpm_cpu:online", - cpuhp_dtpm_cpu_online, - cpuhp_dtpm_cpu_offline); + return 0; } + +DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 832d8a74fa59..ad9a34a80440 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -97,6 +97,7 @@ enum cpuhp_state { CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, + CPUHP_AP_DTPM_CPU_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -242,7 +243,6 @@ enum cpuhp_state { CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, - CPUHP_AP_DTPM_CPU_ONLINE, CPUHP_AP_ACTIVE, CPUHP_ONLINE, }; diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index e80a332e3d8a..2890f6370eb9 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -23,34 +23,32 @@ struct dtpm { u64 power_max; u64 power_min; int weight; - void *private; }; struct dtpm_ops { u64 (*set_power_uw)(struct dtpm *, u64); u64 (*get_power_uw)(struct dtpm *); + int (*update_power_uw)(struct dtpm *); void (*release)(struct dtpm *); }; -struct dtpm_descr; - -typedef int (*dtpm_init_t)(struct dtpm_descr *); +typedef int (*dtpm_init_t)(void); struct dtpm_descr { - struct dtpm *parent; - const char *name; dtpm_init_t init; }; /* Init section thermal table */ -extern struct dtpm_descr *__dtpm_table[]; -extern struct dtpm_descr *__dtpm_table_end[]; +extern struct dtpm_descr __dtpm_table[]; +extern struct dtpm_descr __dtpm_table_end[]; -#define DTPM_TABLE_ENTRY(name) \ - static typeof(name) *__dtpm_table_entry_##name \ - __used __section("__dtpm_table") = &name +#define DTPM_TABLE_ENTRY(name, __init) \ + static struct dtpm_descr __dtpm_table_entry_##name \ + __used __section("__dtpm_table") = { \ + .init = __init, \ + } -#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name) +#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) #define for_each_dtpm_table(__dtpm) \ for (__dtpm = __dtpm_table; \ @@ -62,11 +60,11 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone) return container_of(zone, struct dtpm, zone); } -int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max); +int dtpm_update_power(struct dtpm *dtpm); int dtpm_release_zone(struct powercap_zone *pcz); -struct dtpm *dtpm_alloc(struct dtpm_ops *ops); +void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops); void dtpm_unregister(struct dtpm *dtpm); |