diff options
67 files changed, 1940 insertions, 789 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3de80ec8f6..52292b28b291 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1661,6 +1661,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. hwp_only Only load intel_pstate on systems which support hardware P state control (HWP) if available. + support_acpi_ppc + Enforce ACPI _PPC performance limits. If the Fixed ACPI + Description Table specifies the preferred power management + profile as "Enterprise Server" or "Performance Server", + then this feature is turned on by default. intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c index 25d73870ccca..ac181c6797ee 100644 --- a/arch/arm/mach-berlin/berlin.c +++ b/arch/arm/mach-berlin/berlin.c @@ -18,11 +18,6 @@ #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> -static void __init berlin_init_late(void) -{ - platform_device_register_simple("cpufreq-dt", -1, NULL, 0); -} - static const char * const berlin_dt_compat[] = { "marvell,berlin", NULL, @@ -30,7 +25,6 @@ static const char * const berlin_dt_compat[] = { DT_MACHINE_START(BERLIN_DT, "Marvell Berlin") .dt_compat = berlin_dt_compat, - .init_late = berlin_init_late, /* * with DT probing for L2CCs, berlin_init_machine can be removed. 
* Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index bbf51a46f772..4d3b056fd786 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -213,33 +213,6 @@ static void __init exynos_init_irq(void) exynos_map_pmu(); } -static const struct of_device_id exynos_cpufreq_matches[] = { - { .compatible = "samsung,exynos3250", .data = "cpufreq-dt" }, - { .compatible = "samsung,exynos4210", .data = "cpufreq-dt" }, - { .compatible = "samsung,exynos4212", .data = "cpufreq-dt" }, - { .compatible = "samsung,exynos4412", .data = "cpufreq-dt" }, - { .compatible = "samsung,exynos5250", .data = "cpufreq-dt" }, -#ifndef CONFIG_BL_SWITCHER - { .compatible = "samsung,exynos5420", .data = "cpufreq-dt" }, - { .compatible = "samsung,exynos5800", .data = "cpufreq-dt" }, -#endif - { /* sentinel */ } -}; - -static void __init exynos_cpufreq_init(void) -{ - struct device_node *root = of_find_node_by_path("/"); - const struct of_device_id *match; - - match = of_match_node(exynos_cpufreq_matches, root); - if (!match) { - platform_device_register_simple("exynos-cpufreq", -1, NULL, 0); - return; - } - - platform_device_register_simple(match->data, -1, NULL, 0); -} - static void __init exynos_dt_machine_init(void) { /* @@ -262,8 +235,6 @@ static void __init exynos_dt_machine_init(void) of_machine_is_compatible("samsung,exynos5250")) platform_device_register(&exynos_cpuidle); - exynos_cpufreq_init(); - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/arch/arm/mach-imx/imx27-dt.c b/arch/arm/mach-imx/imx27-dt.c index bd42d1bd10af..530a728c2acc 100644 --- a/arch/arm/mach-imx/imx27-dt.c +++ b/arch/arm/mach-imx/imx27-dt.c @@ -18,15 +18,6 @@ #include "common.h" #include "mx27.h" -static void __init imx27_dt_init(void) -{ - struct platform_device_info devinfo = { .name = "cpufreq-dt", }; - - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); - - 
platform_device_register_full(&devinfo); -} - static const char * const imx27_dt_board_compat[] __initconst = { "fsl,imx27", NULL @@ -36,6 +27,5 @@ DT_MACHINE_START(IMX27_DT, "Freescale i.MX27 (Device Tree Support)") .map_io = mx27_map_io, .init_early = imx27_init_early, .init_irq = mx27_init_irq, - .init_machine = imx27_dt_init, .dt_compat = imx27_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c index 6883fbaf9484..10a82a4f1e58 100644 --- a/arch/arm/mach-imx/mach-imx51.c +++ b/arch/arm/mach-imx/mach-imx51.c @@ -50,13 +50,10 @@ static void __init imx51_ipu_mipi_setup(void) static void __init imx51_dt_init(void) { - struct platform_device_info devinfo = { .name = "cpufreq-dt", }; - imx51_ipu_mipi_setup(); imx_src_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); - platform_device_register_full(&devinfo); } static void __init imx51_init_late(void) diff --git a/arch/arm/mach-imx/mach-imx53.c b/arch/arm/mach-imx/mach-imx53.c index 86316a979297..18b5c5c136db 100644 --- a/arch/arm/mach-imx/mach-imx53.c +++ b/arch/arm/mach-imx/mach-imx53.c @@ -40,8 +40,6 @@ static void __init imx53_dt_init(void) static void __init imx53_init_late(void) { imx53_pm_init(); - - platform_device_register_simple("cpufreq-dt", -1, NULL, 0); } static const char * const imx53_dt_board_compat[] __initconst = { diff --git a/arch/arm/mach-imx/mach-imx7d.c b/arch/arm/mach-imx/mach-imx7d.c index 5a27f20c9a82..b450f525a670 100644 --- a/arch/arm/mach-imx/mach-imx7d.c +++ b/arch/arm/mach-imx/mach-imx7d.c @@ -105,11 +105,6 @@ static void __init imx7d_init_irq(void) irqchip_init(); } -static void __init imx7d_init_late(void) -{ - platform_device_register_simple("cpufreq-dt", -1, NULL, 0); -} - static const char *const imx7d_dt_compat[] __initconst = { "fsl,imx7d", NULL, @@ -117,7 +112,6 @@ static const char *const imx7d_dt_compat[] __initconst = { DT_MACHINE_START(IMX7D, "Freescale i.MX7 Dual (Device Tree)") .init_irq = 
imx7d_init_irq, - .init_late = imx7d_init_late, .init_machine = imx7d_init_machine, .dt_compat = imx7d_dt_compat, MACHINE_END diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 58920bc8807b..2f7b11da7d5d 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -277,13 +277,10 @@ static void __init omap4_init_voltages(void) static inline void omap_init_cpufreq(void) { - struct platform_device_info devinfo = { }; + struct platform_device_info devinfo = { .name = "omap-cpufreq" }; if (!of_have_populated_dt()) - devinfo.name = "omap-cpufreq"; - else - devinfo.name = "cpufreq-dt"; - platform_device_register_full(&devinfo); + platform_device_register_full(&devinfo); } static int __init omap2_common_pm_init(void) diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index 3f07cc5dfe5f..beb71da5d9c8 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -74,7 +74,6 @@ static void __init rockchip_dt_init(void) { rockchip_suspend_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); - platform_device_register_simple("cpufreq-dt", 0, NULL, 0); } static const char * const rockchip_board_dt_compat[] = { diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index a65c80ac9009..c9ea0e6ff4f9 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -38,7 +38,6 @@ smp-$(CONFIG_ARCH_EMEV2) += smp-emev2.o headsmp-scu.o platsmp-scu.o # PM objects obj-$(CONFIG_SUSPEND) += suspend.o -obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_PM_RCAR) += pm-rcar.o obj-$(CONFIG_PM_RMOBILE) += pm-rmobile.o obj-$(CONFIG_ARCH_RCAR_GEN2) += pm-rcar-gen2.o diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 5464b7a75e30..3b562d87826d 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -25,16 +25,9 @@ static inline int shmobile_suspend_init(void) { return 0; } static 
inline void shmobile_smp_apmu_suspend_init(void) { } #endif -#ifdef CONFIG_CPU_FREQ -int shmobile_cpufreq_init(void); -#else -static inline int shmobile_cpufreq_init(void) { return 0; } -#endif - static inline void __init shmobile_init_late(void) { shmobile_suspend_init(); - shmobile_cpufreq_init(); } #endif /* __ARCH_MACH_COMMON_H */ diff --git a/arch/arm/mach-shmobile/cpufreq.c b/arch/arm/mach-shmobile/cpufreq.c deleted file mode 100644 index 634d701c56a7..000000000000 --- a/arch/arm/mach-shmobile/cpufreq.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * CPUFreq support code for SH-Mobile ARM - * - * Copyright (C) 2014 Gaku Inami - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/platform_device.h> - -#include "common.h" - -int __init shmobile_cpufreq_init(void) -{ - platform_device_register_simple("cpufreq-dt", -1, NULL, 0); - return 0; -} diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 3c156190a1d4..95dca8c2c9ed 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -17,11 +17,6 @@ #include <asm/mach/arch.h> -static void __init sunxi_dt_cpufreq_init(void) -{ - platform_device_register_simple("cpufreq-dt", -1, NULL, 0); -} - static const char * const sunxi_board_dt_compat[] = { "allwinner,sun4i-a10", "allwinner,sun5i-a10s", @@ -32,7 +27,6 @@ static const char * const sunxi_board_dt_compat[] = { DT_MACHINE_START(SUNXI_DT, "Allwinner sun4i/sun5i Families") .dt_compat = sunxi_board_dt_compat, - .init_late = sunxi_dt_cpufreq_init, MACHINE_END static const char * const sun6i_board_dt_compat[] = { @@ -53,7 +47,6 @@ static void __init sun6i_timer_init(void) DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family") .init_time = sun6i_timer_init, .dt_compat = sun6i_board_dt_compat, - .init_late = sunxi_dt_cpufreq_init, MACHINE_END static const char * const 
sun7i_board_dt_compat[] = { @@ -63,7 +56,6 @@ static const char * const sun7i_board_dt_compat[] = { DT_MACHINE_START(SUN7I_DT, "Allwinner sun7i (A20) Family") .dt_compat = sun7i_board_dt_compat, - .init_late = sunxi_dt_cpufreq_init, MACHINE_END static const char * const sun8i_board_dt_compat[] = { @@ -77,7 +69,6 @@ static const char * const sun8i_board_dt_compat[] = { DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family") .init_time = sun6i_timer_init, .dt_compat = sun8i_board_dt_compat, - .init_late = sunxi_dt_cpufreq_init, MACHINE_END static const char * const sun9i_board_dt_compat[] = { diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 860ffb663f02..da876d28ccbc 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -110,7 +110,6 @@ static void __init zynq_init_late(void) */ static void __init zynq_init_machine(void) { - struct platform_device_info devinfo = { .name = "cpufreq-dt", }; struct soc_device_attribute *soc_dev_attr; struct soc_device *soc_dev; struct device *parent = NULL; @@ -145,7 +144,6 @@ out: of_platform_populate(NULL, of_default_bus_match_table, NULL, parent); platform_device_register(&zynq_cpuidle_device); - platform_device_register_full(&devinfo); } static void __init zynq_timer_init(void) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index a7f45853c103..b38d5028a03e 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -18,7 +18,11 @@ config CPU_FREQ if CPU_FREQ +config CPU_FREQ_GOV_ATTR_SET + bool + config CPU_FREQ_GOV_COMMON + select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK bool @@ -103,6 +107,16 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE Be aware that not all cpufreq drivers support the conservative governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. 
+ +config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL + bool "schedutil" + select CPU_FREQ_GOV_SCHEDUTIL + select CPU_FREQ_GOV_PERFORMANCE + help + Use the 'schedutil' CPUFreq governor by default. If unsure, + have a look at the help section of that governor. The fallback + governor will be 'performance'. + endchoice config CPU_FREQ_GOV_PERFORMANCE @@ -184,6 +198,26 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. +config CPU_FREQ_GOV_SCHEDUTIL + tristate "'schedutil' cpufreq policy governor" + depends on CPU_FREQ + select CPU_FREQ_GOV_ATTR_SET + select IRQ_WORK + help + This governor makes decisions based on the utilization data provided + by the scheduler. It sets the CPU frequency to be proportional to + the utilization/capacity ratio coming from the scheduler. If the + utilization is frequency-invariant, the new frequency is also + proportional to the maximum available frequency. If that is not the + case, it is proportional to the current frequency of the CPU. The + frequency tipping point is at utilization/capacity equal to 80% in + both cases. + + To compile this driver as a module, choose M here: the module will + be called cpufreq_schedutil. + + If in doubt, say N. + comment "CPU frequency scaling drivers" config CPUFREQ_DT @@ -191,6 +225,7 @@ config CPUFREQ_DT depends on HAVE_CLK && OF # if CPU_THERMAL is on and THERMAL=m, CPUFREQ_DT cannot be =y: depends on !CPU_THERMAL || THERMAL + select CPUFREQ_DT_PLATDEV select PM_OPP help This adds a generic DT based cpufreq driver for frequency management. @@ -199,6 +234,15 @@ config CPUFREQ_DT If in doubt, say N. +config CPUFREQ_DT_PLATDEV + bool + help + This adds a generic DT based cpufreq platdev driver for frequency + management. This creates a 'cpufreq-dt' platform device, on the + supported platforms. + + If in doubt, say N. 
+ if X86 source "drivers/cpufreq/Kconfig.x86" endif diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 14b1f9393b05..d89b8afe23b6 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -50,15 +50,6 @@ config ARM_HIGHBANK_CPUFREQ If in doubt, say N. -config ARM_HISI_ACPU_CPUFREQ - tristate "Hisilicon ACPU CPUfreq driver" - depends on ARCH_HISI && CPUFREQ_DT - select PM_OPP - help - This enables the hisilicon ACPU CPUfreq driver. - - If in doubt, say N. - config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index c59bdcb83217..adbd1de1cea5 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -5,6 +5,7 @@ config X86_INTEL_PSTATE bool "Intel P state control" depends on X86 + select ACPI_PROCESSOR if ACPI help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 9e63fb1b09f8..2cce2cd400f9 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -11,8 +11,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o +obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o +obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o ################################################################################## # x86 drivers. 
@@ -53,7 +55,6 @@ obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o -obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ) += hisi-acpu-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index fb5712141040..32a15052f363 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -25,6 +25,8 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -50,8 +52,6 @@ MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); MODULE_LICENSE("GPL"); -#define PFX "acpi-cpufreq: " - enum { UNDEFINED_CAPABLE = 0, SYSTEM_INTEL_MSR_CAPABLE, @@ -65,7 +65,6 @@ enum { #define MSR_K7_HWCR_CPB_DIS (1ULL << 25) struct acpi_cpufreq_data { - struct cpufreq_frequency_table *freq_table; unsigned int resume; unsigned int cpu_feature; unsigned int acpi_perf_cpu; @@ -200,8 +199,9 @@ static int check_amd_hwpstate_cpu(unsigned int cpuid) return cpu_has(cpu, X86_FEATURE_HW_PSTATE); } -static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) +static unsigned extract_io(struct cpufreq_policy *policy, u32 value) { + struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; int i; @@ -209,13 +209,14 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) - return data->freq_table[i].frequency; + return policy->freq_table[i].frequency; } return 0; } -static unsigned extract_msr(u32 msr, struct 
acpi_cpufreq_data *data) +static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr) { + struct acpi_cpufreq_data *data = policy->driver_data; struct cpufreq_frequency_table *pos; struct acpi_processor_performance *perf; @@ -226,20 +227,22 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) perf = to_perf_data(data); - cpufreq_for_each_entry(pos, data->freq_table) + cpufreq_for_each_entry(pos, policy->freq_table) if (msr == perf->states[pos->driver_data].status) return pos->frequency; - return data->freq_table[0].frequency; + return policy->freq_table[0].frequency; } -static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) +static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) { + struct acpi_cpufreq_data *data = policy->driver_data; + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: case SYSTEM_AMD_MSR_CAPABLE: - return extract_msr(val, data); + return extract_msr(policy, val); case SYSTEM_IO_CAPABLE: - return extract_io(val, data); + return extract_io(policy, val); default: return 0; } @@ -374,11 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return 0; data = policy->driver_data; - if (unlikely(!data || !data->freq_table)) + if (unlikely(!data || !policy->freq_table)) return 0; - cached_freq = data->freq_table[to_perf_data(data)->state].frequency; - freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data); + cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; + freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. 
@@ -392,14 +395,15 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, - struct acpi_cpufreq_data *data) +static unsigned int check_freqs(struct cpufreq_policy *policy, + const struct cpumask *mask, unsigned int freq) { + struct acpi_cpufreq_data *data = policy->driver_data; unsigned int cur_freq; unsigned int i; for (i = 0; i < 100; i++) { - cur_freq = extract_freq(get_cur_val(mask, data), data); + cur_freq = extract_freq(policy, get_cur_val(mask, data)); if (cur_freq == freq) return 1; udelay(10); @@ -416,12 +420,12 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; - if (unlikely(data == NULL || data->freq_table == NULL)) { + if (unlikely(!data)) { return -ENODEV; } perf = to_perf_data(data); - next_perf_state = data->freq_table[index].driver_data; + next_perf_state = policy->freq_table[index].driver_data; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { pr_debug("Called after resume, resetting to P%d\n", @@ -444,8 +448,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, drv_write(data, mask, perf->states[next_perf_state].control); if (acpi_pstate_strict) { - if (!check_freqs(mask, data->freq_table[index].frequency, - data)) { + if (!check_freqs(policy, mask, + policy->freq_table[index].frequency)) { pr_debug("acpi_cpufreq_target failed (%d)\n", policy->cpu); result = -EAGAIN; @@ -458,6 +462,43 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return result; } +unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct acpi_cpufreq_data *data = policy->driver_data; + struct acpi_processor_performance *perf; + struct cpufreq_frequency_table *entry; + unsigned int next_perf_state, next_freq, freq; + + /* + * Find the closest frequency above target_freq. 
+ * + * The table is sorted in the reverse order with respect to the + * frequency and all of the entries are valid (see the initialization). + */ + entry = policy->freq_table; + do { + entry++; + freq = entry->frequency; + } while (freq >= target_freq && freq != CPUFREQ_TABLE_END); + entry--; + next_freq = entry->frequency; + next_perf_state = entry->driver_data; + + perf = to_perf_data(data); + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) + data->resume = 0; + else + return next_freq; + } + + data->cpu_freq_write(&perf->control_register, + perf->states[next_perf_state].control); + perf->state = next_perf_state; + return next_freq; +} + static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { @@ -611,10 +652,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) if ((c->x86 == 15) && (c->x86_model == 6) && (c->x86_mask == 8)) { - printk(KERN_INFO "acpi-cpufreq: Intel(R) " - "Xeon(R) 7100 Errata AL30, processors may " - "lock up on frequency changes: disabling " - "acpi-cpufreq.\n"); + pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); return -ENODEV; } } @@ -631,6 +669,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; + struct cpufreq_frequency_table *freq_table; #ifdef CONFIG_SMP static int blacklisted; #endif @@ -690,7 +729,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) cpumask_copy(data->freqdomain_cpus, topology_sibling_cpumask(cpu)); policy->shared_type = CPUFREQ_SHARED_TYPE_HW; - pr_info_once(PFX "overriding BIOS provided _PSD data\n"); + pr_info_once("overriding BIOS provided _PSD data\n"); } #endif @@ -742,9 +781,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_unreg; } - data->freq_table = kzalloc(sizeof(*data->freq_table) * + freq_table = 
kzalloc(sizeof(*freq_table) * (perf->state_count+1), GFP_KERNEL); - if (!data->freq_table) { + if (!freq_table) { result = -ENOMEM; goto err_unreg; } @@ -762,30 +801,29 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { policy->cpuinfo.transition_latency = 20 * 1000; - printk_once(KERN_INFO - "P-state transition latency capped at 20 uS\n"); + pr_info_once("P-state transition latency capped at 20 uS\n"); } /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency >= - data->freq_table[valid_states-1].frequency / 1000) + freq_table[valid_states-1].frequency / 1000) continue; - data->freq_table[valid_states].driver_data = i; - data->freq_table[valid_states].frequency = + freq_table[valid_states].driver_data = i; + freq_table[valid_states].frequency = perf->states[i].core_frequency * 1000; valid_states++; } - data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + freq_table[valid_states].frequency = CPUFREQ_TABLE_END; perf->state = 0; - result = cpufreq_table_validate_and_show(policy, data->freq_table); + result = cpufreq_table_validate_and_show(policy, freq_table); if (result) goto err_freqfree; if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) - printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); + pr_warn(FW_WARN "P-state 0 is not max freq\n"); switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: @@ -821,10 +859,13 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ data->resume = 1; + policy->fast_switch_possible = !acpi_pstate_strict && + !(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY); + return result; err_freqfree: - kfree(data->freq_table); + kfree(freq_table); err_unreg: acpi_processor_unregister_performance(cpu); err_free_mask: @@ -842,13 +883,12 @@ static int 
acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) pr_debug("acpi_cpufreq_cpu_exit\n"); - if (data) { - policy->driver_data = NULL; - acpi_processor_unregister_performance(data->acpi_perf_cpu); - free_cpumask_var(data->freqdomain_cpus); - kfree(data->freq_table); - kfree(data); - } + policy->fast_switch_possible = false; + policy->driver_data = NULL; + acpi_processor_unregister_performance(data->acpi_perf_cpu); + free_cpumask_var(data->freqdomain_cpus); + kfree(policy->freq_table); + kfree(data); return 0; } @@ -876,6 +916,7 @@ static struct freq_attr *acpi_cpufreq_attr[] = { static struct cpufreq_driver acpi_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = acpi_cpufreq_target, + .fast_switch = acpi_cpufreq_fast_switch, .bios_limit = acpi_processor_get_bios_limit, .init = acpi_cpufreq_cpu_init, .exit = acpi_cpufreq_cpu_exit, diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 7c0bdfb1a2ca..8882b8e2ecd0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -173,4 +173,25 @@ out: return -ENODEV; } +static void __exit cppc_cpufreq_exit(void) +{ + struct cpudata *cpu; + int i; + + cpufreq_unregister_driver(&cppc_cpufreq_driver); + + for_each_possible_cpu(i) { + cpu = all_cpu_data[i]; + free_cpumask_var(cpu->shared_cpu_map); + kfree(cpu); + } + + kfree(all_cpu_data); +} + +module_exit(cppc_cpufreq_exit); +MODULE_AUTHOR("Ashwin Chaugule"); +MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec"); +MODULE_LICENSE("GPL"); + late_initcall(cppc_cpufreq_init); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c new file mode 100644 index 000000000000..ac4a0ba87c12 --- /dev/null +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 Linaro. 
+ * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +static const struct of_device_id machines[] __initconst = { + { .compatible = "allwinner,sun4i-a10", }, + { .compatible = "allwinner,sun5i-a10s", }, + { .compatible = "allwinner,sun5i-a13", }, + { .compatible = "allwinner,sun5i-r8", }, + { .compatible = "allwinner,sun6i-a31", }, + { .compatible = "allwinner,sun6i-a31s", }, + { .compatible = "allwinner,sun7i-a20", }, + { .compatible = "allwinner,sun8i-a23", }, + { .compatible = "allwinner,sun8i-a33", }, + { .compatible = "allwinner,sun8i-a83t", }, + { .compatible = "allwinner,sun8i-h3", }, + + { .compatible = "hisilicon,hi6220", }, + + { .compatible = "fsl,imx27", }, + { .compatible = "fsl,imx51", }, + { .compatible = "fsl,imx53", }, + { .compatible = "fsl,imx7d", }, + + { .compatible = "marvell,berlin", }, + + { .compatible = "samsung,exynos3250", }, + { .compatible = "samsung,exynos4210", }, + { .compatible = "samsung,exynos4212", }, + { .compatible = "samsung,exynos4412", }, + { .compatible = "samsung,exynos5250", }, +#ifndef CONFIG_BL_SWITCHER + { .compatible = "samsung,exynos5420", }, + { .compatible = "samsung,exynos5800", }, +#endif + + { .compatible = "renesas,emev2", }, + { .compatible = "renesas,r7s72100", }, + { .compatible = "renesas,r8a73a4", }, + { .compatible = "renesas,r8a7740", }, + { .compatible = "renesas,r8a7778", }, + { .compatible = "renesas,r8a7779", }, + { .compatible = "renesas,r8a7790", }, + { .compatible = "renesas,r8a7791", }, + { .compatible = "renesas,r8a7793", }, + { .compatible = "renesas,r8a7794", }, + { .compatible = "renesas,sh73a0", }, + + { .compatible = "rockchip,rk2928", }, + { .compatible = "rockchip,rk3036", }, + { .compatible = "rockchip,rk3066a", }, 
+ { .compatible = "rockchip,rk3066b", }, + { .compatible = "rockchip,rk3188", }, + { .compatible = "rockchip,rk3228", }, + { .compatible = "rockchip,rk3288", }, + { .compatible = "rockchip,rk3366", }, + { .compatible = "rockchip,rk3368", }, + { .compatible = "rockchip,rk3399", }, + + { .compatible = "ti,omap2", }, + { .compatible = "ti,omap3", }, + { .compatible = "ti,omap4", }, + { .compatible = "ti,omap5", }, + + { .compatible = "xlnx,zynq-7000", }, +}; + +static int __init cpufreq_dt_platdev_init(void) +{ + struct device_node *np = of_find_node_by_path("/"); + + if (!np) + return -ENODEV; + + if (!of_match_node(machines, np)) + return -ENODEV; + + of_node_put(of_root); + + return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, + NULL, 0)); +} +device_initcall(cpufreq_dt_platdev_init); diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index db69eeb501a7..5503d491b016 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -7,6 +7,8 @@ * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -56,8 +58,6 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); -#define PFX "cpufreq-nforce2: " - /** * nforce2_calc_fsb - calculate FSB * @pll: PLL value @@ -174,13 +174,13 @@ static int nforce2_set_fsb(unsigned int fsb) int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); + pr_err("FSB %d is out of range!\n", fsb); return -EINVAL; } tfsb = nforce2_fsb_read(0); if (!tfsb) { - printk(KERN_ERR PFX "Error while reading the FSB\n"); + pr_err("Error while reading the FSB\n"); return -EINVAL; } @@ -276,8 +276,7 @@ static int nforce2_target(struct cpufreq_policy *policy, /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR PFX "Changing FSB to %d failed\n", - target_fsb); + pr_err("Changing FSB to %d failed\n", target_fsb); else pr_debug("Changed FSB successfully to %d\n", target_fsb); @@ -325,8 +324,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* FIX: Get FID from CPU */ if (!fid) { if (!cpu_khz) { - printk(KERN_WARNING PFX - "cpu_khz not set, can't calculate multiplier!\n"); + pr_warn("cpu_khz not set, can't calculate multiplier!\n"); return -ENODEV; } @@ -341,8 +339,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) } } - printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, - fid / 10, fid % 10); + pr_info("FSB currently at %i MHz, FID %d.%d\n", + fsb, fid / 10, fid % 10); /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); @@ -401,11 +399,9 @@ static int nforce2_detect_chipset(void) if (nforce2_dev == NULL) return -ENODEV; - printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", - nforce2_dev->revision); - printk(KERN_INFO PFX - "FSB changing is 
maybe unstable and can lead to " - "crashes and data loss.\n"); + pr_info("Detected nForce2 chipset revision %X\n", + nforce2_dev->revision); + pr_info("FSB changing is maybe unstable and can lead to crashes and data loss\n"); return 0; } @@ -423,7 +419,7 @@ static int __init nforce2_init(void) /* detect chipset */ if (nforce2_detect_chipset()) { - printk(KERN_INFO PFX "No nForce2 chipset.\n"); + pr_info("No nForce2 chipset\n"); return -ENODEV; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e93405f0eac4..a48b998b3304 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -78,6 +78,11 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); +static inline int cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); +} + /** * Two notifier lists: the "policy" list is involved in the * validation process for a new CPU frequency policy; the @@ -429,6 +434,73 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end); +/* + * Fast frequency switching status count. Positive means "enabled", negative + * means "disabled" and 0 means "not decided yet". + */ +static int cpufreq_fast_switch_count; +static DEFINE_MUTEX(cpufreq_fast_switch_lock); + +static void cpufreq_list_transition_notifiers(void) +{ + struct notifier_block *nb; + + pr_info("Registered transition notifiers:\n"); + + mutex_lock(&cpufreq_transition_notifier_list.mutex); + + for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next) + pr_info("%pF\n", nb->notifier_call); + + mutex_unlock(&cpufreq_transition_notifier_list.mutex); +} + +/** + * cpufreq_enable_fast_switch - Enable fast frequency switching for policy. + * @policy: cpufreq policy to enable fast frequency switching for. 
+ * + * Try to enable fast frequency switching for @policy. + * + * The attempt will fail if there is at least one transition notifier registered + * at this point, as fast frequency switching is quite fundamentally at odds + * with transition notifiers. Thus if successful, it will make registration of + * transition notifiers fail going forward. + */ +void cpufreq_enable_fast_switch(struct cpufreq_policy *policy) +{ + lockdep_assert_held(&policy->rwsem); + + if (!policy->fast_switch_possible) + return; + + mutex_lock(&cpufreq_fast_switch_lock); + if (cpufreq_fast_switch_count >= 0) { + cpufreq_fast_switch_count++; + policy->fast_switch_enabled = true; + } else { + pr_warn("CPU%u: Fast frequency switching not enabled\n", + policy->cpu); + cpufreq_list_transition_notifiers(); + } + mutex_unlock(&cpufreq_fast_switch_lock); +} +EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch); + +/** + * cpufreq_disable_fast_switch - Disable fast frequency switching for policy. + * @policy: cpufreq policy to disable fast frequency switching for. + */ +void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) +{ + mutex_lock(&cpufreq_fast_switch_lock); + if (policy->fast_switch_enabled) { + policy->fast_switch_enabled = false; + if (!WARN_ON(cpufreq_fast_switch_count <= 0)) + cpufreq_fast_switch_count--; + } + mutex_unlock(&cpufreq_fast_switch_lock); +} +EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); /********************************************************************* * SYSFS INTERFACE * @@ -1248,26 +1320,24 @@ out_free_policy: */ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { + struct cpufreq_policy *policy; unsigned cpu = dev->id; - int ret; dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); - if (cpu_online(cpu)) { - ret = cpufreq_online(cpu); - } else { - /* - * A hotplug notifier will follow and we will handle it as CPU - * online then. For now, just create the sysfs link, unless - * there is no policy or the link is already present. 
- */ - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + if (cpu_online(cpu)) + return cpufreq_online(cpu); - ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) - ? add_cpu_dev_symlink(policy, cpu) : 0; - } + /* + * A hotplug notifier will follow and we will handle it as CPU online + * then. For now, just create the sysfs link, unless there is no policy + * or the link is already present. + */ + policy = per_cpu(cpufreq_cpu_data, cpu); + if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) + return 0; - return ret; + return add_cpu_dev_symlink(policy, cpu); } static void cpufreq_offline(unsigned int cpu) @@ -1319,7 +1389,7 @@ static void cpufreq_offline(unsigned int cpu) /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_exit_governor(policy); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1447,8 +1517,12 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) ret_freq = cpufreq_driver->get(policy->cpu); - /* Updating inactive policies is invalid, so avoid doing that. */ - if (unlikely(policy_is_inactive(policy))) + /* + * Updating inactive policies is invalid, so avoid doing that. Also + * if fast frequency switching is used with the given policy, the check + * against policy->cur is pointless, so skip it in that case too. 
+ */ + if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled) return ret_freq; if (ret_freq && policy->cur && @@ -1675,8 +1749,18 @@ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: + mutex_lock(&cpufreq_fast_switch_lock); + + if (cpufreq_fast_switch_count > 0) { + mutex_unlock(&cpufreq_fast_switch_lock); + return -EBUSY; + } ret = srcu_notifier_chain_register( &cpufreq_transition_notifier_list, nb); + if (!ret) + cpufreq_fast_switch_count--; + + mutex_unlock(&cpufreq_fast_switch_lock); break; case CPUFREQ_POLICY_NOTIFIER: ret = blocking_notifier_chain_register( @@ -1709,8 +1793,14 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) switch (list) { case CPUFREQ_TRANSITION_NOTIFIER: + mutex_lock(&cpufreq_fast_switch_lock); + ret = srcu_notifier_chain_unregister( &cpufreq_transition_notifier_list, nb); + if (!ret && !WARN_ON(cpufreq_fast_switch_count >= 0)) + cpufreq_fast_switch_count++; + + mutex_unlock(&cpufreq_fast_switch_lock); break; case CPUFREQ_POLICY_NOTIFIER: ret = blocking_notifier_chain_unregister( @@ -1729,6 +1819,37 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); * GOVERNORS * *********************************************************************/ +/** + * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch. + * @policy: cpufreq policy to switch the frequency for. + * @target_freq: New frequency to set (may be approximate). + * + * Carry out a fast frequency switch without sleeping. + * + * The driver's ->fast_switch() callback invoked by this function must be + * suitable for being called from within RCU-sched read-side critical sections + * and it is expected to select the minimum available frequency greater than or + * equal to @target_freq (CPUFREQ_RELATION_L). + * + * This function must not be called if policy->fast_switch_enabled is unset. 
+ * + * Governors calling this function must guarantee that it will never be invoked + * twice in parallel for the same policy and that it will never be called in + * parallel with either ->target() or ->target_index() for the same policy. + * + * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch() + * callback to indicate an error condition, the hardware configuration must be + * preserved. + */ +unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + return cpufreq_driver->fast_switch(policy, target_freq); +} +EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); + /* Must set freqs->new to intermediate frequency */ static int __target_intermediate(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, int index) @@ -2104,7 +2225,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = cpufreq_exit_governor(policy); if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2121,7 +2242,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("cpufreq: governor change\n"); return 0; } - cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + cpufreq_exit_governor(policy); } /* new governor failed, so re-start old one */ @@ -2189,16 +2310,13 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: + case CPU_DOWN_FAILED: cpufreq_online(cpu); break; case CPU_DOWN_PREPARE: cpufreq_offline(cpu); break; - - case CPU_DOWN_FAILED: - cpufreq_online(cpu); - break; } return NOTIFY_OK; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index bf4913f6453b..316df247e00d 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -129,9 +129,10 @@ static struct notifier_block 
cs_cpufreq_notifier_block = { /************************** sysfs interface ************************/ static struct dbs_governor cs_dbs_gov; -static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, - const char *buf, size_t count) +static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -143,9 +144,10 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, return count; } -static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_up_threshold(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; @@ -158,9 +160,10 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, return count; } -static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_down_threshold(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; @@ -175,9 +178,10 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, return count; } -static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; @@ -199,9 +203,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, return count; } -static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_freq_step(struct gov_attr_set 
*attr_set, const char *buf, + size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input; int ret; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 10a5cfeae8c5..eb2fdbd9433c 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -43,9 +43,10 @@ static DEFINE_MUTEX(gov_dbs_data_mutex); * This must be called with dbs_data->mutex held, otherwise traversing * policy_dbs_list isn't safe. */ -ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, +ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct policy_dbs_info *policy_dbs; unsigned int rate; int ret; @@ -59,7 +60,7 @@ ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, * We are operating under dbs_data->mutex and so the list and its * entries can't be freed concurrently. 
*/ - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { mutex_lock(&policy_dbs->timer_mutex); /* * On 32-bit architectures this may race with the @@ -96,13 +97,13 @@ void gov_update_cpu_data(struct dbs_data *dbs_data) { struct policy_dbs_info *policy_dbs; - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) { unsigned int j; for_each_cpu(j, policy_dbs->policy->cpus) { struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); - j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, dbs_data->io_is_busy); if (dbs_data->ignore_nice_load) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -111,54 +112,6 @@ void gov_update_cpu_data(struct dbs_data *dbs_data) } EXPORT_SYMBOL_GPL(gov_update_cpu_data); -static inline struct dbs_data *to_dbs_data(struct kobject *kobj) -{ - return container_of(kobj, struct dbs_data, kobj); -} - -static inline struct governor_attr *to_gov_attr(struct attribute *attr) -{ - return container_of(attr, struct governor_attr, attr); -} - -static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct dbs_data *dbs_data = to_dbs_data(kobj); - struct governor_attr *gattr = to_gov_attr(attr); - - return gattr->show(dbs_data, buf); -} - -static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - struct dbs_data *dbs_data = to_dbs_data(kobj); - struct governor_attr *gattr = to_gov_attr(attr); - int ret = -EBUSY; - - mutex_lock(&dbs_data->mutex); - - if (dbs_data->usage_count) - ret = gattr->store(dbs_data, buf, count); - - mutex_unlock(&dbs_data->mutex); - - return ret; -} - -/* - * Sysfs Ops for accessing governor attributes. 
- * - * All show/store invocations for governor specific sysfs attributes, will first - * call the below show/store callbacks and the attribute specific callback will - * be called from within it. - */ -static const struct sysfs_ops governor_sysfs_ops = { - .show = governor_show, - .store = governor_store, -}; - unsigned int dbs_update(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -184,21 +137,17 @@ unsigned int dbs_update(struct cpufreq_policy *policy) /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); - u64 cur_wall_time, cur_idle_time; - unsigned int idle_time, wall_time; + u64 update_time, cur_idle_time; + unsigned int idle_time, time_elapsed; unsigned int load; - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); + cur_idle_time = get_cpu_idle_time(j, &update_time, io_busy); - wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; - j_cdbs->prev_cpu_wall = cur_wall_time; + time_elapsed = update_time - j_cdbs->prev_update_time; + j_cdbs->prev_update_time = update_time; - if (cur_idle_time <= j_cdbs->prev_cpu_idle) { - idle_time = 0; - } else { - idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; - j_cdbs->prev_cpu_idle = cur_idle_time; - } + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; + j_cdbs->prev_cpu_idle = cur_idle_time; if (ignore_nice) { u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; @@ -207,7 +156,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) j_cdbs->prev_cpu_nice = cur_nice; } - if (unlikely(!wall_time || wall_time < idle_time)) + if (unlikely(!time_elapsed || time_elapsed < idle_time)) continue; /* @@ -228,7 +177,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * * Detecting this situation is easy: the governor's utilization * update handler would not have run during CPU-idle periods. 
- * Hence, an unusually large 'wall_time' (as compared to the + * Hence, an unusually large 'time_elapsed' (as compared to the * sampling rate) indicates this scenario. * * prev_load can be zero in two cases and we must recalculate it @@ -236,7 +185,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * - during long idle intervals * - explicitly set to zero */ - if (unlikely(wall_time > (2 * sampling_rate) && + if (unlikely(time_elapsed > 2 * sampling_rate && j_cdbs->prev_load)) { load = j_cdbs->prev_load; @@ -247,7 +196,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) */ j_cdbs->prev_load = 0; } else { - load = 100 * (wall_time - idle_time) / wall_time; + load = 100 * (time_elapsed - idle_time) / time_elapsed; j_cdbs->prev_load = load; } @@ -258,43 +207,6 @@ unsigned int dbs_update(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(dbs_update); -static void gov_set_update_util(struct policy_dbs_info *policy_dbs, - unsigned int delay_us) -{ - struct cpufreq_policy *policy = policy_dbs->policy; - int cpu; - - gov_update_sample_delay(policy_dbs, delay_us); - policy_dbs->last_sample_time = 0; - - for_each_cpu(cpu, policy->cpus) { - struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); - - cpufreq_set_update_util_data(cpu, &cdbs->update_util); - } -} - -static inline void gov_clear_update_util(struct cpufreq_policy *policy) -{ - int i; - - for_each_cpu(i, policy->cpus) - cpufreq_set_update_util_data(i, NULL); - - synchronize_sched(); -} - -static void gov_cancel_work(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - - gov_clear_update_util(policy_dbs->policy); - irq_work_sync(&policy_dbs->irq_work); - cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->work_count, 0); - policy_dbs->work_in_progress = false; -} - static void dbs_work_handler(struct work_struct *work) { struct policy_dbs_info *policy_dbs; @@ -382,6 +294,44 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 
time, irq_work_queue(&policy_dbs->irq_work); } +static void gov_set_update_util(struct policy_dbs_info *policy_dbs, + unsigned int delay_us) +{ + struct cpufreq_policy *policy = policy_dbs->policy; + int cpu; + + gov_update_sample_delay(policy_dbs, delay_us); + policy_dbs->last_sample_time = 0; + + for_each_cpu(cpu, policy->cpus) { + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); + + cpufreq_add_update_util_hook(cpu, &cdbs->update_util, + dbs_update_util_handler); + } +} + +static inline void gov_clear_update_util(struct cpufreq_policy *policy) +{ + int i; + + for_each_cpu(i, policy->cpus) + cpufreq_remove_update_util_hook(i); + + synchronize_sched(); +} + +static void gov_cancel_work(struct cpufreq_policy *policy) +{ + struct policy_dbs_info *policy_dbs = policy->governor_data; + + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; +} + static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -404,7 +354,6 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); j_cdbs->policy_dbs = policy_dbs; - j_cdbs->update_util.func = dbs_update_util_handler; } return policy_dbs; } @@ -453,10 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) policy_dbs->dbs_data = dbs_data; policy->governor_data = policy_dbs; - mutex_lock(&dbs_data->mutex); - dbs_data->usage_count++; - list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); - mutex_unlock(&dbs_data->mutex); + gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list); goto out; } @@ -466,8 +412,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } - INIT_LIST_HEAD(&dbs_data->policy_dbs_list); - mutex_init(&dbs_data->mutex); + gov_attr_set_init(&dbs_data->attr_set, 
&policy_dbs->list); ret = gov->init(dbs_data, !policy->governor->initialized); if (ret) @@ -487,14 +432,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; - policy->governor_data = policy_dbs; - policy_dbs->dbs_data = dbs_data; - dbs_data->usage_count = 1; - list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); + policy->governor_data = policy_dbs; gov->kobj_type.sysfs_ops = &governor_sysfs_ops; - ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, + ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type, get_governor_parent_kobj(policy), "%s", gov->gov.name); if (!ret) @@ -523,29 +465,21 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; - int count; + unsigned int count; /* Protect gov->gdbs_data against concurrent updates. 
*/ mutex_lock(&gov_dbs_data_mutex); - mutex_lock(&dbs_data->mutex); - list_del(&policy_dbs->list); - count = --dbs_data->usage_count; - mutex_unlock(&dbs_data->mutex); + count = gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list); - if (!count) { - kobject_put(&dbs_data->kobj); - - policy->governor_data = NULL; + policy->governor_data = NULL; + if (!count) { if (!have_governor_per_policy()) gov->gdbs_data = NULL; gov->exit(dbs_data, policy->governor->initialized == 1); - mutex_destroy(&dbs_data->mutex); kfree(dbs_data); - } else { - policy->governor_data = NULL; } free_policy_dbs_info(policy_dbs, gov); @@ -574,12 +508,12 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); - unsigned int prev_load; - j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); - - prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; - j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, io_busy); + /* + * Make the first invocation of dbs_update() compute the load. + */ + j_cdbs->prev_load = 0; if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 61ff82fe0613..34eb214b6d57 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -24,20 +24,6 @@ #include <linux/module.h> #include <linux/mutex.h> -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * governor will work on any processor with transition latency <= 10ms, using - * appropriate sampling rate. - * - * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) - * this governor will not work. 
All times here are in us (micro seconds). - */ -#define MIN_SAMPLING_RATE_RATIO (2) -#define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (20) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) - /* Ondemand Sampling types */ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; @@ -52,7 +38,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* Governor demand based switching data (per-policy or global). */ struct dbs_data { - int usage_count; + struct gov_attr_set attr_set; void *tuners; unsigned int min_sampling_rate; unsigned int ignore_nice_load; @@ -60,37 +46,27 @@ struct dbs_data { unsigned int sampling_down_factor; unsigned int up_threshold; unsigned int io_is_busy; - - struct kobject kobj; - struct list_head policy_dbs_list; - /* - * Protect concurrent updates to governor tunables from sysfs, - * policy_dbs_list and usage_count. - */ - struct mutex mutex; }; -/* Governor's specific attributes */ -struct dbs_data; -struct governor_attr { - struct attribute attr; - ssize_t (*show)(struct dbs_data *dbs_data, char *buf); - ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, - size_t count); -}; +static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set) +{ + return container_of(attr_set, struct dbs_data, attr_set); +} #define gov_show_one(_gov, file_name) \ static ssize_t show_##file_name \ -(struct dbs_data *dbs_data, char *buf) \ +(struct gov_attr_set *attr_set, char *buf) \ { \ + struct dbs_data *dbs_data = to_dbs_data(attr_set); \ struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ return sprintf(buf, "%u\n", tuners->file_name); \ } #define gov_show_one_common(file_name) \ static ssize_t show_##file_name \ -(struct dbs_data *dbs_data, char *buf) \ +(struct gov_attr_set *attr_set, char *buf) \ { \ + struct dbs_data *dbs_data = to_dbs_data(attr_set); \ return sprintf(buf, "%u\n", dbs_data->file_name); \ } @@ -135,7 +111,7 @@ static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, /* Per cpu structures */ struct 
cpu_dbs_info { u64 prev_cpu_idle; - u64 prev_cpu_wall; + u64 prev_update_time; u64 prev_cpu_nice; /* * Used to keep track of load in the previous interval. However, when @@ -184,7 +160,7 @@ void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); -ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, +ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, size_t count); void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c new file mode 100644 index 000000000000..52841f807a7e --- /dev/null +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -0,0 +1,84 @@ +/* + * Abstract code for CPUFreq governor tunable sysfs attributes. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "cpufreq_governor.h" + +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + +static inline struct governor_attr *to_gov_attr(struct attribute *attr) +{ + return container_of(attr, struct governor_attr, attr); +} + +static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct governor_attr *gattr = to_gov_attr(attr); + + return gattr->show(to_gov_attr_set(kobj), buf); +} + +static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct gov_attr_set *attr_set = to_gov_attr_set(kobj); + struct governor_attr *gattr = to_gov_attr(attr); + int ret; + + mutex_lock(&attr_set->update_lock); + ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY; + mutex_unlock(&attr_set->update_lock); + return ret; +} + +const struct sysfs_ops governor_sysfs_ops = { + .show = governor_show, + .store = governor_store, +}; +EXPORT_SYMBOL_GPL(governor_sysfs_ops); + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + INIT_LIST_HEAD(&attr_set->policy_list); + mutex_init(&attr_set->update_lock); + attr_set->usage_count = 1; + list_add(list_node, &attr_set->policy_list); +} +EXPORT_SYMBOL_GPL(gov_attr_set_init); + +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + mutex_lock(&attr_set->update_lock); + attr_set->usage_count++; + list_add(list_node, &attr_set->policy_list); + mutex_unlock(&attr_set->update_lock); +} +EXPORT_SYMBOL_GPL(gov_attr_set_get); + +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node) +{ + unsigned int count; + + mutex_lock(&attr_set->update_lock); + list_del(list_node); + count = --attr_set->usage_count; + mutex_unlock(&attr_set->update_lock); + if (count) + return count; + + kobject_put(&attr_set->kobj); + mutex_destroy(&attr_set->update_lock); + 
return 0; +} +EXPORT_SYMBOL_GPL(gov_attr_set_put); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index acd80272ded6..300163430516 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -207,9 +207,10 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ static struct dbs_governor od_dbs_gov; -static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, + size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; @@ -224,9 +225,10 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, return count; } -static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_up_threshold(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; ret = sscanf(buf, "%u", &input); @@ -240,9 +242,10 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, return count; } -static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, - const char *buf, size_t count) +static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct policy_dbs_info *policy_dbs; unsigned int input; int ret; @@ -254,7 +257,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, dbs_data->sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { + list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { /* * Doing this without locking might lead to using different * rate_mult values 
in od_update() and od_dbs_timer(). @@ -267,9 +270,10 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, return count; } -static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, - const char *buf, size_t count) +static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); unsigned int input; int ret; @@ -291,9 +295,10 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, return count; } -static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, - size_t count) +static ssize_t store_powersave_bias(struct gov_attr_set *attr_set, + const char *buf, size_t count) { + struct dbs_data *dbs_data = to_dbs_data(attr_set); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct policy_dbs_info *policy_dbs; unsigned int input; @@ -308,7 +313,7 @@ static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, od_tuners->powersave_bias = input; - list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) + list_for_each_entry(policy_dbs, &attr_set->policy_list, list) ondemand_powersave_bias_init(policy_dbs->policy); return count; diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 4085244c8a67..cdf097b29862 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -6,6 +6,8 @@ * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -20,7 +22,7 @@ #include <asm/msr.h> #include <asm/tsc.h> -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) #include <linux/acpi.h> #include <acpi/processor.h> #endif @@ -33,7 +35,7 @@ struct eps_cpu_data { u32 fsb; -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) u32 bios_limit; #endif struct cpufreq_frequency_table freq_table[]; @@ -46,7 +48,7 @@ static int freq_failsafe_off; static int voltage_failsafe_off; static int set_max_voltage; -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) static int ignore_acpi_limit; static struct acpi_processor_performance *eps_acpi_cpu_perf; @@ -141,11 +143,9 @@ static int eps_set_state(struct eps_cpu_data *centaur, /* Print voltage and multiplier */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", - current_voltage * 16 + 700); + pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700); current_multiplier = (lo >> 8) & 0xff; - printk(KERN_INFO "eps: Current multiplier = %d\n", - current_multiplier); + pr_info("Current multiplier = %d\n", current_multiplier); } #endif return 0; @@ -166,7 +166,7 @@ static int eps_target(struct cpufreq_policy *policy, unsigned int index) dest_state = centaur->freq_table[index].driver_data & 0xffff; ret = eps_set_state(centaur, policy, dest_state); if (ret) - printk(KERN_ERR "eps: Timeout!\n"); + pr_err("Timeout!\n"); return ret; } @@ -186,7 +186,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) int k, step, voltage; int ret; int states; -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) unsigned int limit; #endif @@ 
-194,36 +194,36 @@ static int eps_cpu_init(struct cpufreq_policy *policy) return -ENODEV; /* Check brand */ - printk(KERN_INFO "eps: Detected VIA "); + pr_info("Detected VIA "); switch (c->x86_model) { case 10: rdmsr(0x1153, lo, hi); brand = (((lo >> 2) ^ lo) >> 18) & 3; - printk(KERN_CONT "Model A "); + pr_cont("Model A "); break; case 13: rdmsr(0x1154, lo, hi); brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff; - printk(KERN_CONT "Model D "); + pr_cont("Model D "); break; } switch (brand) { case EPS_BRAND_C7M: - printk(KERN_CONT "C7-M\n"); + pr_cont("C7-M\n"); break; case EPS_BRAND_C7: - printk(KERN_CONT "C7\n"); + pr_cont("C7\n"); break; case EPS_BRAND_EDEN: - printk(KERN_CONT "Eden\n"); + pr_cont("Eden\n"); break; case EPS_BRAND_C7D: - printk(KERN_CONT "C7-D\n"); + pr_cont("C7-D\n"); break; case EPS_BRAND_C3: - printk(KERN_CONT "C3\n"); + pr_cont("C3\n"); return -ENODEV; break; } @@ -235,7 +235,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Can be locked at 0 */ rdmsrl(MSR_IA32_MISC_ENABLE, val); if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); + pr_info("Can't enable Enhanced PowerSaver\n"); return -ENODEV; } } @@ -243,22 +243,19 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Print voltage and multiplier */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", - current_voltage * 16 + 700); + pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700); current_multiplier = (lo >> 8) & 0xff; - printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); + pr_info("Current multiplier = %d\n", current_multiplier); /* Print limits */ max_voltage = hi & 0xff; - printk(KERN_INFO "eps: Highest voltage = %dmV\n", - max_voltage * 16 + 700); + pr_info("Highest voltage = %dmV\n", max_voltage * 16 + 700); max_multiplier = (hi >> 8) & 0xff; - printk(KERN_INFO "eps: Highest multiplier = %d\n", 
max_multiplier); + pr_info("Highest multiplier = %d\n", max_multiplier); min_voltage = (hi >> 16) & 0xff; - printk(KERN_INFO "eps: Lowest voltage = %dmV\n", - min_voltage * 16 + 700); + pr_info("Lowest voltage = %dmV\n", min_voltage * 16 + 700); min_multiplier = (hi >> 24) & 0xff; - printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); + pr_info("Lowest multiplier = %d\n", min_multiplier); /* Sanity checks */ if (current_multiplier == 0 || max_multiplier == 0 @@ -276,34 +273,30 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Check for systems using underclocked CPU */ if (!freq_failsafe_off && max_multiplier != current_multiplier) { - printk(KERN_INFO "eps: Your processor is running at different " - "frequency then its maximum. Aborting.\n"); - printk(KERN_INFO "eps: You can use freq_failsafe_off option " - "to disable this check.\n"); + pr_info("Your processor is running at different frequency then its maximum. Aborting.\n"); + pr_info("You can use freq_failsafe_off option to disable this check.\n"); return -EINVAL; } if (!voltage_failsafe_off && max_voltage != current_voltage) { - printk(KERN_INFO "eps: Your processor is running at different " - "voltage then its maximum. Aborting.\n"); - printk(KERN_INFO "eps: You can use voltage_failsafe_off " - "option to disable this check.\n"); + pr_info("Your processor is running at different voltage then its maximum. 
Aborting.\n"); + pr_info("You can use voltage_failsafe_off option to disable this check.\n"); return -EINVAL; } /* Calc FSB speed */ fsb = cpu_khz / current_multiplier; -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) /* Check for ACPI processor speed limit */ if (!ignore_acpi_limit && !eps_acpi_init()) { if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) { - printk(KERN_INFO "eps: ACPI limit %u.%uGHz\n", + pr_info("ACPI limit %u.%uGHz\n", limit/1000000, (limit%1000000)/10000); eps_acpi_exit(policy); /* Check if max_multiplier is in BIOS limits */ if (limit && max_multiplier * fsb > limit) { - printk(KERN_INFO "eps: Aborting.\n"); + pr_info("Aborting\n"); return -EINVAL; } } @@ -319,8 +312,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) v = (set_max_voltage - 700) / 16; /* Check if voltage is within limits */ if (v >= min_voltage && v <= max_voltage) { - printk(KERN_INFO "eps: Setting %dmV as maximum.\n", - v * 16 + 700); + pr_info("Setting %dmV as maximum\n", v * 16 + 700); max_voltage = v; } } @@ -341,7 +333,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Copy basic values */ centaur->fsb = fsb; -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) centaur->bios_limit = limit; #endif @@ -426,7 +418,7 @@ module_param(freq_failsafe_off, int, 0644); MODULE_PARM_DESC(freq_failsafe_off, "Disable current vs max frequency check"); module_param(voltage_failsafe_off, int, 0644); MODULE_PARM_DESC(voltage_failsafe_off, "Disable current vs max voltage check"); -#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE +#if IS_ENABLED(CONFIG_ACPI_PROCESSOR) module_param(ignore_acpi_limit, int, 0644); MODULE_PARM_DESC(ignore_acpi_limit, "Don't check ACPI's processor speed limit"); #endif diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index 1c06e786c9ba..bfce11cba1df 100644 --- 
a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -16,6 +16,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -185,7 +187,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) static int __init elanfreq_setup(char *str) { max_freq = simple_strtoul(str, &str, 0); - printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); + pr_warn("You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); return 1; } __setup("elanfreq=", elanfreq_setup); diff --git a/drivers/cpufreq/hisi-acpu-cpufreq.c b/drivers/cpufreq/hisi-acpu-cpufreq.c deleted file mode 100644 index 026d5b2224de..000000000000 --- a/drivers/cpufreq/hisi-acpu-cpufreq.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Hisilicon Platforms Using ACPU CPUFreq Support - * - * Copyright (c) 2015 Hisilicon Limited. - * Copyright (c) 2015 Linaro Limited. - * - * Leo Yan <leo.yan@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/platform_device.h> - -static int __init hisi_acpu_cpufreq_driver_init(void) -{ - struct platform_device *pdev; - - if (!of_machine_is_compatible("hisilicon,hi6220")) - return -ENODEV; - - pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); - return PTR_ERR_OR_ZERO(pdev); -} -module_init(hisi_acpu_cpufreq_driver_init); - -MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>"); -MODULE_DESCRIPTION("Hisilicon acpu cpufreq driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index 0202429f1c5b..759612da4fdc 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -8,6 +8,8 @@ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> @@ -118,8 +120,7 @@ processor_get_freq ( if (ret) { set_cpus_allowed_ptr(current, &saved_mask); - printk(KERN_WARNING "get performance failed with error %d\n", - ret); + pr_warn("get performance failed with error %d\n", ret); ret = 0; goto migrate_end; } @@ -177,7 +178,7 @@ processor_set_freq ( ret = processor_set_pstate(value); if (ret) { - printk(KERN_WARNING "Transition failed with error %d\n", ret); + pr_warn("Transition failed with error %d\n", ret); retval = -ENODEV; goto migrate_end; } @@ -291,8 +292,7 @@ acpi_cpufreq_cpu_init ( /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management " - "activated.\n", cpu); + pr_info("CPU%u - ACPI performance management activated\n", cpu); for (i = 0; i < data->acpi_data.state_count; i++) pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", diff --git 
a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 30fe323c4551..a0823e84ceca 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -10,6 +10,8 @@ * of the License. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/module.h> @@ -39,6 +41,10 @@ #define ATOM_TURBO_RATIOS 0x66c #define ATOM_TURBO_VIDS 0x66d +#ifdef CONFIG_ACPI +#include <acpi/processor.h> +#endif + #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) @@ -172,6 +178,8 @@ struct _pid { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data + * @acpi_perf_data: Stores ACPI perf information read from _PSS + * @valid_pss_table: Set to true for valid ACPI _PSS entries found * * This structure stores per CPU instance data for all CPUs. */ @@ -190,6 +198,10 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; +#ifdef CONFIG_ACPI + struct acpi_processor_performance acpi_perf_data; + bool valid_pss_table; +#endif }; static struct cpudata **all_cpu_data; @@ -258,6 +270,9 @@ static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; +#ifdef CONFIG_ACPI +static bool acpi_ppc; +#endif /** * struct perf_limits - Store user and policy limits @@ -331,6 +346,121 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif +#ifdef CONFIG_ACPI + +static bool intel_pstate_get_ppc_enable_status(void) +{ + if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || + acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) + return true; + + return acpi_ppc; +} + +/* + * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and + * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and + * 
max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state + * ratio, out of it only high 8 bits are used. For example 0x1700 is setting + * target ratio 0x17. The _PSS control value stores in a format which can be + * directly written to PERF_CTL MSR. But in intel_pstate driver this shift + * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). + * This function converts the _PSS control value to intel pstate driver format + * for comparison and assignment. + */ +static int convert_to_native_pstate_format(struct cpudata *cpu, int index) +{ + return cpu->acpi_perf_data.states[index].control >> 8; +} + +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int turbo_pss_ctl; + int ret; + int i; + + if (!intel_pstate_get_ppc_enable_status()) + return; + + cpu = all_cpu_data[policy->cpu]; + + ret = acpi_processor_register_performance(&cpu->acpi_perf_data, + policy->cpu); + if (ret) + return; + + /* + * Check if the control value in _PSS is for PERF_CTL MSR, which should + * guarantee that the states returned by it map to the states in our + * list directly. + */ + if (cpu->acpi_perf_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) + goto err; + + /* + * If there is only one entry _PSS, simply ignore _PSS and continue as + * usual without taking _PSS into account + */ + if (cpu->acpi_perf_data.state_count < 2) + goto err; + + pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu); + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { + pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", + (i == cpu->acpi_perf_data.state ? '*' : ' '), i, + (u32) cpu->acpi_perf_data.states[i].core_frequency, + (u32) cpu->acpi_perf_data.states[i].power, + (u32) cpu->acpi_perf_data.states[i].control); + } + + /* + * The _PSS table doesn't contain whole turbo frequency range. + * This just contains +1 MHZ above the max non turbo frequency, + * with control value corresponding to max turbo ratio. 
But + * when cpufreq set policy is called, it will call with this + * max frequency, which will cause a reduced performance as + * this driver uses real max turbo frequency as the max + * frequency. So correct this frequency in _PSS table to + * correct max turbo frequency based on the turbo ratio. + * Also need to convert to MHz as _PSS freq is in MHz. + */ + turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); + if (turbo_pss_ctl > cpu->pstate.max_pstate) + cpu->acpi_perf_data.states[0].core_frequency = + policy->cpuinfo.max_freq / 1000; + cpu->valid_pss_table = true; + pr_info("_PPC limits will be enforced\n"); + + return; + + err: + cpu->valid_pss_table = false; + acpi_processor_unregister_performance(policy->cpu); +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + cpu = all_cpu_data[policy->cpu]; + if (!cpu->valid_pss_table) + return; + + acpi_processor_unregister_performance(policy->cpu); +} + +#else +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ +} +#endif + static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = int_tofp(setpoint); @@ -341,17 +471,17 @@ static inline void pid_reset(struct _pid *pid, int setpoint, int busy, static inline void pid_p_gain_set(struct _pid *pid, int percent) { - pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->p_gain = div_fp(percent, 100); } static inline void pid_i_gain_set(struct _pid *pid, int percent) { - pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->i_gain = div_fp(percent, 100); } static inline void pid_d_gain_set(struct _pid *pid, int percent) { - pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->d_gain = div_fp(percent, 100); } static signed int pid_calc(struct _pid *pid, int32_t busy) @@ -529,7 +659,7 @@ static ssize_t show_turbo_pct(struct 
kobject *kobj, total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; - turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total)); + turbo_fp = div_fp(no_turbo, total); turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); return sprintf(buf, "%u\n", turbo_pct); } @@ -571,7 +701,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, update_turbo_state(); if (limits->turbo_disabled) { - pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); + pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } @@ -600,8 +730,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->max_perf = div_fp(limits->max_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -625,8 +754,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -1011,8 +1139,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) struct sample *sample = &cpu->sample; int64_t core_pct; - core_pct = int_tofp(sample->aperf) * int_tofp(100); - core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); + core_pct = sample->aperf * int_tofp(100); + core_pct = div64_u64(core_pct, sample->mperf); sample->core_pct_busy = (int32_t)core_pct; } @@ -1061,6 +1189,12 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu) cpu->pstate.scaling, cpu->sample.mperf); } +static inline int32_t get_avg_pstate(struct cpudata *cpu) +{ + return 
div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf, + cpu->sample.mperf); +} + static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -1093,7 +1227,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); cpu->sample.busy_scaled = cpu_load; - return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load); + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1115,8 +1249,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) * specified pstate. */ core_busy = cpu->sample.core_pct_busy; - max_pstate = int_tofp(cpu->pstate.max_pstate_physical); - current_pstate = int_tofp(cpu->pstate.current_pstate); + max_pstate = cpu->pstate.max_pstate_physical; + current_pstate = cpu->pstate.current_pstate; core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* @@ -1127,8 +1261,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) */ duration_ns = cpu->sample.time - cpu->last_sample_time; if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { - sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), - int_tofp(duration_ns)); + sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns); core_busy = mul_fp(core_busy, sample_ratio); } else { sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); @@ -1246,9 +1379,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_busy_pid_reset(cpu); - cpu->update_util.func = intel_pstate_update_util; - - pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); + pr_debug("controlling: cpu %d\n", cpunum); return 0; } @@ -1271,12 +1402,13 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. 
*/ cpu->sample.time = 0; - cpufreq_set_update_util_data(cpu_num, &cpu->update_util); + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, + intel_pstate_update_util); } static void intel_pstate_clear_update_util_hook(unsigned int cpu) { - cpufreq_set_update_util_data(cpu, NULL); + cpufreq_remove_update_util_hook(cpu); synchronize_sched(); } @@ -1296,20 +1428,31 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { + struct cpudata *cpu; + if (!policy->cpuinfo.max_freq) return -ENODEV; intel_pstate_clear_update_util_hook(policy->cpu); + cpu = all_cpu_data[0]; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) { + if (policy->max < policy->cpuinfo.max_freq && + policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { + pr_debug("policy->max > max non turbo frequency\n"); + policy->max = policy->cpuinfo.max_freq; + } + } + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; if (policy->max >= policy->cpuinfo.max_freq) { - pr_debug("intel_pstate: set performance\n"); + pr_debug("set performance\n"); intel_pstate_set_performance_limits(limits); goto out; } } else { - pr_debug("intel_pstate: set powersave\n"); + pr_debug("set powersave\n"); limits = &powersave_limits; } @@ -1333,10 +1476,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); + limits->max_perf = div_fp(limits->max_perf_pct, 100); out: intel_pstate_set_update_util_hook(policy->cpu); @@ -1363,7 +1504,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) int cpu_num = policy->cpu; struct cpudata *cpu = 
all_cpu_data[cpu_num]; - pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); + pr_debug("CPU %d exiting\n", cpu_num); intel_pstate_clear_update_util_hook(cpu_num); @@ -1396,18 +1537,27 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + intel_pstate_init_acpi_perf_limits(policy); policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + intel_pstate_exit_perf_limits(policy); + + return 0; +} + static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1451,8 +1601,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } -#if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> +#ifdef CONFIG_ACPI static bool intel_pstate_no_acpi_pss(void) { @@ -1608,7 +1757,7 @@ hwp_cpu_matched: if (intel_pstate_platform_pwr_mgmt_exists()) return -ENODEV; - pr_info("Intel P-state driver initializing.\n"); + pr_info("Intel P-state driver initializing\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) @@ -1625,7 +1774,7 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); if (hwp_active) - pr_info("intel_pstate: HWP enabled\n"); + pr_info("HWP enabled\n"); return rc; out: @@ -1651,13 +1800,19 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; if (!strcmp(str, "no_hwp")) { - pr_info("intel_pstate: HWP disabled\n"); + pr_info("HWP disabled\n"); no_hwp = 1; } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + +#ifdef CONFIG_ACPI + if 
(!strcmp(str, "support_acpi_ppc")) + acpi_ppc = true; +#endif + return 0; } early_param("intel_pstate", intel_pstate_setup); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 0f6b229afcb9..beae5cf5c62c 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -21,6 +21,8 @@ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -40,8 +42,6 @@ #include "longhaul.h" -#define PFX "longhaul: " - #define TYPE_LONGHAUL_V1 1 #define TYPE_LONGHAUL_V2 2 #define TYPE_POWERSAVER 3 @@ -347,14 +347,13 @@ retry_loop: freqs.new = calc_speed(longhaul_get_cpu_mult()); /* Check if requested frequency is set. */ if (unlikely(freqs.new != speed)) { - printk(KERN_INFO PFX "Failed to set requested frequency!\n"); + pr_info("Failed to set requested frequency!\n"); /* Revision ID = 1 but processor is expecting revision key * equal to 0. Jumpers at the bottom of processor will change * multiplier and FSB, but will not change bits in Longhaul * MSR nor enable voltage scaling. */ if (!revid_errata) { - printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " - "option.\n"); + pr_info("Enabling \"Ignore Revision ID\" option\n"); revid_errata = 1; msleep(200); goto retry_loop; @@ -364,11 +363,10 @@ retry_loop: * but it doesn't change frequency. I tried poking various * bits in northbridge registers, but without success. */ if (longhaul_flags & USE_ACPI_C3) { - printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); + pr_info("Disabling ACPI C3 support\n"); longhaul_flags &= ~USE_ACPI_C3; if (revid_errata) { - printk(KERN_INFO PFX "Disabling \"Ignore " - "Revision ID\" option.\n"); + pr_info("Disabling \"Ignore Revision ID\" option\n"); revid_errata = 0; } msleep(200); @@ -379,7 +377,7 @@ retry_loop: * RevID = 1. RevID errata will make things right. Just * to be 100% sure. 
*/ if (longhaul_version == TYPE_LONGHAUL_V2) { - printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); + pr_info("Switching to Longhaul ver. 1\n"); longhaul_version = TYPE_LONGHAUL_V1; msleep(200); goto retry_loop; @@ -387,8 +385,7 @@ retry_loop: } if (!bm_timeout) { - printk(KERN_INFO PFX "Warning: Timeout while waiting for " - "idle PCI bus.\n"); + pr_info("Warning: Timeout while waiting for idle PCI bus\n"); return -EBUSY; } @@ -433,12 +430,12 @@ static int longhaul_get_ranges(void) /* Get current frequency */ mult = longhaul_get_cpu_mult(); if (mult == -1) { - printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); + pr_info("Invalid (reserved) multiplier!\n"); return -EINVAL; } fsb = guess_fsb(mult); if (fsb == 0) { - printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); + pr_info("Invalid (reserved) FSB!\n"); return -EINVAL; } /* Get max multiplier - as we always did. @@ -468,11 +465,11 @@ static int longhaul_get_ranges(void) print_speed(highest_speed/1000)); if (lowest_speed == highest_speed) { - printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + pr_info("highestspeed == lowest, aborting\n"); return -EINVAL; } if (lowest_speed > highest_speed) { - printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", + pr_info("nonsense! 
lowest (%d > %d) !\n", lowest_speed, highest_speed); return -EINVAL; } @@ -538,16 +535,16 @@ static void longhaul_setup_voltagescaling(void) rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); if (!(longhaul.bits.RevisionID & 1)) { - printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); + pr_info("Voltage scaling not supported by CPU\n"); return; } if (!longhaul.bits.VRMRev) { - printk(KERN_INFO PFX "VRM 8.5\n"); + pr_info("VRM 8.5\n"); vrm_mV_table = &vrm85_mV[0]; mV_vrm_table = &mV_vrm85[0]; } else { - printk(KERN_INFO PFX "Mobile VRM\n"); + pr_info("Mobile VRM\n"); if (cpu_model < CPU_NEHEMIAH) return; vrm_mV_table = &mobilevrm_mV[0]; @@ -558,27 +555,21 @@ static void longhaul_setup_voltagescaling(void) maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " - "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, - maxvid.mV/1000, maxvid.mV%1000); + pr_info("Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n", + minvid.mV/1000, minvid.mV%1000, + maxvid.mV/1000, maxvid.mV%1000); return; } if (minvid.mV == maxvid.mV) { - printk(KERN_INFO PFX "Claims to support voltage scaling but " - "min & max are both %d.%03d. 
" - "Voltage scaling disabled\n", - maxvid.mV/1000, maxvid.mV%1000); + pr_info("Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n", + maxvid.mV/1000, maxvid.mV%1000); return; } /* How many voltage steps*/ numvscales = maxvid.pos - minvid.pos + 1; - printk(KERN_INFO PFX - "Max VID=%d.%03d " - "Min VID=%d.%03d, " - "%d possible voltage scales\n", + pr_info("Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", maxvid.mV/1000, maxvid.mV%1000, minvid.mV/1000, minvid.mV%1000, numvscales); @@ -617,12 +608,12 @@ static void longhaul_setup_voltagescaling(void) pos = minvid.pos; freq_pos->driver_data |= mV_vrm_table[pos] << 8; vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", + pr_info("f: %d kHz, index: %d, vid: %d mV\n", speed, (int)(freq_pos - longhaul_table), vid.mV); } can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled.\n"); + pr_info("Voltage scaling enabled\n"); } @@ -720,8 +711,7 @@ static int enable_arbiter_disable(void) pci_write_config_byte(dev, reg, pci_cmd); pci_read_config_byte(dev, reg, &pci_cmd); if (!(pci_cmd & 1<<7)) { - printk(KERN_ERR PFX - "Can't enable access to port 0x22.\n"); + pr_err("Can't enable access to port 0x22\n"); status = 0; } } @@ -758,8 +748,7 @@ static int longhaul_setup_southbridge(void) if (pci_cmd & 1 << 7) { pci_read_config_dword(dev, 0x88, &acpi_regs_addr); acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", - acpi_regs_addr); + pr_info("ACPI I/O at 0x%x\n", acpi_regs_addr); } pci_dev_put(dev); @@ -853,14 +842,14 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_version = TYPE_LONGHAUL_V1; } - printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); + pr_info("VIA %s CPU detected. 
", cpuname); switch (longhaul_version) { case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); + pr_cont("Longhaul v%d supported\n", longhaul_version); break; case TYPE_POWERSAVER: - printk(KERN_CONT "Powersaver supported.\n"); + pr_cont("Powersaver supported\n"); break; }; @@ -889,15 +878,14 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) if (!(longhaul_flags & USE_ACPI_C3 || longhaul_flags & USE_NORTHBRIDGE) && ((pr == NULL) || !(pr->flags.bm_control))) { - printk(KERN_ERR PFX - "No ACPI support. Unsupported northbridge.\n"); + pr_err("No ACPI support: Unsupported northbridge\n"); return -ENODEV; } if (longhaul_flags & USE_NORTHBRIDGE) - printk(KERN_INFO PFX "Using northbridge support.\n"); + pr_info("Using northbridge support\n"); if (longhaul_flags & USE_ACPI_C3) - printk(KERN_INFO PFX "Using ACPI support.\n"); + pr_info("Using ACPI support\n"); ret = longhaul_get_ranges(); if (ret != 0) @@ -934,20 +922,18 @@ static int __init longhaul_init(void) return -ENODEV; if (!enable) { - printk(KERN_ERR PFX "Option \"enable\" not set. Aborting.\n"); + pr_err("Option \"enable\" not set - Aborting\n"); return -ENODEV; } #ifdef CONFIG_SMP if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, " - "longhaul disabled.\n"); + pr_err("More than 1 CPU detected, longhaul disabled\n"); return -ENODEV; } #endif #ifdef CONFIG_X86_IO_APIC if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently " - "broken in this configuration.\n"); + pr_err("APIC detected. Longhaul is currently broken in this configuration.\n"); return -ENODEV; } #endif @@ -955,7 +941,7 @@ static int __init longhaul_init(void) case 6 ... 
9: return cpufreq_register_driver(&longhaul_driver); case 10: - printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); + pr_err("Use acpi-cpufreq driver for VIA C7\n"); default: ; } diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index cd593c1f66dc..6bbdac1065ff 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -10,6 +10,9 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/cpufreq.h> #include <linux/module.h> #include <linux/err.h> @@ -76,7 +79,7 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy) cpuclk = clk_get(NULL, "cpu_clk"); if (IS_ERR(cpuclk)) { - printk(KERN_ERR "cpufreq: couldn't get CPU clk\n"); + pr_err("couldn't get CPU clk\n"); return PTR_ERR(cpuclk); } @@ -163,7 +166,7 @@ static int __init cpufreq_init(void) if (ret) return ret; - pr_info("cpufreq: Loongson-2F CPU frequency driver.\n"); + pr_info("Loongson-2F CPU frequency driver\n"); cpufreq_register_notifier(&loongson2_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index cc3408fc073f..d9df89392b84 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -13,6 +13,8 @@ #undef DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> @@ -174,7 +176,7 @@ static int __init maple_cpufreq_init(void) /* Get first CPU node */ cpunode = of_cpu_device_node_get(0); if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n"); + pr_err("Can't find any CPU 0 node\n"); goto bail_noprops; } @@ -182,8 +184,7 @@ static int __init maple_cpufreq_init(void) /* we actually don't care on which CPU to access PVR */ pvr_hi = PVR_VER(mfspr(SPRN_PVR)); if (pvr_hi != 0x3c && pvr_hi != 0x44) { - 
printk(KERN_ERR "cpufreq: Unsupported CPU version (%x)\n", - pvr_hi); + pr_err("Unsupported CPU version (%x)\n", pvr_hi); goto bail_noprops; } @@ -222,8 +223,8 @@ static int __init maple_cpufreq_init(void) maple_pmode_cur = -1; maple_scom_switch_freq(maple_scom_query_freq()); - printk(KERN_INFO "Registering Maple CPU frequency driver\n"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + pr_info("Registering Maple CPU frequency driver\n"); + pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", maple_cpu_freqs[1].frequency/1000, maple_cpu_freqs[0].frequency/1000, maple_cpu_freqs[maple_pmode_cur].frequency/1000); diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 2058e6d292ce..6f602c7a71bd 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -59,11 +59,8 @@ static LIST_HEAD(dvfs_info_list); static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) { struct mtk_cpu_dvfs_info *info; - struct list_head *list; - - list_for_each(list, &dvfs_info_list) { - info = list_entry(list, struct mtk_cpu_dvfs_info, list_head); + list_for_each_entry(info, &dvfs_info_list, list_head) { if (cpumask_test_cpu(cpu, &info->cpus)) return info; } @@ -524,8 +521,7 @@ static struct cpufreq_driver mt8173_cpufreq_driver = { static int mt8173_cpufreq_probe(struct platform_device *pdev) { - struct mtk_cpu_dvfs_info *info; - struct list_head *list, *tmp; + struct mtk_cpu_dvfs_info *info, *tmp; int cpu, ret; for_each_possible_cpu(cpu) { @@ -559,11 +555,9 @@ static int mt8173_cpufreq_probe(struct platform_device *pdev) return 0; release_dvfs_info_list: - list_for_each_safe(list, tmp, &dvfs_info_list) { - info = list_entry(list, struct mtk_cpu_dvfs_info, list_head); - + list_for_each_entry_safe(info, tmp, &dvfs_info_list, list_head) { mtk_cpu_dvfs_info_release(info); - list_del(list); + list_del(&info->list_head); } return ret; diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 
e3866e0d5bf8..cead9bec4843 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -13,6 +13,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -163,13 +166,13 @@ static int omap_cpufreq_probe(struct platform_device *pdev) { mpu_dev = get_cpu_device(0); if (!mpu_dev) { - pr_warning("%s: unable to get the mpu device\n", __func__); + pr_warn("%s: unable to get the MPU device\n", __func__); return -EINVAL; } mpu_reg = regulator_get(mpu_dev, "vcc"); if (IS_ERR(mpu_reg)) { - pr_warning("%s: unable to get MPU regulator\n", __func__); + pr_warn("%s: unable to get MPU regulator\n", __func__); mpu_reg = NULL; } else { /* diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index 5dd95dab580d..fd77812313f3 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -20,6 +20,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -35,8 +37,6 @@ #include "speedstep-lib.h" -#define PFX "p4-clockmod: " - /* * Duty Cycle (3bits), note DC_DISABLE is not specified in * intel docs i just use it to mean disable @@ -124,11 +124,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) { if (c->x86 == 0x06) { if (cpu_has(c, X86_FEATURE_EST)) - printk_once(KERN_WARNING PFX "Warning: EST-capable " - "CPU detected. The acpi-cpufreq module offers " - "voltage scaling in addition to frequency " - "scaling. You should use that instead of " - "p4-clockmod, if possible.\n"); + pr_warn_once("Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. 
You should use that instead of p4-clockmod, if possible.\n"); switch (c->x86_model) { case 0x0E: /* Core */ case 0x0F: /* Core Duo */ @@ -152,11 +148,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { - printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " - "The speedstep-ich or acpi cpufreq modules offer " - "voltage scaling in addition of frequency scaling. " - "You should use either one instead of p4-clockmod, " - "if possible.\n"); + pr_warn("Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n"); return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); } @@ -265,8 +257,7 @@ static int __init cpufreq_p4_init(void) ret = cpufreq_register_driver(&p4clockmod_driver); if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " - "Modulation available\n"); + pr_info("P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); return ret; } diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 1f49d97a70ea..b7b576e53e92 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -13,6 +13,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> @@ -481,13 +483,13 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) freqs = of_get_property(cpunode, "bus-frequencies", &lenp); lenp /= sizeof(u32); if (freqs == NULL || lenp != 2) { - printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); + pr_err("bus-frequencies incorrect or missing\n"); return 1; } ratio = of_get_property(cpunode, "processor-to-bus-ratio*2", NULL); if (ratio == NULL) { - printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); + pr_err("processor-to-bus-ratio*2 
missing\n"); return 1; } @@ -550,7 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); if (!voltage_gpio){ - printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); + pr_err("missing cpu-vcore-select gpio\n"); return 1; } @@ -675,9 +677,9 @@ out: pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; ppc_proc_freq = cur_freq * 1000ul; - printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", - low_freq/1000, hi_freq/1000, cur_freq/1000); + pr_info("Registering PowerMac CPU frequency driver\n"); + pr_info("Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", + low_freq/1000, hi_freq/1000, cur_freq/1000); return cpufreq_register_driver(&pmac_cpufreq_driver); } diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 4ff86878727f..267e0894c62d 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -12,6 +12,8 @@ #undef DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> @@ -138,7 +140,7 @@ static void g5_vdnap_switch_volt(int speed_mode) usleep_range(1000, 1000); } if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); + pr_warn("Timeout in clock slewing !\n"); } @@ -266,7 +268,7 @@ static int g5_pfunc_switch_freq(int speed_mode) rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL); if (rc) - printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc); + pr_warn("pfunc switch error %d\n", rc); /* It's an irq GPIO so we should be able to just block here, * I'll do that later after I've properly tested the IRQ code for @@ -282,7 +284,7 @@ static int g5_pfunc_switch_freq(int speed_mode) usleep_range(500, 500); } if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); + pr_warn("Timeout in clock slewing !\n"); /* If frequency is going down, last ramp 
the voltage */ if (speed_mode > g5_pmode_cur) @@ -368,7 +370,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode) } pvr_hi = (*valp) >> 16; if (pvr_hi != 0x3c && pvr_hi != 0x44) { - printk(KERN_ERR "cpufreq: Unsupported CPU version\n"); + pr_err("Unsupported CPU version\n"); goto bail_noprops; } @@ -403,8 +405,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode) root = of_find_node_by_path("/"); if (root == NULL) { - printk(KERN_ERR "cpufreq: Can't find root of " - "device tree\n"); + pr_err("Can't find root of device tree\n"); goto bail_noprops; } pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0"); @@ -412,8 +413,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode) pmf_find_function(root, "slewing-done"); if (pfunc_set_vdnap0 == NULL || pfunc_vdnap0_complete == NULL) { - printk(KERN_ERR "cpufreq: Can't find required " - "platform function\n"); + pr_err("Can't find required platform function\n"); goto bail_noprops; } @@ -453,10 +453,10 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode) g5_pmode_cur = -1; g5_switch_freq(g5_query_freq()); - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n", - freq_method, volt_method); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + pr_info("Registering G5 CPU frequency driver\n"); + pr_info("Frequency method: %s, Voltage method: %s\n", + freq_method, volt_method); + pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", g5_cpu_freqs[1].frequency/1000, g5_cpu_freqs[0].frequency/1000, g5_cpu_freqs[g5_pmode_cur].frequency/1000); @@ -493,7 +493,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) if (cpuid != NULL) eeprom = of_get_property(cpuid, "cpuid", NULL); if (eeprom == NULL) { - printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n"); + pr_err("Can't find cpuid EEPROM !\n"); rc = -ENODEV; goto bail; } @@ -511,7 +511,7 @@ static int __init 
g5_pm72_cpufreq_init(struct device_node *cpunode) break; } if (hwclock == NULL) { - printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n"); + pr_err("Can't find i2c clock chip !\n"); rc = -ENODEV; goto bail; } @@ -539,7 +539,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) /* Check we have minimum requirements */ if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL || pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) { - printk(KERN_ERR "cpufreq: Can't find platform functions !\n"); + pr_err("Can't find platform functions !\n"); rc = -ENODEV; goto bail; } @@ -567,7 +567,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) /* Get max frequency from device-tree */ valp = of_get_property(cpunode, "clock-frequency", NULL); if (!valp) { - printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n"); + pr_err("Can't find CPU frequency !\n"); rc = -ENODEV; goto bail; } @@ -583,8 +583,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) /* Check for machines with no useful settings */ if (il == ih) { - printk(KERN_WARNING "cpufreq: No low frequency mode available" - " on this model !\n"); + pr_warn("No low frequency mode available on this model !\n"); rc = -ENODEV; goto bail; } @@ -595,7 +594,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) /* Sanity check */ if (min_freq >= max_freq || min_freq < 1000) { - printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n"); + pr_err("Can't calculate low frequency !\n"); rc = -ENXIO; goto bail; } @@ -619,10 +618,10 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) g5_pmode_cur = -1; g5_switch_freq(g5_query_freq()); - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: i2c/pfunc, " - "Voltage method: %s\n", has_volt ? 
"i2c/pfunc" : "none"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", + pr_info("Registering G5 CPU frequency driver\n"); + pr_info("Frequency method: i2c/pfunc, Voltage method: %s\n", + has_volt ? "i2c/pfunc" : "none"); + pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", g5_cpu_freqs[1].frequency/1000, g5_cpu_freqs[0].frequency/1000, g5_cpu_freqs[g5_pmode_cur].frequency/1000); @@ -654,7 +653,7 @@ static int __init g5_cpufreq_init(void) /* Get first CPU node */ cpunode = of_cpu_device_node_get(0); if (cpunode == NULL) { - pr_err("cpufreq: Can't find any CPU node\n"); + pr_err("Can't find any CPU node\n"); return -ENODEV; } diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index e6f24b281e3e..dedd2568e852 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -8,6 +8,8 @@ * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -22,7 +24,6 @@ #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long as it is unused */ -#define PFX "powernow-k6: " static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -141,7 +142,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy, { if (clock_ratio[best_i].driver_data > max_multiplier) { - printk(KERN_ERR PFX "invalid target frequency\n"); + pr_err("invalid target frequency\n"); return -EINVAL; } @@ -175,13 +176,14 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) max_multiplier = param_max_multiplier; goto have_max_multiplier; } - printk(KERN_ERR "powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n"); + pr_err("invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n"); return -EINVAL; } if (!max_multiplier) { - printk(KERN_WARNING "powernow-k6: unknown frequency %u, cannot determine 
current multiplier\n", khz); - printk(KERN_WARNING "powernow-k6: use module parameters max_multiplier and bus_frequency\n"); + pr_warn("unknown frequency %u, cannot determine current multiplier\n", + khz); + pr_warn("use module parameters max_multiplier and bus_frequency\n"); return -EOPNOTSUPP; } @@ -193,7 +195,7 @@ have_max_multiplier: busfreq = param_busfreq / 10; goto have_busfreq; } - printk(KERN_ERR "powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n"); + pr_err("invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n"); return -EINVAL; } @@ -275,7 +277,7 @@ static int __init powernow_k6_init(void) return -ENODEV; if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); + pr_info("PowerNow IOPORT region already used\n"); return -EIO; } diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index c1ae1999770a..9f013ed42977 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -13,6 +13,8 @@ * - We disable half multipliers if ACPI is used on A0 stepping CPUs. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -35,9 +37,6 @@ #include "powernow-k7.h" -#define PFX "powernow: " - - struct psb_s { u8 signature[10]; u8 tableversion; @@ -127,14 +126,13 @@ static int check_powernow(void) maxei = cpuid_eax(0x80000000); if (maxei < 0x80000007) { /* Any powernow info ? 
*/ #ifdef MODULE - printk(KERN_INFO PFX "No powernow capabilities detected\n"); + pr_info("No powernow capabilities detected\n"); #endif return 0; } if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk(KERN_INFO PFX "K7 660[A0] core detected, " - "enabling errata workarounds\n"); + pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); have_a0 = 1; } @@ -144,22 +142,22 @@ static int check_powernow(void) if (!(edx & (1 << 1 | 1 << 2))) return 0; - printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); + pr_info("PowerNOW! Technology present. Can scale: "); if (edx & 1 << 1) { - printk("frequency"); + pr_cont("frequency"); can_scale_bus = 1; } if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk(" and "); + pr_cont(" and "); if (edx & 1 << 2) { - printk("voltage"); + pr_cont("voltage"); can_scale_vid = 1; } - printk(".\n"); + pr_cont("\n"); return 1; } @@ -427,16 +425,14 @@ err1: err05: kfree(acpi_processor_perf); err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used on " - "this platform\n"); + pr_warn("ACPI perflib can not be used on this platform\n"); acpi_processor_perf = NULL; return retval; } #else static int powernow_acpi_init(void) { - printk(KERN_INFO PFX "no support for ACPI processor found." - " Please recompile your kernel with ACPI processor\n"); + pr_info("no support for ACPI processor found - please recompile your kernel with ACPI processor\n"); return -EINVAL; } #endif @@ -468,8 +464,7 @@ static int powernow_decode_bios(int maxfid, int startvid) psb = (struct psb_s *) p; pr_debug("Table version: 0x%x\n", psb->tableversion); if (psb->tableversion != 0x12) { - printk(KERN_INFO PFX "Sorry, only v1.2 tables" - " supported right now\n"); + pr_info("Sorry, only v1.2 tables supported right now\n"); return -ENODEV; } @@ -481,10 +476,8 @@ static int powernow_decode_bios(int maxfid, int startvid) latency = psb->settlingtime; if (latency < 100) { - printk(KERN_INFO PFX "BIOS set settling time " - "to %d microseconds. 
" - "Should be at least 100. " - "Correcting.\n", latency); + pr_info("BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n", + latency); latency = 100; } pr_debug("Settling Time: %d microseconds.\n", @@ -516,10 +509,9 @@ static int powernow_decode_bios(int maxfid, int startvid) p += 2; } } - printk(KERN_INFO PFX "No PST tables match this cpuid " - "(0x%x)\n", etuple); - printk(KERN_INFO PFX "This is indicative of a broken " - "BIOS.\n"); + pr_info("No PST tables match this cpuid (0x%x)\n", + etuple); + pr_info("This is indicative of a broken BIOS\n"); return -EINVAL; } @@ -552,7 +544,7 @@ static int fixup_sgtc(void) sgtc = 100 * m * latency; sgtc = sgtc / 3; if (sgtc > 0xfffff) { - printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); + pr_warn("SGTC too large %d\n", sgtc); sgtc = 0xfffff; } return sgtc; @@ -574,14 +566,10 @@ static unsigned int powernow_get(unsigned int cpu) static int acer_cpufreq_pst(const struct dmi_system_id *d) { - printk(KERN_WARNING PFX - "%s laptop with broken PST tables in BIOS detected.\n", + pr_warn("%s laptop with broken PST tables in BIOS detected\n", d->ident); - printk(KERN_WARNING PFX - "You need to downgrade to 3A21 (09/09/2002), or try a newer " - "BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING PFX - "cpufreq scaling has been disabled as a result of this.\n"); + pr_warn("You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); + pr_warn("cpufreq scaling has been disabled as a result of this\n"); return 0; } @@ -616,40 +604,38 @@ static int powernow_cpu_init(struct cpufreq_policy *policy) fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; if (!fsb) { - printk(KERN_WARNING PFX "can not determine bus frequency\n"); + pr_warn("can not determine bus frequency\n"); return -EINVAL; } pr_debug("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk(KERN_INFO PFX "PSB/PST known to be broken. 
" - "Trying ACPI instead\n"); + pr_info("PSB/PST known to be broken - trying ACPI instead\n"); result = powernow_acpi_init(); } else { result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); if (result) { - printk(KERN_INFO PFX "Trying ACPI perflib\n"); + pr_info("Trying ACPI perflib\n"); maximum_speed = 0; minimum_speed = -1; latency = 0; result = powernow_acpi_init(); if (result) { - printk(KERN_INFO PFX - "ACPI and legacy methods failed\n"); + pr_info("ACPI and legacy methods failed\n"); } } else { /* SGTC use the bus clock as timer */ latency = fixup_sgtc(); - printk(KERN_INFO PFX "SGTC: %d\n", latency); + pr_info("SGTC: %d\n", latency); } } if (result) return result; - printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", - minimum_speed/1000, maximum_speed/1000); + pr_info("Minimum speed %d MHz - Maximum speed %d MHz\n", + minimum_speed/1000, maximum_speed/1000); policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 39ac78c94be0..144c73211926 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -36,12 +36,56 @@ #include <asm/reg.h> #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */ #include <asm/opal.h> +#include <linux/timer.h> #define POWERNV_MAX_PSTATES 256 #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define PMSR_MAX(x) ((x >> 32) & 0xFF) +#define MAX_RAMP_DOWN_TIME 5120 +/* + * On an idle system we want the global pstate to ramp-down from max value to + * min over a span of ~5 secs. Also we want it to initially ramp-down slowly and + * then ramp-down rapidly later on. + * + * This gives a percentage rampdown for time elapsed in milliseconds. 
+ * ramp_down_percentage = ((ms * ms) >> 18) + * ~= 3.8 * (sec * sec) + * + * At 0 ms ramp_down_percent = 0 + * At 5120 ms ramp_down_percent = 100 + */ +#define ramp_down_percent(time) ((time * time) >> 18) + +/* Interval after which the timer is queued to bring down global pstate */ +#define GPSTATE_TIMER_INTERVAL 2000 + +/** + * struct global_pstate_info - Per policy data structure to maintain history of + * global pstates + * @highest_lpstate: The local pstate from which we are ramping down + * @elapsed_time: Time in ms spent in ramping down from + * highest_lpstate + * @last_sampled_time: Time from boot in ms when global pstates were + * last set + * @last_lpstate,last_gpstate: Last set values for local and global pstates + * @timer: Is used for ramping down if cpu goes idle for + * a long time with global pstate held high + * @gpstate_lock: A spinlock to maintain synchronization between + * routines called by the timer handler and + * governer's target_index calls + */ +struct global_pstate_info { + int highest_lpstate; + unsigned int elapsed_time; + unsigned int last_sampled_time; + int last_lpstate; + int last_gpstate; + spinlock_t gpstate_lock; + struct timer_list timer; +}; + static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; @@ -94,6 +138,17 @@ static struct powernv_pstate_info { int nr_pstates; } powernv_pstate_info; +static inline void reset_gpstates(struct cpufreq_policy *policy) +{ + struct global_pstate_info *gpstates = policy->driver_data; + + gpstates->highest_lpstate = 0; + gpstates->elapsed_time = 0; + gpstates->last_sampled_time = 0; + gpstates->last_lpstate = 0; + gpstates->last_gpstate = 0; +} + /* * Initialize the freq table based on data obtained * from the firmware passed via device-tree @@ -285,6 +340,7 @@ static inline void set_pmspr(unsigned long sprn, unsigned long val) struct powernv_smp_call_data { unsigned int freq; int pstate_id; + int gpstate_id; }; /* @@ -343,19 
+399,21 @@ static unsigned int powernv_cpufreq_get(unsigned int cpu) * (struct powernv_smp_call_data *) and the pstate_id which needs to be set * on this CPU should be present in freq_data->pstate_id. */ -static void set_pstate(void *freq_data) +static void set_pstate(void *data) { unsigned long val; - unsigned long pstate_ul = - ((struct powernv_smp_call_data *) freq_data)->pstate_id; + struct powernv_smp_call_data *freq_data = data; + unsigned long pstate_ul = freq_data->pstate_id; + unsigned long gpstate_ul = freq_data->gpstate_id; val = get_pmspr(SPRN_PMCR); val = val & 0x0000FFFFFFFFFFFFULL; pstate_ul = pstate_ul & 0xFF; + gpstate_ul = gpstate_ul & 0xFF; /* Set both global(bits 56..63) and local(bits 48..55) PStates */ - val = val | (pstate_ul << 56) | (pstate_ul << 48); + val = val | (gpstate_ul << 56) | (pstate_ul << 48); pr_debug("Setting cpu %d pmcr to %016lX\n", raw_smp_processor_id(), val); @@ -424,6 +482,110 @@ next: } } +/** + * calc_global_pstate - Calculate global pstate + * @elapsed_time: Elapsed time in milliseconds + * @local_pstate: New local pstate + * @highest_lpstate: pstate from which its ramping down + * + * Finds the appropriate global pstate based on the pstate from which its + * ramping down and the time elapsed in ramping down. It follows a quadratic + * equation which ensures that it reaches ramping down to pmin in 5sec. + */ +static inline int calc_global_pstate(unsigned int elapsed_time, + int highest_lpstate, int local_pstate) +{ + int pstate_diff; + + /* + * Using ramp_down_percent we get the percentage of rampdown + * that we are expecting to be dropping. Difference between + * highest_lpstate and powernv_pstate_info.min will give a absolute + * number of how many pstates we will drop eventually by the end of + * 5 seconds, then just scale it get the number pstates to be dropped. 
+ */ + pstate_diff = ((int)ramp_down_percent(elapsed_time) * + (highest_lpstate - powernv_pstate_info.min)) / 100; + + /* Ensure that global pstate is >= to local pstate */ + if (highest_lpstate - pstate_diff < local_pstate) + return local_pstate; + else + return highest_lpstate - pstate_diff; +} + +static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) +{ + unsigned int timer_interval; + + /* + * Setting up timer to fire after GPSTATE_TIMER_INTERVAL ms, But + * if it exceeds MAX_RAMP_DOWN_TIME ms for ramp down time. + * Set timer such that it fires exactly at MAX_RAMP_DOWN_TIME + * seconds of ramp down time. + */ + if ((gpstates->elapsed_time + GPSTATE_TIMER_INTERVAL) + > MAX_RAMP_DOWN_TIME) + timer_interval = MAX_RAMP_DOWN_TIME - gpstates->elapsed_time; + else + timer_interval = GPSTATE_TIMER_INTERVAL; + + mod_timer_pinned(&gpstates->timer, jiffies + + msecs_to_jiffies(timer_interval)); +} + +/** + * gpstate_timer_handler + * + * @data: pointer to cpufreq_policy on which timer was queued + * + * This handler brings down the global pstate closer to the local pstate + * according quadratic equation. 
Queues a new timer if it is still not equal + * to local pstate + */ +void gpstate_timer_handler(unsigned long data) +{ + struct cpufreq_policy *policy = (struct cpufreq_policy *)data; + struct global_pstate_info *gpstates = policy->driver_data; + int gpstate_id; + unsigned int time_diff = jiffies_to_msecs(jiffies) + - gpstates->last_sampled_time; + struct powernv_smp_call_data freq_data; + + if (!spin_trylock(&gpstates->gpstate_lock)) + return; + + gpstates->last_sampled_time += time_diff; + gpstates->elapsed_time += time_diff; + freq_data.pstate_id = gpstates->last_lpstate; + + if ((gpstates->last_gpstate == freq_data.pstate_id) || + (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) { + gpstate_id = freq_data.pstate_id; + reset_gpstates(policy); + gpstates->highest_lpstate = freq_data.pstate_id; + } else { + gpstate_id = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate, + freq_data.pstate_id); + } + + /* + * If local pstate is equal to global pstate, rampdown is over + * So timer is not required to be queued. 
+ */ + if (gpstate_id != freq_data.pstate_id) + queue_gpstate_timer(gpstates); + + freq_data.gpstate_id = gpstate_id; + gpstates->last_gpstate = freq_data.gpstate_id; + gpstates->last_lpstate = freq_data.pstate_id; + + /* Timer may get migrated to a different cpu on cpu hot unplug */ + smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1); + spin_unlock(&gpstates->gpstate_lock); +} + /* * powernv_cpufreq_target_index: Sets the frequency corresponding to * the cpufreq table entry indexed by new_index on the cpus in the @@ -433,6 +595,9 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, unsigned int new_index) { struct powernv_smp_call_data freq_data; + unsigned int cur_msec, gpstate_id; + unsigned long flags; + struct global_pstate_info *gpstates = policy->driver_data; if (unlikely(rebooting) && new_index != get_nominal_index()) return 0; @@ -440,28 +605,78 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (!throttled) powernv_cpufreq_throttle_check(NULL); + cur_msec = jiffies_to_msecs(get_jiffies_64()); + + spin_lock_irqsave(&gpstates->gpstate_lock, flags); freq_data.pstate_id = powernv_freqs[new_index].driver_data; + if (!gpstates->last_sampled_time) { + gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate = freq_data.pstate_id; + goto gpstates_done; + } + + if (gpstates->last_gpstate > freq_data.pstate_id) { + gpstates->elapsed_time += cur_msec - + gpstates->last_sampled_time; + + /* + * If its has been ramping down for more than MAX_RAMP_DOWN_TIME + * we should be resetting all global pstate related data. Set it + * equal to local pstate to start fresh. 
+ */ + if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) { + reset_gpstates(policy); + gpstates->highest_lpstate = freq_data.pstate_id; + gpstate_id = freq_data.pstate_id; + } else { + /* Elaspsed_time is less than 5 seconds, continue to rampdown */ + gpstate_id = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate, + freq_data.pstate_id); + } + } else { + reset_gpstates(policy); + gpstates->highest_lpstate = freq_data.pstate_id; + gpstate_id = freq_data.pstate_id; + } + + /* + * If local pstate is equal to global pstate, rampdown is over + * So timer is not required to be queued. + */ + if (gpstate_id != freq_data.pstate_id) + queue_gpstate_timer(gpstates); + +gpstates_done: + freq_data.gpstate_id = gpstate_id; + gpstates->last_sampled_time = cur_msec; + gpstates->last_gpstate = freq_data.gpstate_id; + gpstates->last_lpstate = freq_data.pstate_id; + /* * Use smp_call_function to send IPI and execute the * mtspr on target CPU. We could do that without IPI * if current CPU is within policy->cpus (core) */ smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1); - + spin_unlock_irqrestore(&gpstates->gpstate_lock, flags); return 0; } static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) { - int base, i; + int base, i, ret; + struct kernfs_node *kn; + struct global_pstate_info *gpstates; base = cpu_first_thread_sibling(policy->cpu); for (i = 0; i < threads_per_core; i++) cpumask_set_cpu(base + i, policy->cpus); - if (!policy->driver_data) { + kn = kernfs_find_and_get(policy->kobj.sd, throttle_attr_grp.name); + if (!kn) { int ret; ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp); @@ -470,13 +685,37 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cpu); return ret; } - /* - * policy->driver_data is used as a flag for one-time - * creation of throttle sysfs files. 
- */ - policy->driver_data = policy; + } else { + kernfs_put(kn); } - return cpufreq_table_validate_and_show(policy, powernv_freqs); + + gpstates = kzalloc(sizeof(*gpstates), GFP_KERNEL); + if (!gpstates) + return -ENOMEM; + + policy->driver_data = gpstates; + + /* initialize timer */ + init_timer_deferrable(&gpstates->timer); + gpstates->timer.data = (unsigned long)policy; + gpstates->timer.function = gpstate_timer_handler; + gpstates->timer.expires = jiffies + + msecs_to_jiffies(GPSTATE_TIMER_INTERVAL); + spin_lock_init(&gpstates->gpstate_lock); + ret = cpufreq_table_validate_and_show(policy, powernv_freqs); + + if (ret < 0) + kfree(policy->driver_data); + + return ret; +} + +static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + /* timer is deleted in cpufreq_cpu_stop() */ + kfree(policy->driver_data); + + return 0; } static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb, @@ -604,15 +843,19 @@ static struct notifier_block powernv_cpufreq_opal_nb = { static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) { struct powernv_smp_call_data freq_data; + struct global_pstate_info *gpstates = policy->driver_data; freq_data.pstate_id = powernv_pstate_info.min; + freq_data.gpstate_id = powernv_pstate_info.min; smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); + del_timer_sync(&gpstates->timer); } static struct cpufreq_driver powernv_cpufreq_driver = { .name = "powernv-cpufreq", .flags = CPUFREQ_CONST_LOOPS, .init = powernv_cpufreq_cpu_init, + .exit = powernv_cpufreq_cpu_exit, .verify = cpufreq_generic_frequency_table_verify, .target_index = powernv_cpufreq_target_index, .get = powernv_cpufreq_get, diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h index b4c00a5a6a59..3eace725ccd6 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.h +++ b/drivers/cpufreq/ppc_cbe_cpufreq.h @@ -17,7 +17,7 @@ int cbe_cpufreq_get_pmode(int cpu); int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); -#if 
defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE) +#if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI) extern bool cbe_cpufreq_has_pmi; #else #define cbe_cpufreq_has_pmi (0) diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 7969f7690498..7c4cd5c634f2 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -23,7 +23,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/timer.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/of_platform.h> #include <asm/processor.h> @@ -142,15 +142,4 @@ static int __init cbe_cpufreq_pmi_init(void) return 0; } - -static void __exit cbe_cpufreq_pmi_exit(void) -{ - cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - pmi_unregister_handler(&cbe_pmi_handler); -} - -module_init(cbe_cpufreq_pmi_init); -module_exit(cbe_cpufreq_pmi_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); +device_initcall(cbe_cpufreq_pmi_init); diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index 46fee1539cc8..ce345bf34d5d 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -29,6 +29,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> @@ -186,8 +188,7 @@ static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) ret = regulator_set_voltage(vcc_core, vmin, vmax); if (ret) - pr_err("cpufreq: Failed to set vcc_core in [%dmV..%dmV]\n", - vmin, vmax); + pr_err("Failed to set vcc_core in [%dmV..%dmV]\n", vmin, vmax); return ret; } @@ -195,10 +196,10 @@ static void __init pxa_cpufreq_init_voltages(void) { vcc_core = regulator_get(NULL, "vcc_core"); if (IS_ERR(vcc_core)) { - pr_info("cpufreq: Didn't find vcc_core regulator\n"); + pr_info("Didn't find vcc_core regulator\n"); vcc_core = NULL; } else { - 
pr_info("cpufreq: Found vcc_core regulator\n"); + pr_info("Found vcc_core regulator\n"); } } #else @@ -233,9 +234,8 @@ static void pxa27x_guess_max_freq(void) { if (!pxa27x_maxfreq) { pxa27x_maxfreq = 416000; - printk(KERN_INFO "PXA CPU 27x max frequency not defined " - "(pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n", - pxa27x_maxfreq); + pr_info("PXA CPU 27x max frequency not defined (pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n", + pxa27x_maxfreq); } else { pxa27x_maxfreq *= 1000; } @@ -408,7 +408,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy) */ if (cpu_is_pxa25x()) { find_freq_tables(&pxa255_freq_table, &pxa255_freqs); - pr_info("PXA255 cpufreq using %s frequency table\n", + pr_info("using %s frequency table\n", pxa255_turbo_table ? "turbo" : "run"); cpufreq_table_validate_and_show(policy, pxa255_freq_table); @@ -417,7 +417,7 @@ static int pxa_cpufreq_init(struct cpufreq_policy *policy) cpufreq_table_validate_and_show(policy, pxa27x_freq_table); } - printk(KERN_INFO "PXA CPU frequency change support initialized\n"); + pr_info("frequency change support initialized\n"); return 0; } diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index b23e525a7af3..53d8c3fb16f6 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -301,10 +301,11 @@ err_np: return -ENODEV; } -static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy) +static int qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct cpu_data *data = policy->driver_data; + cpufreq_cooling_unregister(data->cdev); kfree(data->pclk); kfree(data->table); kfree(data); @@ -333,8 +334,8 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy) cpud->cdev = of_cpufreq_cooling_register(np, policy->related_cpus); - if (IS_ERR(cpud->cdev)) { - pr_err("Failed to register cooling device cpu%d: %ld\n", + if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) { + pr_err("cpu%d is not running as 
cooling device: %ld\n", policy->cpu, PTR_ERR(cpud->cdev)); cpud->cdev = NULL; @@ -348,7 +349,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = { .name = "qoriq_cpufreq", .flags = CPUFREQ_CONST_LOOPS, .init = qoriq_cpufreq_cpu_init, - .exit = __exit_p(qoriq_cpufreq_cpu_exit), + .exit = qoriq_cpufreq_cpu_exit, .verify = cpufreq_generic_frequency_table_verify, .target_index = qoriq_cpufreq_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/s3c2412-cpufreq.c b/drivers/cpufreq/s3c2412-cpufreq.c index eb262133fef2..b04b6f02bbdc 100644 --- a/drivers/cpufreq/s3c2412-cpufreq.c +++ b/drivers/cpufreq/s3c2412-cpufreq.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> @@ -197,21 +199,20 @@ static int s3c2412_cpufreq_add(struct device *dev, hclk = clk_get(NULL, "hclk"); if (IS_ERR(hclk)) { - printk(KERN_ERR "%s: cannot find hclk clock\n", __func__); + pr_err("cannot find hclk clock\n"); return -ENOENT; } fclk = clk_get(NULL, "fclk"); if (IS_ERR(fclk)) { - printk(KERN_ERR "%s: cannot find fclk clock\n", __func__); + pr_err("cannot find fclk clock\n"); goto err_fclk; } fclk_rate = clk_get_rate(fclk); if (fclk_rate > 200000000) { - printk(KERN_INFO - "%s: fclk %ld MHz, assuming 266MHz capable part\n", - __func__, fclk_rate / 1000000); + pr_info("fclk %ld MHz, assuming 266MHz capable part\n", + fclk_rate / 1000000); s3c2412_cpufreq_info.max.fclk = 266000000; s3c2412_cpufreq_info.max.hclk = 133000000; s3c2412_cpufreq_info.max.pclk = 66000000; @@ -219,13 +220,13 @@ static int s3c2412_cpufreq_add(struct device *dev, armclk = clk_get(NULL, "armclk"); if (IS_ERR(armclk)) { - printk(KERN_ERR "%s: cannot find arm clock\n", __func__); + pr_err("cannot find arm clock\n"); goto err_armclk; } xtal = clk_get(NULL, "xtal"); if (IS_ERR(xtal)) { - printk(KERN_ERR "%s: cannot find xtal clock\n", __func__); + pr_err("cannot find xtal 
clock\n"); goto err_xtal; } diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c index 0129f5c70a61..d0d75b65ddd6 100644 --- a/drivers/cpufreq/s3c2440-cpufreq.c +++ b/drivers/cpufreq/s3c2440-cpufreq.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> @@ -66,7 +68,7 @@ static int s3c2440_cpufreq_calcdivs(struct s3c_cpufreq_config *cfg) __func__, fclk, armclk, hclk_max); if (armclk > fclk) { - printk(KERN_WARNING "%s: armclk > fclk\n", __func__); + pr_warn("%s: armclk > fclk\n", __func__); armclk = fclk; } @@ -273,7 +275,7 @@ static int s3c2440_cpufreq_add(struct device *dev, armclk = s3c_cpufreq_clk_get(NULL, "armclk"); if (IS_ERR(xtal) || IS_ERR(hclk) || IS_ERR(fclk) || IS_ERR(armclk)) { - printk(KERN_ERR "%s: failed to get clocks\n", __func__); + pr_err("%s: failed to get clocks\n", __func__); return -ENOENT; } diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c index 9b7b4289d66c..4d976e8dbb2f 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c +++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/export.h> #include <linux/interrupt.h> @@ -178,7 +180,7 @@ static int __init s3c_freq_debugfs_init(void) { dbgfs_root = debugfs_create_dir("s3c-cpufreq", NULL); if (IS_ERR(dbgfs_root)) { - printk(KERN_ERR "%s: error creating debugfs root\n", __func__); + pr_err("%s: error creating debugfs root\n", __func__); return PTR_ERR(dbgfs_root); } diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index 68ef8fd9482f..ae8eaed77b70 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> @@ -175,7 +177,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, cpu_new.freq.fclk = cpu_new.pll.frequency; if (s3c_cpufreq_calcdivs(&cpu_new) < 0) { - printk(KERN_ERR "no divisors for %d\n", target_freq); + pr_err("no divisors for %d\n", target_freq); goto err_notpossible; } @@ -187,7 +189,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, if (cpu_new.freq.hclk != cpu_cur.freq.hclk) { if (s3c_cpufreq_calcio(&cpu_new) < 0) { - printk(KERN_ERR "%s: no IO timings\n", __func__); + pr_err("%s: no IO timings\n", __func__); goto err_notpossible; } } @@ -262,7 +264,7 @@ static int s3c_cpufreq_settarget(struct cpufreq_policy *policy, return 0; err_notpossible: - printk(KERN_ERR "no compatible settings for %d\n", target_freq); + pr_err("no compatible settings for %d\n", target_freq); return -EINVAL; } @@ -331,7 +333,7 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, &index); if (ret < 0) { - printk(KERN_ERR "%s: no PLL available\n", __func__); + pr_err("%s: no PLL available\n", __func__); goto err_notpossible; } @@ -346,7 +348,7 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, return s3c_cpufreq_settarget(policy, target_freq, pll); err_notpossible: - printk(KERN_ERR "no compatible settings for %d\n", target_freq); + pr_err("no compatible settings for %d\n", target_freq); return -EINVAL; } @@ -356,7 +358,7 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name) clk = clk_get(dev, name); if (IS_ERR(clk)) - printk(KERN_ERR "cpufreq: failed to get clock '%s'\n", name); + pr_err("failed to get clock '%s'\n", name); return clk; } @@ -378,15 +380,16 @@ static int __init s3c_cpufreq_initclks(void) if (IS_ERR(clk_fclk) || IS_ERR(clk_hclk) || IS_ERR(clk_pclk) || IS_ERR(_clk_mpll) || IS_ERR(clk_arm) || IS_ERR(_clk_xtal)) { - printk(KERN_ERR "%s: could not get clock(s)\n", 
__func__); + pr_err("%s: could not get clock(s)\n", __func__); return -ENOENT; } - printk(KERN_INFO "%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n", __func__, - clk_get_rate(clk_fclk) / 1000, - clk_get_rate(clk_hclk) / 1000, - clk_get_rate(clk_pclk) / 1000, - clk_get_rate(clk_arm) / 1000); + pr_info("%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n", + __func__, + clk_get_rate(clk_fclk) / 1000, + clk_get_rate(clk_hclk) / 1000, + clk_get_rate(clk_pclk) / 1000, + clk_get_rate(clk_arm) / 1000); return 0; } @@ -424,7 +427,7 @@ static int s3c_cpufreq_resume(struct cpufreq_policy *policy) ret = s3c_cpufreq_settarget(NULL, suspend_freq, &suspend_pll); if (ret) { - printk(KERN_ERR "%s: failed to reset pll/freq\n", __func__); + pr_err("%s: failed to reset pll/freq\n", __func__); return ret; } @@ -449,13 +452,12 @@ static struct cpufreq_driver s3c24xx_driver = { int s3c_cpufreq_register(struct s3c_cpufreq_info *info) { if (!info || !info->name) { - printk(KERN_ERR "%s: failed to pass valid information\n", - __func__); + pr_err("%s: failed to pass valid information\n", __func__); return -EINVAL; } - printk(KERN_INFO "S3C24XX CPU Frequency driver, %s cpu support\n", - info->name); + pr_info("S3C24XX CPU Frequency driver, %s cpu support\n", + info->name); /* check our driver info has valid data */ @@ -478,7 +480,7 @@ int __init s3c_cpufreq_setboard(struct s3c_cpufreq_board *board) struct s3c_cpufreq_board *ours; if (!board) { - printk(KERN_INFO "%s: no board data\n", __func__); + pr_info("%s: no board data\n", __func__); return -EINVAL; } @@ -487,7 +489,7 @@ int __init s3c_cpufreq_setboard(struct s3c_cpufreq_board *board) ours = kzalloc(sizeof(*ours), GFP_KERNEL); if (ours == NULL) { - printk(KERN_ERR "%s: no memory\n", __func__); + pr_err("%s: no memory\n", __func__); return -ENOMEM; } @@ -502,15 +504,15 @@ static int __init s3c_cpufreq_auto_io(void) int ret; if (!cpu_cur.info->get_iotiming) { - printk(KERN_ERR "%s: get_iotiming undefined\n", __func__); + pr_err("%s: get_iotiming undefined\n", 
__func__); return -ENOENT; } - printk(KERN_INFO "%s: working out IO settings\n", __func__); + pr_info("%s: working out IO settings\n", __func__); ret = (cpu_cur.info->get_iotiming)(&cpu_cur, &s3c24xx_iotiming); if (ret) - printk(KERN_ERR "%s: failed to get timings\n", __func__); + pr_err("%s: failed to get timings\n", __func__); return ret; } @@ -561,7 +563,7 @@ static void s3c_cpufreq_update_loctkime(void) val = calc_locktime(rate, cpu_cur.info->locktime_u) << bits; val |= calc_locktime(rate, cpu_cur.info->locktime_m); - printk(KERN_INFO "%s: new locktime is 0x%08x\n", __func__, val); + pr_info("%s: new locktime is 0x%08x\n", __func__, val); __raw_writel(val, S3C2410_LOCKTIME); } @@ -580,7 +582,7 @@ static int s3c_cpufreq_build_freq(void) ftab = kzalloc(sizeof(*ftab) * size, GFP_KERNEL); if (!ftab) { - printk(KERN_ERR "%s: no memory for tables\n", __func__); + pr_err("%s: no memory for tables\n", __func__); return -ENOMEM; } @@ -608,15 +610,14 @@ static int __init s3c_cpufreq_initcall(void) if (cpu_cur.board->auto_io) { ret = s3c_cpufreq_auto_io(); if (ret) { - printk(KERN_ERR "%s: failed to get io timing\n", + pr_err("%s: failed to get io timing\n", __func__); goto out; } } if (cpu_cur.board->need_io && !cpu_cur.info->set_iotiming) { - printk(KERN_ERR "%s: no IO support registered\n", - __func__); + pr_err("%s: no IO support registered\n", __func__); ret = -EINVAL; goto out; } @@ -666,9 +667,9 @@ int s3c_plltab_register(struct cpufreq_frequency_table *plls, vals += plls_no; vals->frequency = CPUFREQ_TABLE_END; - printk(KERN_INFO "cpufreq: %d PLL entries\n", plls_no); + pr_info("%d PLL entries\n", plls_no); } else - printk(KERN_ERR "cpufreq: no memory for PLL tables\n"); + pr_err("no memory for PLL tables\n"); return vals ? 
0 : -ENOMEM; } diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index a145b319d171..06d85917b6d5 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -9,6 +9,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/kernel.h> #include <linux/init.h> @@ -205,7 +207,7 @@ static void s5pv210_set_refresh(enum s5pv210_dmc_port ch, unsigned long freq) } else if (ch == DMC1) { reg = (dmc_base[1] + 0x30); } else { - printk(KERN_ERR "Cannot find DMC port\n"); + pr_err("Cannot find DMC port\n"); return; } @@ -534,7 +536,7 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy) mem_type = check_mem_type(dmc_base[0]); if ((mem_type != LPDDR) && (mem_type != LPDDR2)) { - printk(KERN_ERR "CPUFreq doesn't support this memory type\n"); + pr_err("CPUFreq doesn't support this memory type\n"); ret = -EINVAL; goto out_dmc1; } @@ -635,13 +637,13 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev) arm_regulator = regulator_get(NULL, "vddarm"); if (IS_ERR(arm_regulator)) { - pr_err("failed to get regulator vddarm"); + pr_err("failed to get regulator vddarm\n"); return PTR_ERR(arm_regulator); } int_regulator = regulator_get(NULL, "vddint"); if (IS_ERR(int_regulator)) { - pr_err("failed to get regulator vddint"); + pr_err("failed to get regulator vddint\n"); regulator_put(arm_regulator); return PTR_ERR(int_regulator); } diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c index ac84e4818014..4225501a4b78 100644 --- a/drivers/cpufreq/sc520_freq.c +++ b/drivers/cpufreq/sc520_freq.c @@ -13,6 +13,8 @@ * 2005-03-30: - initial revision */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -30,8 +32,6 @@ static __u8 __iomem *cpuctl; -#define PFX "sc520_freq: " - static struct cpufreq_frequency_table sc520_freq_table[] = { {0, 0x01, 
100000}, {0, 0x02, 133000}, @@ -44,8 +44,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) switch (clockspeed_reg & 0x03) { default: - printk(KERN_ERR PFX "error: cpuctl register has unexpected " - "value %02x\n", clockspeed_reg); + pr_err("error: cpuctl register has unexpected value %02x\n", + clockspeed_reg); case 0x01: return 100000; case 0x02: @@ -112,7 +112,7 @@ static int __init sc520_freq_init(void) cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); if (!cpuctl) { - printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + pr_err("sc520_freq: error: failed to remap memory\n"); return -ENOMEM; } diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 7d4a31571608..41bc5397f4bb 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -13,6 +13,8 @@ * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -27,7 +29,6 @@ #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> -#define PFX "speedstep-centrino: " #define MAINTAINER "linux-pm@vger.kernel.org" #define INTEL_MSR_RANGE (0xffff) @@ -386,8 +387,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) /* check to see if it stuck */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { - printk(KERN_INFO PFX - "couldn't enable Enhanced SpeedStep\n"); + pr_info("couldn't enable Enhanced SpeedStep\n"); return -ENODEV; } } diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index 37555c6b86a7..b86953a3ddc4 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c @@ -18,6 +18,8 @@ * SPEEDSTEP - DEFINITIONS * *********************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include 
<linux/module.h> #include <linux/init.h> @@ -68,13 +70,13 @@ static int speedstep_find_register(void) /* get PMBASE */ pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); if (!(pmbase & 0x01)) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + pr_err("could not find speedstep register\n"); return -ENODEV; } pmbase &= 0xFFFFFFFE; if (!pmbase) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + pr_err("could not find speedstep register\n"); return -ENODEV; } @@ -136,7 +138,7 @@ static void speedstep_set_state(unsigned int state) pr_debug("change to %u MHz succeeded\n", speedstep_get_frequency(speedstep_processor) / 1000); else - printk(KERN_ERR "cpufreq: change failed - I/O error\n"); + pr_err("change failed - I/O error\n"); return; } diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 15d3214aaa00..1b8062182c81 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -8,6 +8,8 @@ * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -153,7 +155,7 @@ static unsigned int pentium_core_get_frequency(void) fsb = 333333; break; default: - printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); + pr_err("PCORE - MSR_FSB_FREQ undefined value\n"); } rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); @@ -453,11 +455,8 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, */ if (*transition_latency > 10000000 || *transition_latency < 50000) { - printk(KERN_WARNING PFX "frequency transition " - "measured seems out of range (%u " - "nSec), falling back to a safe one of" - "%u nSec.\n", - *transition_latency, 500000); + pr_warn("frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n", + *transition_latency, 500000); *transition_latency = 500000; } } diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index 819229e824fb..770a9ae1999a 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -12,6 +12,8 @@ * SPEEDSTEP - DEFINITIONS * *********************************************************************/ +#define pr_fmt(fmt) "cpufreq: " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -204,9 +206,8 @@ static void speedstep_set_state(unsigned int state) (speedstep_freqs[new_state].frequency / 1000), retry, result); else - printk(KERN_ERR "cpufreq: change to state %u " - "failed with new_state %u and result %u\n", - state, new_state, result); + pr_err("change to state %u failed with new_state %u and result %u\n", + state, new_state, result); return; } diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c index 20bcceb58ccc..43530254201a 100644 --- a/drivers/cpufreq/tegra124-cpufreq.c +++ b/drivers/cpufreq/tegra124-cpufreq.c @@ -14,7 +14,6 @@ #define pr_fmt(fmt) 
KBUILD_MODNAME ": " fmt #include <linux/clk.h> -#include <linux/cpufreq-dt.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -69,10 +68,6 @@ static void tegra124_cpu_switch_to_pllx(struct tegra124_cpufreq_priv *priv) clk_set_parent(priv->cpu_clk, priv->pllx_clk); } -static struct cpufreq_dt_platform_data cpufreq_dt_pd = { - .independent_clocks = false, -}; - static int tegra124_cpufreq_probe(struct platform_device *pdev) { struct tegra124_cpufreq_priv *priv; @@ -129,8 +124,6 @@ static int tegra124_cpufreq_probe(struct platform_device *pdev) cpufreq_dt_devinfo.name = "cpufreq-dt"; cpufreq_dt_devinfo.parent = &pdev->dev; - cpufreq_dt_devinfo.data = &cpufreq_dt_pd; - cpufreq_dt_devinfo.size_data = sizeof(cpufreq_dt_pd); priv->cpufreq_dt_pdev = platform_device_register_full(&cpufreq_dt_devinfo); diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009e2c30..a87335a1660c 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,8 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__ +#include <linux/types.h> + struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 718e8725de8a..4e81e08db752 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -102,6 +102,17 @@ struct cpufreq_policy { */ struct rw_semaphore rwsem; + /* + * Fast switch flags: + * - fast_switch_possible should be set by the driver if it can + * guarantee that frequency can be changed on any CPU sharing the + * policy and that the change will affect all of the policy CPUs then. + * - fast_switch_enabled is to be set by governors that support fast + * frequency switching with the help of cpufreq_enable_fast_switch(). 
+ */ + bool fast_switch_possible; + bool fast_switch_enabled; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -156,6 +167,8 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); +void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); +void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); #else static inline unsigned int cpufreq_get(unsigned int cpu) { @@ -236,6 +249,8 @@ struct cpufreq_driver { unsigned int relation); /* Deprecated */ int (*target_index)(struct cpufreq_policy *policy, unsigned int index); + unsigned int (*fast_switch)(struct cpufreq_policy *policy, + unsigned int target_freq); /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -426,6 +441,20 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * ondemand governor will work on any processor with transition latency <= 10ms, + * using appropriate sampling rate. + * + * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) + * the ondemand governor will not work. All times here are in us (microseconds). 
+ */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + /* Governor Events */ #define CPUFREQ_GOV_START 1 #define CPUFREQ_GOV_STOP 2 @@ -450,6 +479,8 @@ struct cpufreq_governor { }; /* Pass a target to the cpufreq driver */ +unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); @@ -462,6 +493,29 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor); struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +/* Governor attribute set */ +struct gov_attr_set { + struct kobject kobj; + struct list_head policy_list; + struct mutex update_lock; + int usage_count; +}; + +/* sysfs ops for cpufreq governors */ +extern const struct sysfs_ops governor_sysfs_ops; + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); + +/* Governor sysfs attribute */ +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct gov_attr_set *attr_set, char *buf); + ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, + size_t count); +}; + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..8344e1947eec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3240,7 +3240,10 @@ struct update_util_data { u64 time, unsigned long util, unsigned long max); }; -void cpufreq_set_update_util_data(int cpu, struct update_util_data 
*data); +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max)); +void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ #endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 414d9c16da42..5e59b832ae2b 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o +obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 928c4ba32f68..1141954e73b4 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -14,24 +14,50 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** - * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer. * @cpu: The CPU to set the pointer for. * @data: New pointer value. + * @func: Callback function to set for the CPU. * - * Set and publish the update_util_data pointer for the given CPU. That pointer - * points to a struct update_util_data object containing a callback function - * to call from cpufreq_update_util(). That function will be called from an RCU - * read-side critical section, so it must not sleep. + * Set and publish the update_util_data pointer for the given CPU. * - * Callers must use RCU-sched callbacks to free any memory that might be - * accessed via the old update_util_data pointer or invoke synchronize_sched() - * right after this function to avoid use-after-free. + * The update_util_data pointer of @cpu is set to @data and the callback + * function pointer in the target struct update_util_data is set to @func. 
+ * That function will be called by cpufreq_update_util() from RCU-sched + * read-side critical sections, so it must not sleep. @data will always be + * passed to it as the first argument which allows the function to get to the + * target update_util_data structure and its container. + * + * The update_util_data pointer of @cpu must be NULL when this function is + * called or it will WARN() and return with no effect. */ -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max)) { - if (WARN_ON(data && !data->func)) + if (WARN_ON(!data || !func)) return; + if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu))) + return; + + data->func = func; rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); } -EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); +EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook); + +/** + * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer. + * @cpu: The CPU to clear the pointer for. + * + * Clear the update_util_data pointer for the given CPU. + * + * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. + */ +void cpufreq_remove_update_util_hook(int cpu) +{ + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL); +} +EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c new file mode 100644 index 000000000000..154ae3a51e86 --- /dev/null +++ b/kernel/sched/cpufreq_schedutil.c @@ -0,0 +1,530 @@ +/* + * CPUFreq governor based on scheduler-provided CPU utilization data. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/cpufreq.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <trace/events/power.h> + +#include "sched.h" + +struct sugov_tunables { + struct gov_attr_set attr_set; + unsigned int rate_limit_us; +}; + +struct sugov_policy { + struct cpufreq_policy *policy; + + struct sugov_tunables *tunables; + struct list_head tunables_hook; + + raw_spinlock_t update_lock; /* For shared policies */ + u64 last_freq_update_time; + s64 freq_update_delay_ns; + unsigned int next_freq; + + /* The next fields are only needed if fast switch cannot be used. */ + struct irq_work irq_work; + struct work_struct work; + struct mutex work_lock; + bool work_in_progress; + + bool need_freq_update; +}; + +struct sugov_cpu { + struct update_util_data update_util; + struct sugov_policy *sg_policy; + + /* The fields below are only needed when sharing a policy. */ + unsigned long util; + unsigned long max; + u64 last_update; +}; + +static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); + +/************************ Governor internals ***********************/ + +static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) +{ + s64 delta_ns; + + if (sg_policy->work_in_progress) + return false; + + if (unlikely(sg_policy->need_freq_update)) { + sg_policy->need_freq_update = false; + /* + * This happens when limits change, so forget the previous + * next_freq value and force an update. 
+ */ + sg_policy->next_freq = UINT_MAX; + return true; + } + + delta_ns = time - sg_policy->last_freq_update_time; + return delta_ns >= sg_policy->freq_update_delay_ns; +} + +static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + struct cpufreq_policy *policy = sg_policy->policy; + + sg_policy->last_freq_update_time = time; + + if (policy->fast_switch_enabled) { + if (sg_policy->next_freq == next_freq) { + trace_cpu_frequency(policy->cur, smp_processor_id()); + return; + } + sg_policy->next_freq = next_freq; + next_freq = cpufreq_driver_fast_switch(policy, next_freq); + if (next_freq == CPUFREQ_ENTRY_INVALID) + return; + + policy->cur = next_freq; + trace_cpu_frequency(next_freq, smp_processor_id()); + } else if (sg_policy->next_freq != next_freq) { + sg_policy->next_freq = next_freq; + sg_policy->work_in_progress = true; + irq_work_queue(&sg_policy->irq_work); + } +} + +/** + * get_next_freq - Compute a new frequency for a given cpufreq policy. + * @policy: cpufreq policy object to compute the new frequency for. + * @util: Current CPU utilization. + * @max: CPU capacity. + * + * If the utilization is frequency-invariant, choose the new frequency to be + * proportional to it, that is + * + * next_freq = C * max_freq * util / max + * + * Otherwise, approximate the would-be frequency-invariant utilization by + * util_raw * (curr_freq / max_freq) which leads to + * + * next_freq = C * curr_freq * util_raw / max + * + * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. + */ +static unsigned int get_next_freq(struct cpufreq_policy *policy, + unsigned long util, unsigned long max) +{ + unsigned int freq = arch_scale_freq_invariant() ? 
+ policy->cpuinfo.max_freq : policy->cur; + + return (freq + (freq >> 2)) * util / max; +} + +static void sugov_update_single(struct update_util_data *hook, u64 time, + unsigned long util, unsigned long max) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; + unsigned int next_f; + + if (!sugov_should_update_freq(sg_policy, time)) + return; + + next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : + get_next_freq(policy, util, max); + sugov_update_commit(sg_policy, time, next_f); +} + +static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) +{ + struct cpufreq_policy *policy = sg_policy->policy; + unsigned int max_f = policy->cpuinfo.max_freq; + u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned int j; + + if (util == ULONG_MAX) + return max_f; + + for_each_cpu(j, policy->cpus) { + struct sugov_cpu *j_sg_cpu; + unsigned long j_util, j_max; + s64 delta_ns; + + if (j == smp_processor_id()) + continue; + + j_sg_cpu = &per_cpu(sugov_cpu, j); + /* + * If the CPU utilization was last updated before the previous + * frequency update and the time elapsed between the last update + * of the CPU utilization and the last frequency update is long + * enough, don't take the CPU into account as it probably is + * idle now. 
+ */ + delta_ns = last_freq_update_time - j_sg_cpu->last_update; + if (delta_ns > TICK_NSEC) + continue; + + j_util = j_sg_cpu->util; + if (j_util == ULONG_MAX) + return max_f; + + j_max = j_sg_cpu->max; + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } + } + + return get_next_freq(policy, util, max); +} + +static void sugov_update_shared(struct update_util_data *hook, u64 time, + unsigned long util, unsigned long max) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned int next_f; + + raw_spin_lock(&sg_policy->update_lock); + + sg_cpu->util = util; + sg_cpu->max = max; + sg_cpu->last_update = time; + + if (sugov_should_update_freq(sg_policy, time)) { + next_f = sugov_next_freq_shared(sg_policy, util, max); + sugov_update_commit(sg_policy, time, next_f); + } + + raw_spin_unlock(&sg_policy->update_lock); +} + +static void sugov_work(struct work_struct *work) +{ + struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); + + mutex_lock(&sg_policy->work_lock); + __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, + CPUFREQ_RELATION_L); + mutex_unlock(&sg_policy->work_lock); + + sg_policy->work_in_progress = false; +} + +static void sugov_irq_work(struct irq_work *irq_work) +{ + struct sugov_policy *sg_policy; + + sg_policy = container_of(irq_work, struct sugov_policy, irq_work); + schedule_work_on(smp_processor_id(), &sg_policy->work); +} + +/************************** sysfs interface ************************/ + +static struct sugov_tunables *global_tunables; +static DEFINE_MUTEX(global_tunables_lock); + +static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) +{ + return container_of(attr_set, struct sugov_tunables, attr_set); +} + +static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + + return 
sprintf(buf, "%u\n", tunables->rate_limit_us); +} + +static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + struct sugov_policy *sg_policy; + unsigned int rate_limit_us; + + if (kstrtouint(buf, 10, &rate_limit_us)) + return -EINVAL; + + tunables->rate_limit_us = rate_limit_us; + + list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) + sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; + + return count; +} + +static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); + +static struct attribute *sugov_attributes[] = { + &rate_limit_us.attr, + NULL +}; + +static struct kobj_type sugov_tunables_ktype = { + .default_attrs = sugov_attributes, + .sysfs_ops = &governor_sysfs_ops, +}; + +/********************** cpufreq governor interface *********************/ + +static struct cpufreq_governor schedutil_gov; + +static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + + sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); + if (!sg_policy) + return NULL; + + sg_policy->policy = policy; + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + INIT_WORK(&sg_policy->work, sugov_work); + mutex_init(&sg_policy->work_lock); + raw_spin_lock_init(&sg_policy->update_lock); + return sg_policy; +} + +static void sugov_policy_free(struct sugov_policy *sg_policy) +{ + mutex_destroy(&sg_policy->work_lock); + kfree(sg_policy); +} + +static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) +{ + struct sugov_tunables *tunables; + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (tunables) { + gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); + if (!have_governor_per_policy()) + global_tunables = tunables; + } + return tunables; +} + +static void sugov_tunables_free(struct sugov_tunables *tunables) +{ + if 
(!have_governor_per_policy()) + global_tunables = NULL; + + kfree(tunables); +} + +static int sugov_init(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + struct sugov_tunables *tunables; + unsigned int lat; + int ret = 0; + + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + + sg_policy = sugov_policy_alloc(policy); + if (!sg_policy) + return -ENOMEM; + + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_sg_policy; + } + policy->governor_data = sg_policy; + sg_policy->tunables = global_tunables; + + gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); + goto out; + } + + tunables = sugov_tunables_alloc(sg_policy); + if (!tunables) { + ret = -ENOMEM; + goto free_sg_policy; + } + + tunables->rate_limit_us = LATENCY_MULTIPLIER; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) + tunables->rate_limit_us *= lat; + + policy->governor_data = sg_policy; + sg_policy->tunables = tunables; + + ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, + get_governor_parent_kobj(policy), "%s", + schedutil_gov.name); + if (ret) + goto fail; + + out: + mutex_unlock(&global_tunables_lock); + + cpufreq_enable_fast_switch(policy); + return 0; + + fail: + policy->governor_data = NULL; + sugov_tunables_free(tunables); + + free_sg_policy: + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); + pr_err("cpufreq: schedutil governor initialization failed (error %d)\n", ret); + return ret; +} + +static int sugov_exit(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + struct sugov_tunables *tunables = sg_policy->tunables; + unsigned int count; + + cpufreq_disable_fast_switch(policy); + + mutex_lock(&global_tunables_lock); + + count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); + policy->governor_data = NULL; 
+ if (!count) + sugov_tunables_free(tunables); + + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); + return 0; +} + +static int sugov_start(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + unsigned int cpu; + + sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; + sg_policy->last_freq_update_time = 0; + sg_policy->next_freq = UINT_MAX; + sg_policy->work_in_progress = false; + sg_policy->need_freq_update = false; + + for_each_cpu(cpu, policy->cpus) { + struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + + sg_cpu->sg_policy = sg_policy; + if (policy_is_shared(policy)) { + sg_cpu->util = ULONG_MAX; + sg_cpu->max = 0; + sg_cpu->last_update = 0; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + sugov_update_shared); + } else { + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + sugov_update_single); + } + } + return 0; +} + +static int sugov_stop(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + unsigned int cpu; + + for_each_cpu(cpu, policy->cpus) + cpufreq_remove_update_util_hook(cpu); + + synchronize_sched(); + + irq_work_sync(&sg_policy->irq_work); + cancel_work_sync(&sg_policy->work); + return 0; +} + +static int sugov_limits(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + + if (!policy->fast_switch_enabled) { + mutex_lock(&sg_policy->work_lock); + + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + + mutex_unlock(&sg_policy->work_lock); + } + + sg_policy->need_freq_update = true; + return 0; +} + +int sugov_governor(struct cpufreq_policy *policy, unsigned int event) +{ + if (event == CPUFREQ_GOV_POLICY_INIT) { + return sugov_init(policy); + } else if (policy->governor_data) { + switch (event) { + 
case CPUFREQ_GOV_POLICY_EXIT: + return sugov_exit(policy); + case CPUFREQ_GOV_START: + return sugov_start(policy); + case CPUFREQ_GOV_STOP: + return sugov_stop(policy); + case CPUFREQ_GOV_LIMITS: + return sugov_limits(policy); + } + } + return -EINVAL; +} + +static struct cpufreq_governor schedutil_gov = { + .name = "schedutil", + .governor = sugov_governor, + .owner = THIS_MODULE, +}; + +static int __init sugov_module_init(void) +{ + return cpufreq_register_governor(&schedutil_gov); +} + +static void __exit sugov_module_exit(void) +{ + cpufreq_unregister_governor(&schedutil_gov); +} + +MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>"); +MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &schedutil_gov; +} + +fs_initcall(sugov_module_init); +#else +module_init(sugov_module_init); +#endif +module_exit(sugov_module_exit); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ec2e8d23527e..921d6e5d33b7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1842,6 +1842,14 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo static inline void cpufreq_trigger_update(u64 time) {} #endif /* CONFIG_CPU_FREQ */ +#ifdef arch_scale_freq_capacity +#ifndef arch_scale_freq_invariant +#define arch_scale_freq_invariant() (true) +#endif +#else /* arch_scale_freq_capacity */ +#define arch_scale_freq_invariant() (false) +#endif + static inline void account_reset_rq(struct rq *rq) { #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 81b87451c0ea..0c7dee221dca 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -15,5 +15,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency); 
EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); |