diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-12-12 20:45:01 +0100 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-12-12 20:45:01 +0100 |
commit | fecc8c0ebd30c41cc66303b6f9476481c5d6d260 (patch) | |
tree | b2e06edcc3425c2a992eb99a8cab1098c4246473 /kernel/sched | |
parent | 57def856f33a5b50c71d3d40faf73d00b034e29c (diff) | |
parent | 2bf3b685a35c80fe368dad9da0e77ba48b460939 (diff) |
Merge branch 'pm-cpufreq'
* pm-cpufreq: (51 commits)
Documentation: intel_pstate: Document HWP energy/performance hints
cpufreq: intel_pstate: Support for energy performance hints with HWP
cpufreq: intel_pstate: Add locking around HWP requests
cpufreq: ondemand: Set MIN_FREQUENCY_UP_THRESHOLD to 1
cpufreq: intel_pstate: Add Knights Mill CPUID
MAINTAINERS: Add bug tracking system location entry for cpufreq
cpufreq: dt: Add support for zx296718
cpufreq: acpi-cpufreq: drop rdmsr_on_cpus() usage
cpufreq: acpi-cpufreq: Convert to hotplug state machine
cpufreq: intel_pstate: fix intel_pstate_exit_perf_limits() prototype
cpufreq: intel_pstate: Set EPP/EPB to 0 in performance mode
cpufreq: schedutil: Rectify comment in sugov_irq_work() function
cpufreq: intel_pstate: increase precision of performance limits
cpufreq: intel_pstate: round up min_perf limits
cpufreq: Make cpufreq_update_policy() void
ACPI / processor: Make acpi_processor_ppc_has_changed() void
cpufreq: Avoid using inactive policies
cpufreq: intel_pstate: Generic governors support
cpufreq: intel_pstate: Request P-states control from SMM if needed
cpufreq: dt: Add support for r8a7743 and r8a7745
...
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 119 |
1 files changed, 99 insertions, 20 deletions
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 69e06898997d..fd4659313640 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,11 +12,14 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cpufreq.h> +#include <linux/kthread.h> #include <linux/slab.h> #include <trace/events/power.h> #include "sched.h" +#define SUGOV_KTHREAD_PRIORITY 50 + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -35,8 +38,10 @@ struct sugov_policy { /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; - struct work_struct work; + struct kthread_work work; struct mutex work_lock; + struct kthread_worker worker; + struct task_struct *thread; bool work_in_progress; bool need_freq_update; @@ -291,7 +296,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, raw_spin_unlock(&sg_policy->update_lock); } -static void sugov_work(struct work_struct *work) +static void sugov_work(struct kthread_work *work) { struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); @@ -308,7 +313,21 @@ static void sugov_irq_work(struct irq_work *irq_work) struct sugov_policy *sg_policy; sg_policy = container_of(irq_work, struct sugov_policy, irq_work); - schedule_work_on(smp_processor_id(), &sg_policy->work); + + /* + * For RT and deadline tasks, the schedutil governor shoots the + * frequency to maximum. Special care must be taken to ensure that this + * kthread doesn't result in the same behavior. + * + * This is (mostly) guaranteed by the work_in_progress flag. The flag is + * updated only at the end of the sugov_work() function and before that + * the schedutil governor rejects all other frequency scaling requests. + * + * There is a very rare case though, where the RT thread yields right + * after the work_in_progress flag is cleared. The effects of that are + * neglected for now. + */ + kthread_queue_work(&sg_policy->worker, &sg_policy->work); } /************************** sysfs interface ************************/ @@ -371,19 +390,64 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) return NULL; sg_policy->policy = policy; - init_irq_work(&sg_policy->irq_work, sugov_irq_work); - INIT_WORK(&sg_policy->work, sugov_work); - mutex_init(&sg_policy->work_lock); raw_spin_lock_init(&sg_policy->update_lock); return sg_policy; } static void sugov_policy_free(struct sugov_policy *sg_policy) { - mutex_destroy(&sg_policy->work_lock); kfree(sg_policy); } +static int sugov_kthread_create(struct sugov_policy *sg_policy) +{ + struct task_struct *thread; + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; + struct cpufreq_policy *policy = sg_policy->policy; + int ret; + + /* kthread only required for slow path */ + if (policy->fast_switch_enabled) + return 0; + + kthread_init_work(&sg_policy->work, sugov_work); + kthread_init_worker(&sg_policy->worker); + thread = kthread_create(kthread_worker_fn, &sg_policy->worker, + "sugov:%d", + cpumask_first(policy->related_cpus)); + if (IS_ERR(thread)) { + pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread)); + return PTR_ERR(thread); + } + + ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, ¶m); + if (ret) { + kthread_stop(thread); + pr_warn("%s: failed to set SCHED_FIFO\n", __func__); + return ret; + } + + sg_policy->thread = thread; + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + mutex_init(&sg_policy->work_lock); + + wake_up_process(thread); + + return 0; +} + +static void sugov_kthread_stop(struct sugov_policy *sg_policy) +{ + /* kthread only required for slow path */ + if (sg_policy->policy->fast_switch_enabled) + return; + + kthread_flush_worker(&sg_policy->worker); + kthread_stop(sg_policy->thread); + mutex_destroy(&sg_policy->work_lock); +} + static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) { struct sugov_tunables *tunables; @@ -416,16 +480,24 @@ static int sugov_init(struct cpufreq_policy *policy) if (policy->governor_data) return -EBUSY; + cpufreq_enable_fast_switch(policy); + sg_policy = sugov_policy_alloc(policy); - if (!sg_policy) - return -ENOMEM; + if (!sg_policy) { + ret = -ENOMEM; + goto disable_fast_switch; + } + + ret = sugov_kthread_create(sg_policy); + if (ret) + goto free_sg_policy; mutex_lock(&global_tunables_lock); if (global_tunables) { if (WARN_ON(have_governor_per_policy())) { ret = -EINVAL; - goto free_sg_policy; + goto stop_kthread; } policy->governor_data = sg_policy; sg_policy->tunables = global_tunables; @@ -437,7 +509,7 @@ static int sugov_init(struct cpufreq_policy *policy) tunables = sugov_tunables_alloc(sg_policy); if (!tunables) { ret = -ENOMEM; - goto free_sg_policy; + goto stop_kthread; } tunables->rate_limit_us = LATENCY_MULTIPLIER; @@ -454,20 +526,25 @@ static int sugov_init(struct cpufreq_policy *policy) if (ret) goto fail; - out: +out: mutex_unlock(&global_tunables_lock); - - cpufreq_enable_fast_switch(policy); return 0; - fail: +fail: policy->governor_data = NULL; sugov_tunables_free(tunables); - free_sg_policy: +stop_kthread: + sugov_kthread_stop(sg_policy); + +free_sg_policy: mutex_unlock(&global_tunables_lock); sugov_policy_free(sg_policy); + +disable_fast_switch: + cpufreq_disable_fast_switch(policy); + pr_err("initialization failed (error %d)\n", ret); return ret; } @@ -478,8 +555,6 @@ static void sugov_exit(struct cpufreq_policy *policy) struct sugov_tunables *tunables = sg_policy->tunables; unsigned int count; - cpufreq_disable_fast_switch(policy); - mutex_lock(&global_tunables_lock); count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); @@ -489,7 +564,9 @@ static void sugov_exit(struct cpufreq_policy *policy) mutex_unlock(&global_tunables_lock); + sugov_kthread_stop(sg_policy); sugov_policy_free(sg_policy); + cpufreq_disable_fast_switch(policy); } static int sugov_start(struct cpufreq_policy *policy) @@ -535,8 +612,10 @@ static void sugov_stop(struct cpufreq_policy *policy) synchronize_sched(); - irq_work_sync(&sg_policy->irq_work); - cancel_work_sync(&sg_policy->work); + if (!policy->fast_switch_enabled) { + irq_work_sync(&sg_policy->irq_work); + kthread_cancel_work_sync(&sg_policy->work); + } } static void sugov_limits(struct cpufreq_policy *policy) |