-rw-r--r--  Documentation/ABI/testing/sysfs-class-devfreq | 3
-rw-r--r--  Documentation/power/freezing-of-tasks.rst | 85
-rw-r--r--  drivers/base/power/main.c | 148
-rw-r--r--  drivers/cpufreq/armada-8k-cpufreq.c | 4
-rw-r--r--  drivers/cpufreq/intel_pstate.c | 15
-rw-r--r--  drivers/cpufreq/scmi-cpufreq.c | 7
-rw-r--r--  drivers/cpuidle/cpuidle-haltpoll.c | 9
-rw-r--r--  drivers/devfreq/devfreq.c | 80
-rw-r--r--  drivers/idle/intel_idle.c | 114
-rw-r--r--  drivers/opp/core.c | 294
-rw-r--r--  drivers/opp/of.c | 57
-rw-r--r--  drivers/opp/opp.h | 24
-rw-r--r--  drivers/opp/ti-opp-supply.c | 13
-rw-r--r--  include/linux/async.h | 2
-rw-r--r--  include/linux/pm_opp.h | 28
-rw-r--r--  kernel/async.c | 85
-rw-r--r--  kernel/power/hibernate.c | 10
-rw-r--r--  kernel/power/main.c | 16
-rw-r--r--  kernel/power/power.h | 2
-rw-r--r--  kernel/power/snapshot.c | 16
-rw-r--r--  kernel/power/swap.c | 41
21 files changed, 658 insertions, 395 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f30406..1e7e0bb4c14e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon <nm@ti.com> diff --git a/Documentation/power/freezing-of-tasks.rst b/Documentation/power/freezing-of-tasks.rst index 53b6a56c4635..df9755bfbd94 100644 --- a/Documentation/power/freezing-of-tasks.rst +++ b/Documentation/power/freezing-of-tasks.rst @@ -14,27 +14,28 @@ architectures). II. How does it work? ===================== -There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN -and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have -PF_NOFREEZE unset (all user space processes and some kernel threads) are -regarded as 'freezable' and treated in a special way before the system enters a -suspend state as well as before a hibernation image is created (in what follows -we only consider hibernation, but the description also applies to suspend). +There is one per-task flag (PF_NOFREEZE) and three per-task states +(TASK_FROZEN, TASK_FREEZABLE and __TASK_FREEZABLE_UNSAFE) used for that. +The tasks that have PF_NOFREEZE unset (all user space tasks and some kernel +threads) are regarded as 'freezable' and treated in a special way before the +system enters a sleep state as well as before a hibernation image is created +(hibernation is directly covered by what follows, but the description applies +to system-wide suspend too). Namely, as the first step of the hibernation procedure the function freeze_processes() (defined in kernel/power/process.c) is called. A system-wide -variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate -whether the system is to undergo a freezing operation. And freeze_processes() -sets this variable. After this, it executes try_to_freeze_tasks() that sends a -fake signal to all user space processes, and wakes up all the kernel threads. -All freezable tasks must react to that by calling try_to_freeze(), which -results in a call to __refrigerator() (defined in kernel/freezer.c), which sets -the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes -it loop until PF_FROZEN is cleared for it. Then, we say that the task is -'frozen' and therefore the set of functions handling this mechanism is referred -to as 'the freezer' (these functions are defined in kernel/power/process.c, -kernel/freezer.c & include/linux/freezer.h). User space processes are generally -frozen before kernel threads. +static key freezer_active (as opposed to a per-task flag or state) is used to +indicate whether the system is to undergo a freezing operation. And +freeze_processes() sets this static key. After this, it executes +try_to_freeze_tasks() that sends a fake signal to all user space processes, and +wakes up all the kernel threads. All freezable tasks must react to that by +calling try_to_freeze(), which results in a call to __refrigerator() (defined +in kernel/freezer.c), which changes the task's state to TASK_FROZEN, and makes +it loop until it is woken by an explicit TASK_FROZEN wakeup. 
Then, that task +is regarded as 'frozen' and so the set of functions handling this mechanism is +referred to as 'the freezer' (these functions are defined in +kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). User space +tasks are generally frozen before kernel threads. __refrigerator() must not be called directly. Instead, use the try_to_freeze() function (defined in include/linux/freezer.h), that checks @@ -43,31 +44,40 @@ if the task is to be frozen and makes the task enter __refrigerator(). For user space processes try_to_freeze() is called automatically from the signal-handling code, but the freezable kernel threads need to call it explicitly in suitable places or use the wait_event_freezable() or -wait_event_freezable_timeout() macros (defined in include/linux/freezer.h) -that combine interruptible sleep with checking if the task is to be frozen and -calling try_to_freeze(). The main loop of a freezable kernel thread may look +wait_event_freezable_timeout() macros (defined in include/linux/wait.h) +that put the task to sleep (TASK_INTERRUPTIBLE) or freeze it (TASK_FROZEN) if +freezer_active is set. The main loop of a freezable kernel thread may look like the following one:: set_freezable(); - do { - hub_events(); - wait_event_freezable(khubd_wait, - !list_empty(&hub_event_list) || - kthread_should_stop()); - } while (!kthread_should_stop() || !list_empty(&hub_event_list)); - -(from drivers/usb/core/hub.c::hub_thread()). - -If a freezable kernel thread fails to call try_to_freeze() after the freezer has -initiated a freezing operation, the freezing of tasks will fail and the entire -hibernation operation will be cancelled. For this reason, freezable kernel -threads must call try_to_freeze() somewhere or use one of the + + while (true) { + struct task_struct *tsk = NULL; + + wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); + spin_lock_irq(&oom_reaper_lock); + if (oom_reaper_list != NULL) { + tsk = oom_reaper_list; + oom_reaper_list = tsk->oom_reaper_list; + } + spin_unlock_irq(&oom_reaper_lock); + + if (tsk) + oom_reap_task(tsk); + } + +(from mm/oom_kill.c::oom_reaper()). + +If a freezable kernel thread is not put to the frozen state after the freezer +has initiated a freezing operation, the freezing of tasks will fail and the +entire system-wide transition will be cancelled. For this reason, freezable +kernel threads must call try_to_freeze() somewhere or use one of the wait_event_freezable() and wait_event_freezable_timeout() macros. After the system memory state has been restored from a hibernation image and devices have been reinitialized, the function thaw_processes() is called in -order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that -have been frozen leave __refrigerator() and continue running. +order to wake up each frozen task. Then, the tasks that have been frozen leave +__refrigerator() and continue running. Rationale behind the functions dealing with freezing and thawing of tasks @@ -96,7 +106,8 @@ III. Which kernel threads are freezable? Kernel threads are not freezable by default. However, a kernel thread may clear PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE directly is not allowed). From this point it is regarded as freezable -and must call try_to_freeze() in a suitable place. +and must call try_to_freeze() or variants of wait_event_freezable() in a +suitable place. IV. Why do we do that? 
====================== diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f85f3515c258..9c5a5f4dba5a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * device_resume_noirq - Execute a "noirq resume" callback for given device. + * __device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -655,7 +655,13 @@ Skip: Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; + + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. 
* @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -811,21 +807,31 @@ Out: pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? 
" async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); - while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); + get_device(dev); - if (!is_async(dev)) { - int error; - mutex_unlock(&dpm_list_mtx); + mutex_unlock(&dpm_list_mtx); + + device_resume(dev); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + mutex_lock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 8afefdea4d80..ce5a5641b6dd 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -57,7 +57,7 @@ static void __init armada_8k_get_sharing_cpus(struct clk *cur_clk, continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_warn("Cannot get clock for CPU %d\n", cpu); } else { @@ -165,7 +165,7 @@ static int __init armada_8k_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a534a1f7f1ee..3c69040920b8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1692,13 +1692,6 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); /* - * If this CPU gen doesn't call for change in balance_perf - * EPP return. - */ - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) - return; - - /* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) * - But less performance oriented than performance EPP @@ -1711,6 +1704,13 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) } /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + + /* * Use hard coded value per gen to update the balance_perf * and default EPP. 
*/ @@ -2406,6 +2406,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c8a7ccc42c16..4ee23f4ebf4a 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -334,8 +334,11 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) #ifdef CONFIG_COMMON_CLK /* dummy clock provider as needed by OPP if clocks property is used */ - if (of_property_present(dev->of_node, "#clock-cells")) - devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (of_property_present(dev->of_node, "#clock-cells")) { + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (ret) + return dev_err_probe(dev, ret, "%s: registering clock provider failed\n", __func__); + } #endif ret = cpufreq_register_driver(&scmi_cpufreq_driver); diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index e66df22f9695..d8515d5c0853 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -25,13 +25,12 @@ MODULE_PARM_DESC(force, "Load unconditionally"); static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; static enum cpuhp_state haltpoll_hp_state; -static int default_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int default_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { - if (current_clr_polling_and_test()) { - local_irq_enable(); + if (current_clr_polling_and_test()) return index; - } + arch_cpu_idle(); return index; } diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index b3a68d5833bd..98657d3b9435 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -483,6 +487,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -491,12 +499,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -513,6 +525,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); @@ -1688,7 +1708,7 @@ 
static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1697,7 +1717,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return sysfs_emit(buf, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1707,31 +1727,49 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += sysfs_emit_at(buf, len, " From : To\n"); + len += sysfs_emit_at(buf, len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10lu", + df->freq_table[i]); + } - len += sprintf(buf + len, " time(ms)\n"); + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; + len += sysfs_emit_at(buf, len, " time(ms)\n"); for (i = 0; i < max_state; i++) { - if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + if (len >= PAGE_SIZE - 1) + break; + if (df->freq_table[i] == df->previous_freq) + len += sysfs_emit_at(buf, len, "*"); else - len += sprintf(buf + len, " "); - - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", + len += sysfs_emit_at(buf, len, " "); + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10lu:", df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10u", df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += sysfs_emit_at(buf, len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + if (len < PAGE_SIZE - 1) + len += sysfs_emit_at(buf, len, "Total transition : %u\n", + df->stats.total_trans); + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. 
Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3e01a6b23e75..bcf1198e8991 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -918,6 +918,35 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state mtl_l_cstates[] __initdata = { + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 420, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 310, + .target_residency = 930, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state gmt_cstates[] __initdata = { { .name = "C1", @@ -1237,6 +1266,72 @@ static struct cpuidle_state snr_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state grr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 10, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6S", + .desc = "MWAIT 0x22", + .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +static struct cpuidle_state srf_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 10, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6S", + .desc = "MWAIT 0x22", + .flags = MWAIT2flg(0x22) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 270, + .target_residency = 700, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6SP", + .desc = "MWAIT 0x23", + .flags = MWAIT2flg(0x23) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 310, + .target_residency = 900, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1344,6 +1439,10 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = { .state_table = adl_l_cstates, }; +static const struct idle_cpu idle_cpu_mtl_l __initconst = { + .state_table = mtl_l_cstates, +}; + static const struct idle_cpu idle_cpu_gmt __initconst = { .state_table = gmt_cstates, }; @@ -1382,6 +1481,18 @@ static const struct idle_cpu idle_cpu_snr __initconst = { .use_acpi = true, }; +static const struct 
idle_cpu idle_cpu_grr __initconst = { + .state_table = grr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + +static const struct idle_cpu idle_cpu_srf __initconst = { + .state_table = srf_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1418,6 +1529,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &idle_cpu_mtl_l), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), @@ -1427,6 +1539,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &idle_cpu_grr), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &idle_cpu_srf), {} }; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84f345c69ea5..c4e0432ae42a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed); * @opp: opp for which level value has to be returned for * * Return: level read from device tree corresponding to the opp, else - * return 0. + * return U32_MAX. */ unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) { @@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); * @index: index of the required opp * * Return: performance state read from device tree corresponding to the - * required opp, else return 0. + * required opp, else return U32_MAX. */ unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, unsigned int index) @@ -808,6 +808,16 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, struct dev_pm_opp *opp; opp = _find_key_ceil(dev, &temp, 0, true, _read_level, NULL); + if (IS_ERR(opp)) + return opp; + + /* False match */ + if (temp == OPP_LEVEL_UNSET) { + dev_err(dev, "%s: OPP levels aren't available\n", __func__); + dev_pm_opp_put(opp); + return ERR_PTR(-ENODEV); + } + *level = temp; return opp; } @@ -832,9 +842,14 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); * use. 
*/ struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level) + unsigned int *level) { - return _find_key_floor(dev, level, 0, true, _read_level, NULL); + unsigned long temp = *level; + struct dev_pm_opp *opp; + + opp = _find_key_floor(dev, &temp, 0, true, _read_level, NULL); + *level = temp; + return opp; } EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_floor); @@ -948,7 +963,7 @@ _opp_config_clk_single(struct device *dev, struct opp_table *opp_table, dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, ret); } else { - opp_table->rate_clk_single = freq; + opp_table->current_rate_single_clk = freq; } return ret; @@ -1046,40 +1061,23 @@ static int _set_opp_bw(const struct opp_table *opp_table, return 0; } -static int _set_performance_state(struct device *dev, struct device *pd_dev, - struct dev_pm_opp *opp, int i) +/* This is only called for PM domain for now */ +static int _set_required_opps(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, bool up) { - unsigned int pstate = likely(opp) ? opp->required_opps[i]->level: 0; - int ret; + struct device **devs = opp_table->required_devs; + struct dev_pm_opp *required_opp; + int index, target, delta, ret; - if (!pd_dev) + if (!devs) return 0; - ret = dev_pm_domain_set_performance_state(pd_dev, pstate); - if (ret) { - dev_err(dev, "Failed to set performance state of %s: %d (%d)\n", - dev_name(pd_dev), pstate, ret); - } - - return ret; -} - -static int _opp_set_required_opps_generic(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) -{ - dev_err(dev, "setting required-opps isn't supported for non-genpd devices\n"); - return -ENOENT; -} - -static int _opp_set_required_opps_genpd(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) -{ - struct device **genpd_virt_devs = - opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; - int index, target, delta, ret; + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return -EBUSY; /* Scaling up? Set required OPPs in normal order, else reverse */ - if (!scaling_down) { + if (up) { index = 0; target = opp_table->required_opp_count; delta = 1; @@ -1090,9 +1088,13 @@ static int _opp_set_required_opps_genpd(struct device *dev, } while (index != target) { - ret = _set_performance_state(dev, genpd_virt_devs[index], opp, index); - if (ret) - return ret; + if (devs[index]) { + required_opp = opp ? 
opp->required_opps[index] : NULL; + + ret = dev_pm_opp_set_opp(devs[index], required_opp); + if (ret) + return ret; + } index += delta; } @@ -1100,34 +1102,6 @@ static int _opp_set_required_opps_genpd(struct device *dev, return 0; } -/* This is only called for PM domain for now */ -static int _set_required_opps(struct device *dev, struct opp_table *opp_table, - struct dev_pm_opp *opp, bool up) -{ - /* required-opps not fully initialized yet */ - if (lazy_linking_pending(opp_table)) - return -EBUSY; - - if (opp_table->set_required_opps) - return opp_table->set_required_opps(dev, opp_table, opp, up); - - return 0; -} - -/* Update set_required_opps handler */ -void _update_set_required_opps(struct opp_table *opp_table) -{ - /* Already set */ - if (opp_table->set_required_opps) - return; - - /* All required OPPs will belong to genpd or none */ - if (opp_table->required_opp_tables[0]->is_genpd) - opp_table->set_required_opps = _opp_set_required_opps_genpd; - else - opp_table->set_required_opps = _opp_set_required_opps_generic; -} - static int _set_opp_level(struct device *dev, struct opp_table *opp_table, struct dev_pm_opp *opp) { @@ -1135,7 +1109,7 @@ static int _set_opp_level(struct device *dev, struct opp_table *opp_table, int ret = 0; if (opp) { - if (!opp->level) + if (opp->level == OPP_LEVEL_UNSET) return 0; level = opp->level; @@ -1378,12 +1352,12 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) * value of the frequency. In such a case, do not abort but * configure the hardware to the desired frequency forcefully. */ - forced = opp_table->rate_clk_single != target_freq; + forced = opp_table->current_rate_single_clk != freq; } - ret = _set_opp(dev, opp_table, opp, &target_freq, forced); + ret = _set_opp(dev, opp_table, opp, &freq, forced); - if (target_freq) + if (freq) dev_pm_opp_put(opp); put_opp_table: @@ -1867,6 +1841,8 @@ struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table) INIT_LIST_HEAD(&opp->node); + opp->level = OPP_LEVEL_UNSET; + return opp; } @@ -2388,19 +2364,13 @@ static void _opp_detach_genpd(struct opp_table *opp_table) { int index; - if (!opp_table->genpd_virt_devs) - return; - for (index = 0; index < opp_table->required_opp_count; index++) { - if (!opp_table->genpd_virt_devs[index]) + if (!opp_table->required_devs[index]) continue; - dev_pm_domain_detach(opp_table->genpd_virt_devs[index], false); - opp_table->genpd_virt_devs[index] = NULL; + dev_pm_domain_detach(opp_table->required_devs[index], false); + opp_table->required_devs[index] = NULL; } - - kfree(opp_table->genpd_virt_devs); - opp_table->genpd_virt_devs = NULL; } /* @@ -2427,14 +2397,20 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, int index = 0, ret = -EINVAL; const char * const *name = names; - if (opp_table->genpd_virt_devs) - return 0; + if (!opp_table->required_devs) { + dev_err(dev, "Required OPPs not available, can't attach genpd\n"); + return -EINVAL; + } - opp_table->genpd_virt_devs = kcalloc(opp_table->required_opp_count, - sizeof(*opp_table->genpd_virt_devs), - GFP_KERNEL); - if (!opp_table->genpd_virt_devs) - return -ENOMEM; + /* Genpd core takes care of propagation to parent genpd */ + if (opp_table->is_genpd) { + dev_err(dev, "%s: Operation not supported for genpds\n", __func__); + return -EOPNOTSUPP; + } + + /* Checking only the first one is enough ? 
*/ + if (opp_table->required_devs[0]) + return 0; while (*name) { if (index >= opp_table->required_opp_count) { @@ -2450,13 +2426,25 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, goto err; } - opp_table->genpd_virt_devs[index] = virt_dev; + /* + * Add the virtual genpd device as a user of the OPP table, so + * we can call dev_pm_opp_set_opp() on it directly. + * + * This will be automatically removed when the OPP table is + * removed, don't need to handle that here. + */ + if (!_add_opp_dev(virt_dev, opp_table->required_opp_tables[index])) { + ret = -ENOMEM; + goto err; + } + + opp_table->required_devs[index] = virt_dev; index++; name++; } if (virt_devs) - *virt_devs = opp_table->genpd_virt_devs; + *virt_devs = opp_table->required_devs; return 0; @@ -2466,10 +2454,50 @@ err: } +static int _opp_set_required_devs(struct opp_table *opp_table, + struct device *dev, + struct device **required_devs) +{ + int i; + + if (!opp_table->required_devs) { + dev_err(dev, "Required OPPs not available, can't set required devs\n"); + return -EINVAL; + } + + /* Another device that shares the OPP table has set the required devs ? */ + if (opp_table->required_devs[0]) + return 0; + + for (i = 0; i < opp_table->required_opp_count; i++) { + /* Genpd core takes care of propagation to parent genpd */ + if (required_devs[i] && opp_table->is_genpd && + opp_table->required_opp_tables[i]->is_genpd) { + dev_err(dev, "%s: Operation not supported for genpds\n", __func__); + return -EOPNOTSUPP; + } + + opp_table->required_devs[i] = required_devs[i]; + } + + return 0; +} + +static void _opp_put_required_devs(struct opp_table *opp_table) +{ + int i; + + for (i = 0; i < opp_table->required_opp_count; i++) + opp_table->required_devs[i] = NULL; +} + static void _opp_clear_config(struct opp_config_data *data) { - if (data->flags & OPP_CONFIG_GENPD) + if (data->flags & OPP_CONFIG_REQUIRED_DEVS) + _opp_put_required_devs(data->opp_table); + else if (data->flags & OPP_CONFIG_GENPD) _opp_detach_genpd(data->opp_table); + if (data->flags & OPP_CONFIG_REGULATOR) _opp_put_regulators(data->opp_table); if (data->flags & OPP_CONFIG_SUPPORTED_HW) @@ -2583,12 +2611,22 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) /* Attach genpds */ if (config->genpd_names) { + if (config->required_devs) + goto err; + ret = _opp_attach_genpd(opp_table, dev, config->genpd_names, config->virt_devs); if (ret) goto err; data->flags |= OPP_CONFIG_GENPD; + } else if (config->required_devs) { + ret = _opp_set_required_devs(opp_table, dev, + config->required_devs); + if (ret) + goto err; + + data->flags |= OPP_CONFIG_REQUIRED_DEVS; } ret = xa_alloc(&opp_configs, &id, data, XA_LIMIT(1, INT_MAX), @@ -2975,6 +3013,47 @@ put_table: EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage); /** + * dev_pm_opp_sync_regulators() - Sync state of voltage regulators + * @dev: device for which we do this operation + * + * Sync voltage state of the OPP table regulators. + * + * Return: 0 on success or a negative error value. 
+ */ +int dev_pm_opp_sync_regulators(struct device *dev) +{ + struct opp_table *opp_table; + struct regulator *reg; + int i, ret = 0; + + /* Device may not have OPP table */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return 0; + + /* Regulator may not be required for the device */ + if (unlikely(!opp_table->regulators)) + goto put_table; + + /* Nothing to sync if voltage wasn't changed */ + if (!opp_table->enabled) + goto put_table; + + for (i = 0; i < opp_table->regulator_count; i++) { + reg = opp_table->regulators[i]; + ret = regulator_sync_voltage(reg); + if (ret) + break; + } +put_table: + /* Drop reference taken by _find_opp_table() */ + dev_pm_opp_put_opp_table(opp_table); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_sync_regulators); + +/** * dev_pm_opp_enable() - Enable a specific OPP * @dev: device for which we do this operation * @freq: OPP frequency to enable @@ -3097,44 +3176,3 @@ void dev_pm_opp_remove_table(struct device *dev) dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table); - -/** - * dev_pm_opp_sync_regulators() - Sync state of voltage regulators - * @dev: device for which we do this operation - * - * Sync voltage state of the OPP table regulators. - * - * Return: 0 on success or a negative error value. - */ -int dev_pm_opp_sync_regulators(struct device *dev) -{ - struct opp_table *opp_table; - struct regulator *reg; - int i, ret = 0; - - /* Device may not have OPP table */ - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) - return 0; - - /* Regulator may not be required for the device */ - if (unlikely(!opp_table->regulators)) - goto put_table; - - /* Nothing to sync if voltage wasn't changed */ - if (!opp_table->enabled) - goto put_table; - - for (i = 0; i < opp_table->regulator_count; i++) { - reg = opp_table->regulators[i]; - ret = regulator_sync_voltage(reg); - if (ret) - break; - } -put_table: - /* Drop reference taken by _find_opp_table() */ - dev_pm_opp_put_opp_table(opp_table); - - return ret; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_sync_regulators); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 81fa27599d58..f9f0b22bccbb 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -165,7 +165,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, struct opp_table **required_opp_tables; struct device_node *required_np, *np; bool lazy = false; - int count, i; + int count, i, size; /* Traversing the first OPP node is all we need */ np = of_get_next_available_child(opp_np, NULL); @@ -179,12 +179,13 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, if (count <= 0) goto put_np; - required_opp_tables = kcalloc(count, sizeof(*required_opp_tables), - GFP_KERNEL); + size = sizeof(*required_opp_tables) + sizeof(*opp_table->required_devs); + required_opp_tables = kcalloc(count, size, GFP_KERNEL); if (!required_opp_tables) goto put_np; opp_table->required_opp_tables = required_opp_tables; + opp_table->required_devs = (void *)(required_opp_tables + count); opp_table->required_opp_count = count; for (i = 0; i < count; i++) { @@ -208,8 +209,6 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, mutex_lock(&opp_table_lock); list_add(&opp_table->lazy, &lazy_opp_tables); mutex_unlock(&opp_table_lock); - } else { - _update_set_required_opps(opp_table); } goto put_np; @@ -296,7 +295,7 @@ void _of_clear_opp(struct opp_table *opp_table, struct dev_pm_opp *opp) of_node_put(opp->np); } -static int _link_required_opps(struct dev_pm_opp *opp, +static 
int _link_required_opps(struct dev_pm_opp *opp, struct opp_table *opp_table, struct opp_table *required_table, int index) { struct device_node *np; @@ -314,6 +313,39 @@ static int _link_required_opps(struct dev_pm_opp *opp, return -ENODEV; } + /* + * There are two genpd (as required-opp) cases that we need to handle, + * devices with a single genpd and ones with multiple genpds. + * + * The single genpd case requires special handling as we need to use the + * same `dev` structure (instead of a virtual one provided by genpd + * core) for setting the performance state. + * + * It doesn't make sense for a device's DT entry to have both + * "opp-level" and single "required-opps" entry pointing to a genpd's + * OPP, as that would make the OPP core call + * dev_pm_domain_set_performance_state() for two different values for + * the same device structure. Lets treat single genpd configuration as a + * case where the OPP's level is directly available without required-opp + * link in the DT. + * + * Just update the `level` with the right value, which + * dev_pm_opp_set_opp() will take care of in the normal path itself. + * + * There is another case though, where a genpd's OPP table has + * required-opps set to a parent genpd. The OPP core expects the user to + * set the respective required `struct device` pointer via + * dev_pm_opp_set_config(). + */ + if (required_table->is_genpd && opp_table->required_opp_count == 1 && + !opp_table->required_devs[0]) { + /* Genpd core takes care of propagation to parent genpd */ + if (!opp_table->is_genpd) { + if (!WARN_ON(opp->level != OPP_LEVEL_UNSET)) + opp->level = opp->required_opps[0]->level; + } + } + return 0; } @@ -338,7 +370,7 @@ static int _of_opp_alloc_required_opps(struct opp_table *opp_table, if (IS_ERR_OR_NULL(required_table)) continue; - ret = _link_required_opps(opp, required_table, i); + ret = _link_required_opps(opp, opp_table, required_table, i); if (ret) goto free_required_opps; } @@ -359,7 +391,7 @@ static int lazy_link_required_opps(struct opp_table *opp_table, int ret; list_for_each_entry(opp, &opp_table->opp_list, node) { - ret = _link_required_opps(opp, new_table, index); + ret = _link_required_opps(opp, opp_table, new_table, index); if (ret) return ret; } @@ -422,7 +454,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table) /* All required opp-tables found, remove from lazy list */ if (!lazy) { - _update_set_required_opps(opp_table); list_del_init(&opp_table->lazy); list_for_each_entry(opp, &opp_table->opp_list, node) @@ -1393,8 +1424,14 @@ int of_get_required_opp_performance_state(struct device_node *np, int index) opp = _find_opp_of_np(opp_table, required_np); if (opp) { - pstate = opp->level; + if (opp->level == OPP_LEVEL_UNSET) { + pr_err("%s: OPP levels aren't available for %pOF\n", + __func__, np); + } else { + pstate = opp->level; + } dev_pm_opp_put(opp); + } dev_pm_opp_put_opp_table(opp_table); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 08366f90f16b..cff1fabd1ae3 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -35,6 +35,7 @@ extern struct list_head opp_tables; #define OPP_CONFIG_PROP_NAME BIT(3) #define OPP_CONFIG_SUPPORTED_HW BIT(4) #define OPP_CONFIG_GENPD BIT(5) +#define OPP_CONFIG_REQUIRED_DEVS BIT(6) /** * struct opp_config_data - data for set config operations @@ -49,6 +50,18 @@ struct opp_config_data { unsigned int flags; }; +/** + * struct dev_pm_opp_icc_bw - Interconnect bandwidth values + * @avg: Average bandwidth corresponding to this OPP (in icc units) + * @peak: Peak 
bandwidth corresponding to this OPP (in icc units) + * + * This structure stores the bandwidth values for a single interconnect path. + */ +struct dev_pm_opp_icc_bw { + u32 avg; + u32 peak; +}; + /* * Internal data structure organization with the OPP layer library is as * follows: @@ -157,12 +170,12 @@ enum opp_table_access { * @clock_latency_ns_max: Max clock latency in nanoseconds. * @parsed_static_opps: Count of devices for which OPPs are initialized from DT. * @shared_opp: OPP is shared between multiple devices. - * @rate_clk_single: Currently configured frequency for single clk. + * @current_rate_single_clk: Currently configured frequency for single clk. * @current_opp: Currently configured OPP for the table. * @suspend_opp: Pointer to OPP to be used during device suspend. - * @genpd_virt_devs: List of virtual devices for multiple genpd support. * @required_opp_tables: List of device OPP tables that are required by OPPs in * this table. + * @required_devs: List of devices for required OPP tables. * @required_opp_count: Number of required devices. * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. @@ -180,7 +193,6 @@ enum opp_table_access { * @path_count: Number of interconnect paths * @enabled: Set to true if the device's resources are enabled/configured. * @is_genpd: Marks if the OPP table belongs to a genpd. - * @set_required_opps: Helper responsible to set required OPPs. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -207,12 +219,12 @@ struct opp_table { unsigned int parsed_static_opps; enum opp_table_access shared_opp; - unsigned long rate_clk_single; + unsigned long current_rate_single_clk; struct dev_pm_opp *current_opp; struct dev_pm_opp *suspend_opp; - struct device **genpd_virt_devs; struct opp_table **required_opp_tables; + struct device **required_devs; unsigned int required_opp_count; unsigned int *supported_hw; @@ -229,8 +241,6 @@ struct opp_table { unsigned int path_count; bool enabled; bool is_genpd; - int (*set_required_opps)(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index 8f3f13fbbb25..e3b97cd1fbbf 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -18,6 +18,7 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> +#include <linux/property.h> #include <linux/regulator/consumer.h> #include <linux/slab.h> @@ -373,23 +374,15 @@ static int ti_opp_supply_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device *cpu_dev = get_cpu_device(0); - const struct of_device_id *match; const struct ti_opp_supply_of_data *of_data; int ret = 0; - match = of_match_device(ti_opp_supply_of_match, dev); - if (!match) { - /* We do not expect this to happen */ - dev_err(dev, "%s: Unable to match device\n", __func__); - return -ENODEV; - } - if (!match->data) { + of_data = device_get_match_data(dev); + if (!of_data) { /* Again, unlikely.. 
but mistakes do happen */ dev_err(dev, "%s: Bad data in match\n", __func__); return -EINVAL; } - of_data = match->data; - dev_set_drvdata(dev, (void *)of_data); /* If we need optimized voltage */ diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fc..33c9ff4afb49 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index ccd97bcef269..76dcb7f37bcd 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -45,18 +45,6 @@ struct dev_pm_opp_supply { unsigned long u_watt; }; -/** - * struct dev_pm_opp_icc_bw - Interconnect bandwidth values - * @avg: Average bandwidth corresponding to this OPP (in icc units) - * @peak: Peak bandwidth corresponding to this OPP (in icc units) - * - * This structure stores the bandwidth values for a single interconnect path. - */ -struct dev_pm_opp_icc_bw { - u32 avg; - u32 peak; -}; - typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, struct regulator **regulators, unsigned int count); @@ -74,8 +62,10 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. - * @virt_devs: Pointer to return the array of virtual devices. + * attach. Mutually exclusive with required_devs. + * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually + * exclusive with required_devs. + * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. * * This structure contains platform specific OPP configurations for the device. */ @@ -90,11 +80,15 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; + struct device **required_devs; }; +#define OPP_LEVEL_UNSET U32_MAX + /** * struct dev_pm_opp_data - The data to use to initialize an OPP. - * @level: The performance level for the OPP. + * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if + * level field isn't used. * @freq: The clock rate in Hz for the OPP. * @u_volt: The voltage in uV for the OPP. 
*/ @@ -157,7 +151,7 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level); + unsigned int *level); struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -324,7 +318,7 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, } static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level) + unsigned int *level) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/kernel/async.c b/kernel/async.c index b2c4ba5686ee..673bba6bdf3a 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -145,6 +145,39 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } +static async_cookie_t __async_schedule_node_domain(async_func_t func, + void *data, int node, + struct async_domain *domain, + struct async_entry *entry) +{ + async_cookie_t newcookie; + unsigned long flags; + + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); + INIT_WORK(&entry->work, async_run_entry_fn); + entry->func = func; + entry->data = data; + entry->domain = domain; + + spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ + newcookie = entry->cookie = next_cookie++; + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); + + return newcookie; +} + /** * async_schedule_node_domain - NUMA specific version of async_schedule_domain * @func: function to execute asynchronously @@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, func(data, newcookie); return newcookie; } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); - INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = func; - entry->data = data; - entry->domain = domain; - - spin_lock_irqsave(&async_lock, flags); - - /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; - - list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); - - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - return newcookie; + return __async_schedule_node_domain(func, data, node, domain, entry); } EXPORT_SYMBOL_GPL(async_schedule_node_domain); @@ -232,6 +244,35 @@ async_cookie_t async_schedule_node(async_func_t func, void *data, int node) EXPORT_SYMBOL_GPL(async_schedule_node); /** + * async_schedule_dev_nocall - A simplified variant of async_schedule_dev() + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. + * + * If the asynchronous execution of @func is scheduled successfully, return + * true. Otherwise, do nothing and return false, unlike async_schedule_dev() + * that will run the function synchronously then. 
+ */ +bool async_schedule_dev_nocall(async_func_t func, struct device *dev) +{ + struct async_entry *entry; + + entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL); + + /* Give up if there is no memory or too much work. */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + return false; + } + + __async_schedule_node_domain(func, dev, dev_to_node(dev), + &async_dfl_domain, entry); + return true; +} + +/** * async_synchronize_full - synchronize all asynchronous function calls * * This function waits until all asynchronous function calls have been done. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index dee341ae4ace..4b0b7cf2e019 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -642,9 +642,9 @@ int hibernation_platform_enter(void) */ static void power_down(void) { -#ifdef CONFIG_SUSPEND int error; +#ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { error = suspend_devices_and_enter(mem_sleep_current); if (error) { @@ -667,7 +667,13 @@ static void power_down(void) kernel_restart(NULL); break; case HIBERNATION_PLATFORM: - hibernation_platform_enter(); + error = hibernation_platform_enter(); + if (error == -EAGAIN || error == -EBUSY) { + swsusp_unmark(); + events_check_enabled = false; + pr_info("Wakeup event detected during hibernation, rolling back.\n"); + return; + } fallthrough; case HIBERNATION_SHUTDOWN: if (kernel_can_power_off()) diff --git a/kernel/power/main.c b/kernel/power/main.c index f6425ae3e8b0..b1ae9b677d03 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -60,22 +60,6 @@ EXPORT_SYMBOL_GPL(lock_system_sleep); void unlock_system_sleep(unsigned int flags) { - /* - * Don't use freezer_count() because we don't want the call to - * try_to_freeze() here. - * - * Reason: - * Fundamentally, we just don't need it, because freezing condition - * doesn't come into effect until we release the - * system_transition_mutex lock, since the freezer always works with - * system_transition_mutex held. - * - * More importantly, in the case of hibernation, - * unlock_system_sleep() gets called in snapshot_read() and - * snapshot_write() when the freezing condition is still in effect. - * Which means, if we use try_to_freeze() here, it would make them - * enter the refrigerator, thus causing hibernation to lockup. 
- */ if (!(flags & PF_NOFREEZE)) current->flags &= ~PF_NOFREEZE; mutex_unlock(&system_transition_mutex); diff --git a/kernel/power/power.h b/kernel/power/power.h index 17fd9aaaf084..8499a39c62f4 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -175,6 +175,8 @@ extern int swsusp_write(unsigned int flags); void swsusp_close(void); #ifdef CONFIG_SUSPEND extern int swsusp_unmark(void); +#else +static inline int swsusp_unmark(void) { return 0; } #endif struct __kernel_old_timeval; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 50a15408c3fc..5c96ff067c64 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1119,7 +1119,7 @@ static void mark_nosave_pages(struct memory_bitmap *bm) int create_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; - int error = 0; + int error; if (forbidden_pages_map && free_pages_map) return 0; @@ -1487,11 +1487,11 @@ static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) s_page = pfn_to_page(src_pfn); d_page = pfn_to_page(dst_pfn); if (PageHighMem(s_page)) { - src = kmap_atomic(s_page); - dst = kmap_atomic(d_page); + src = kmap_local_page(s_page); + dst = kmap_local_page(d_page); zeros_only = do_copy_page(dst, src); - kunmap_atomic(dst); - kunmap_atomic(src); + kunmap_local(dst); + kunmap_local(src); } else { if (PageHighMem(d_page)) { /* @@ -1499,9 +1499,9 @@ static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) * data modified by kmap_atomic() */ zeros_only = safe_copy_page(buffer, s_page); - dst = kmap_atomic(d_page); + dst = kmap_local_page(d_page); copy_page(dst, buffer); - kunmap_atomic(dst); + kunmap_local(dst); } else { zeros_only = safe_copy_page(page_address(d_page), s_page); } @@ -2778,7 +2778,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) int snapshot_write_next(struct snapshot_handle *handle) { static struct chain_allocator ca; - int error = 0; + int error; next: /* Check if we have already loaded the entire image */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a2cb0babb5ec..6053ddddaf65 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -451,7 +451,7 @@ err_close: static int swap_write_page(struct swap_map_handle *handle, void *buf, struct hib_bio_batch *hb) { - int error = 0; + int error; sector_t offset; if (!handle->cur) @@ -606,11 +606,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -619,7 +619,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -649,12 +649,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -663,7 +663,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); 
wake_up(&d->done); } return 0; @@ -798,7 +798,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -806,12 +806,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -850,7 +850,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1132,12 +1132,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1150,7 +1150,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1335,7 +1335,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1371,7 +1371,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1390,7 +1390,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1421,7 +1421,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1429,13 +1429,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); @@ -1566,7 +1566,6 @@ put: /** * swsusp_close - close resume device. - * @exclusive: Close the resume device which is exclusively opened. */ void swsusp_close(void) |
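
A note on the new async helper used throughout the drivers/base/power/main.c changes above: async_schedule_dev_nocall(), added in kernel/async.c, never runs the callback synchronously on failure, it only returns false, so the caller decides what the synchronous fallback looks like. The following is a minimal sketch of that calling pattern, modelled on the dpm_async_fn()/device_resume() pairing from the patch; the my_* names are placeholders, not part of the patch::

    #include <linux/async.h>
    #include <linux/device.h>

    static void my_resume_one(struct device *dev, bool async)
    {
            /* ... the actual per-device work ... */
    }

    /* async_func_t callback: drop the reference taken before scheduling. */
    static void my_resume_async(void *data, async_cookie_t cookie)
    {
            struct device *dev = data;

            my_resume_one(dev, true);
            put_device(dev);
    }

    static void my_resume(struct device *dev)
    {
            get_device(dev);

            /* Schedule asynchronous execution if possible ... */
            if (async_schedule_dev_nocall(my_resume_async, dev))
                    return;

            /* ... otherwise drop the reference and run synchronously. */
            put_device(dev);
            my_resume_one(dev, false);
    }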
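
The trans_stat_show() rework in drivers/devfreq/devfreq.c replaces open-coded sprintf() accumulation with sysfs_emit_at() and returns -EFBIG once the transition table no longer fits in one page, which is what the sysfs-class-devfreq ABI note above documents. A condensed sketch of that bounded-output pattern, using a made-up attribute and table purely for illustration::

    #include <linux/device.h>
    #include <linux/sysfs.h>

    static unsigned long my_table[64];      /* illustrative data */
    static unsigned int my_table_len = 64;

    static ssize_t my_table_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
    {
            ssize_t len = 0;
            unsigned int i;

            for (i = 0; i < my_table_len; i++) {
                    /* Stop appending once the sysfs page is full. */
                    if (len >= PAGE_SIZE - 1)
                            break;
                    len += sysfs_emit_at(buf, len, "%10lu\n", my_table[i]);
            }

            /* Report the overflow instead of returning truncated output. */
            if (len >= PAGE_SIZE - 1)
                    return -EFBIG;

            return len;
    }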
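
On the OPP side, struct dev_pm_opp_config gains a required_devs field that supersedes the genpd_virt_devs plumbing and is mutually exclusive with genpd_names/virt_devs. The sketch below assumes a consumer whose OPP table has exactly one required-opps entry pointing at a genpd OPP table; the pd_dev pointer, the single-entry array and the devm_pm_opp_set_config() call site are illustrative, not taken from the patch::

    #include <linux/device.h>
    #include <linux/pm_opp.h>

    static int my_opp_init(struct device *dev, struct device *pd_dev)
    {
            /* One entry per required-opps phandle of the device's OPP table. */
            struct device *required_devs[1] = { pd_dev };
            struct dev_pm_opp_config config = {
                    .required_devs = required_devs,
            };

            /* The OPP core copies the pointers, so a stack array is fine here. */
            return devm_pm_opp_set_config(dev, &config);
    }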
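
Finally, the kernel/power/swap.c changes turn the plain atomic_read()/atomic_set() handshakes between the image thread and its compression/CRC workers into atomic_read_acquire()/atomic_set_release() pairs, so the data handed over is guaranteed to be visible before the flag that announces it. A stripped-down sketch of the worker side of that handshake, with illustrative structure and function names::

    #include <linux/atomic.h>
    #include <linux/kthread.h>
    #include <linux/wait.h>

    struct my_worker {
            wait_queue_head_t go, done;     /* initialized by the producer */
            atomic_t ready, stop;
            /* ... payload filled in by the producer ... */
    };

    static int my_worker_threadfn(void *arg)
    {
            struct my_worker *d = arg;

            while (1) {
                    /* Acquire pairs with the producer's atomic_set_release(&d->ready, 1). */
                    wait_event(d->go, atomic_read_acquire(&d->ready) ||
                                      kthread_should_stop());
                    if (kthread_should_stop())
                            break;
                    atomic_set(&d->ready, 0);

                    /* ... consume the payload, produce the results ... */

                    /* Release publishes the results before ->stop becomes visible. */
                    atomic_set_release(&d->stop, 1);
                    wake_up(&d->done);
            }
            return 0;
    }

The producer mirrors this: it fills in the payload, does atomic_set_release(&d->ready, 1) followed by wake_up(&d->go), and then waits with wait_event(d->done, atomic_read_acquire(&d->stop)).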