From 6e863844ad6effecac92a67a43b8467a8c2e129f Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 5 Dec 2018 12:05:55 +0100 Subject: [PATCH 1/9] PM: sleep: call devfreq suspend/resume Devfreq framework supports suspend of its devices. Call the the devfreq interface and allow devfreq devices preserve/restore their states during suspend/resume. Suggested-by: Tobias Jakobi Reviewed-by: Chanwoo Choi Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a690fd400260..0992e67e862b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "../base.h" @@ -1078,6 +1079,7 @@ void dpm_resume(pm_message_t state) dpm_show_time(starttime, state, 0, NULL); cpufreq_resume(); + devfreq_resume(); trace_suspend_resume(TPS("dpm_resume"), state.event, false); } @@ -1852,6 +1854,7 @@ int dpm_suspend(pm_message_t state) trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); + devfreq_suspend(); cpufreq_suspend(); mutex_lock(&dpm_list_mtx); From 088d923a11e683af28ad9cea9b66782fff588495 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 2 Jan 2019 12:13:52 +0100 Subject: [PATCH 2/9] cpufreq / Documentation: Update cpufreq MAINTAINERS entry Update the MAINTAINERS entry for cpufreq by making it clear that it is not just drivers and adding current documentation records to it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7808b166fdb9..2a01d1af1e26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3909,7 +3909,7 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ti/cpmac.c -CPU FREQUENCY DRIVERS +CPU FREQUENCY SCALING FRAMEWORK M: "Rafael J. Wysocki" M: Viresh Kumar L: linux-pm@vger.kernel.org @@ -3917,6 +3917,8 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) B: https://bugzilla.kernel.org +F: Documentation/admin-guide/pm/cpufreq.rst +F: Documentation/admin-guide/pm/intel_pstate.rst F: Documentation/cpu-freq/ F: Documentation/devicetree/bindings/cpufreq/ F: drivers/cpufreq/ From 09ad32dc6dc23beb638c48d0a2e5a9eaa8e85c6c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Jan 2019 11:30:07 +0100 Subject: [PATCH 3/9] cpuidle / Documentation: Update cpuidle MAINTAINERS entry Update the MAINTAINERS entry for cpuidle by making it clear that it is not just drivers and adding a documentation record to it. Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7808b166fdb9..5cc9e96ff2c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3964,13 +3964,14 @@ S: Supported F: drivers/cpuidle/cpuidle-exynos.c F: arch/arm/mach-exynos/pm.c -CPUIDLE DRIVERS +CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. Wysocki" M: Daniel Lezcano L: linux-pm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git B: https://bugzilla.kernel.org +F: Documentation/admin-guide/pm/cpuidle.rst F: drivers/cpuidle/* F: include/linux/cpuidle.h From 1690d8bb91e370ab772062b79bd434ce815c4729 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Jan 2019 15:14:33 +0530 Subject: [PATCH 4/9] cpufreq: scpi/scmi: Fix freeing of dynamic OPPs Since the commit 2a4eb7358aba "OPP: Don't remove dynamic OPPs from _dev_pm_opp_remove_table()", dynamically created OPP aren't automatically removed anymore by dev_pm_opp_cpumask_remove_table(). This affects the scpi and scmi cpufreq drivers which no longer free OPPs on failures or on invocations of the policy->exit() callback. Create a generic OPP helper dev_pm_opp_remove_all_dynamic() which can be called from these drivers instead of dev_pm_opp_cpumask_remove_table(). In dev_pm_opp_remove_all_dynamic(), we need to make sure that the opp_list isn't getting accessed simultaneously from other parts of the OPP core while the helper is freeing dynamic OPPs, i.e. we can't drop the opp_table->lock while traversing through the OPP list. And to accomplish that, this patch also creates _opp_kref_release_unlocked() which can be called from this new helper with the opp_table lock already held. Cc: 4.20 # v4.20 Reported-by: Valentin Schneider Fixes: 2a4eb7358aba "OPP: Don't remove dynamic OPPs from _dev_pm_opp_remove_table()" Signed-off-by: Viresh Kumar Tested-by: Valentin Schneider Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 4 +-- drivers/cpufreq/scpi-cpufreq.c | 4 +-- drivers/opp/core.c | 63 +++++++++++++++++++++++++++++++--- include/linux/pm_opp.h | 5 +++ 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 50b1551ba894..c2e66528f5ee 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -176,7 +176,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) out_free_priv: kfree(priv); out_free_opp: - dev_pm_opp_cpumask_remove_table(policy->cpus); + dev_pm_opp_remove_all_dynamic(cpu_dev); return ret; } @@ -188,7 +188,7 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); kfree(priv); - dev_pm_opp_cpumask_remove_table(policy->related_cpus); + dev_pm_opp_remove_all_dynamic(priv->cpu_dev); return 0; } diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 87a98ec77773..99449738faa4 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -177,7 +177,7 @@ out_free_cpufreq_table: out_free_priv: kfree(priv); out_free_opp: - dev_pm_opp_cpumask_remove_table(policy->cpus); + dev_pm_opp_remove_all_dynamic(cpu_dev); return ret; } @@ -190,7 +190,7 @@ static int scpi_cpufreq_exit(struct cpufreq_policy *policy) clk_put(priv->clk); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); kfree(priv); - dev_pm_opp_cpumask_remove_table(policy->related_cpus); + dev_pm_opp_remove_all_dynamic(priv->cpu_dev); return 0; } diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e5507add8f04..18f1639dbc4a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -988,11 +988,9 @@ void _opp_free(struct dev_pm_opp *opp) kfree(opp); } -static void _opp_kref_release(struct kref *kref) +static void _opp_kref_release(struct dev_pm_opp *opp, + struct opp_table *opp_table) { - struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); - struct opp_table *opp_table = opp->opp_table; - /* * Notify the changes in the availability of the operable * frequency/voltage list. @@ -1002,7 +1000,22 @@ static void _opp_kref_release(struct kref *kref) opp_debug_remove_one(opp); list_del(&opp->node); kfree(opp); +} +static void _opp_kref_release_unlocked(struct kref *kref) +{ + struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); + struct opp_table *opp_table = opp->opp_table; + + _opp_kref_release(opp, opp_table); +} + +static void _opp_kref_release_locked(struct kref *kref) +{ + struct dev_pm_opp *opp = container_of(kref, struct dev_pm_opp, kref); + struct opp_table *opp_table = opp->opp_table; + + _opp_kref_release(opp, opp_table); mutex_unlock(&opp_table->lock); } @@ -1013,10 +1026,16 @@ void dev_pm_opp_get(struct dev_pm_opp *opp) void dev_pm_opp_put(struct dev_pm_opp *opp) { - kref_put_mutex(&opp->kref, _opp_kref_release, &opp->opp_table->lock); + kref_put_mutex(&opp->kref, _opp_kref_release_locked, + &opp->opp_table->lock); } EXPORT_SYMBOL_GPL(dev_pm_opp_put); +static void dev_pm_opp_put_unlocked(struct dev_pm_opp *opp) +{ + kref_put(&opp->kref, _opp_kref_release_unlocked); +} + /** * dev_pm_opp_remove() - Remove an OPP from OPP table * @dev: device for which we do this operation @@ -1060,6 +1079,40 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq) } EXPORT_SYMBOL_GPL(dev_pm_opp_remove); +/** + * dev_pm_opp_remove_all_dynamic() - Remove all dynamically created OPPs + * @dev: device for which we do this operation + * + * This function removes all dynamically created OPPs from the opp table. + */ +void dev_pm_opp_remove_all_dynamic(struct device *dev) +{ + struct opp_table *opp_table; + struct dev_pm_opp *opp, *temp; + int count = 0; + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return; + + mutex_lock(&opp_table->lock); + list_for_each_entry_safe(opp, temp, &opp_table->opp_list, node) { + if (opp->dynamic) { + dev_pm_opp_put_unlocked(opp); + count++; + } + } + mutex_unlock(&opp_table->lock); + + /* Drop the references taken by dev_pm_opp_add() */ + while (count--) + dev_pm_opp_put_opp_table(opp_table); + + /* Drop the reference taken by _find_opp_table() */ + dev_pm_opp_put_opp_table(opp_table); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_remove_all_dynamic); + struct dev_pm_opp *_opp_allocate(struct opp_table *table) { struct dev_pm_opp *opp; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0a2a88e5a383..b895f4e79868 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -108,6 +108,7 @@ void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt); void dev_pm_opp_remove(struct device *dev, unsigned long freq); +void dev_pm_opp_remove_all_dynamic(struct device *dev); int dev_pm_opp_enable(struct device *dev, unsigned long freq); @@ -217,6 +218,10 @@ static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq) { } +static inline void dev_pm_opp_remove_all_dynamic(struct device *dev) +{ +} + static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq) { return 0; From 2f66196208c98b3d1b4294edffb2c5a8197be899 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 7 Jan 2019 18:51:53 +0000 Subject: [PATCH 5/9] cpufreq: check if policy is inactive early in __cpufreq_get() cpuinfo_cur_freq gets current CPU frequency as detected by hardware while scaling_cur_freq last known CPU frequency. Some platforms may not allow checking the CPU frequency of an offline CPU or the associated resources may have been released via cpufreq_exit when the CPU gets offlined, in which case the policy would have been invalidated already. If we attempt to get current frequency from the hardware, it may result in hang or crash. For example on Juno, I see: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000188 [0000000000000188] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: CPU: 5 PID: 4202 Comm: cat Not tainted 4.20.0-08251-ga0f2c0318a15-dirty #87 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform pstate: 40000005 (nZcv daif -PAN -UAO) pc : scmi_cpufreq_get_rate+0x34/0xb0 lr : scmi_cpufreq_get_rate+0x34/0xb0 Call trace: scmi_cpufreq_get_rate+0x34/0xb0 __cpufreq_get+0x34/0xc0 show_cpuinfo_cur_freq+0x24/0x78 show+0x40/0x60 sysfs_kf_seq_show+0xc0/0x148 kernfs_seq_show+0x44/0x50 seq_read+0xd4/0x480 kernfs_fop_read+0x15c/0x208 __vfs_read+0x60/0x188 vfs_read+0x94/0x150 ksys_read+0x6c/0xd8 __arm64_sys_read+0x24/0x30 el0_svc_common+0x78/0x100 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc ---[ end trace 3d1024e58f77f6b2 ]--- So fix the issue by checking if the policy is invalid early in __cpufreq_get before attempting to get the current frequency. Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7aa3dcad2175..df34a12a388f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1530,17 +1530,16 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { unsigned int ret_freq = 0; - if (!cpufreq_driver->get) + if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get) return ret_freq; ret_freq = cpufreq_driver->get(policy->cpu); /* - * Updating inactive policies is invalid, so avoid doing that. Also - * if fast frequency switching is used with the given policy, the check + * If fast frequency switching is used with the given policy, the check * against policy->cur is pointless, so skip it in that case too. */ - if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled) + if (policy->fast_switch_enabled) return ret_freq; if (ret_freq && policy->cur && @@ -1569,10 +1568,7 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - - if (!policy_is_inactive(policy)) - ret_freq = __cpufreq_get(policy); - + ret_freq = __cpufreq_get(policy); up_read(&policy->rwsem); cpufreq_cpu_put(policy); From 7604bf0920985c9280c8b24e2f0c3e4ed47f502f Mon Sep 17 00:00:00 2001 From: Otto Sabart Date: Wed, 9 Jan 2019 00:56:51 +0100 Subject: [PATCH 6/9] doc: trace: fix reference to cpuidle documentation file Old cpuidle/sysfs.txt file was replaced in aa5eee355b46. So, refer to an updated file. Fixes: aa5eee355b46 (Documentation: admin-guide: PM: Add cpuidle document) Signed-off-by: Otto Sabart Signed-off-by: Rafael J. Wysocki --- Documentation/trace/coresight-cpu-debug.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/trace/coresight-cpu-debug.txt b/Documentation/trace/coresight-cpu-debug.txt index 89ab09e78e8d..f07e38094b40 100644 --- a/Documentation/trace/coresight-cpu-debug.txt +++ b/Documentation/trace/coresight-cpu-debug.txt @@ -165,7 +165,7 @@ Do some work... The same can also be done from an application program. Disable specific CPU's specific idle state from cpuidle sysfs (see -Documentation/cpuidle/sysfs.txt): +Documentation/admin-guide/pm/cpuidle.rst): # echo 1 > /sys/devices/system/cpu/cpu$cpu/cpuidle/state$state/disable From 0e141d1c65c1dd31c914eb2e11651adcc1a15912 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 9 Jan 2019 10:42:36 +0000 Subject: [PATCH 7/9] cpufreq: scmi: Fix frequency invariance in slow path The scmi-cpufreq driver calls the arch_set_freq_scale() callback on frequency changes to provide scale-invariant load-tracking signals to the scheduler. However, in the slow path, it does so while specifying the current and max frequencies in different units, hence resulting in a broken freq_scale factor. Fix this by passing all frequencies in KHz, as stored in the CPUFreq frequency table. Fixes: 99d6bdf33877 (cpufreq: add support for CPU DVFS based on SCMI message protocol) Signed-off-by: Quentin Perret Acked-by: Viresh Kumar Acked-by: Sudeep Holla Cc: 4.17+ # v4.17+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c2e66528f5ee..242c3370544e 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -52,9 +52,9 @@ scmi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) int ret; struct scmi_data *priv = policy->driver_data; struct scmi_perf_ops *perf_ops = handle->perf_ops; - u64 freq = policy->freq_table[index].frequency * 1000; + u64 freq = policy->freq_table[index].frequency; - ret = perf_ops->freq_set(handle, priv->domain_id, freq, false); + ret = perf_ops->freq_set(handle, priv->domain_id, freq * 1000, false); if (!ret) arch_set_freq_scale(policy->related_cpus, freq, policy->cpuinfo.max_freq); From 1f7b7081568bca281f4ef42096206180cfaced00 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Thu, 10 Jan 2019 00:19:44 +0100 Subject: [PATCH 8/9] PM-runtime: Fix 'jiffies' in comments after switch to hrtimers PM-runtime now uses the hrtimers infrastructure for autosuspend, however comments still reference 'jiffies'. Fixes: 8234f6734c5d (PM-runtime: Switch autosuspend over to using hrtimers) Signed-off-by: Ladislav Michl Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 70624695b6d5..a282e74d1a16 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -121,7 +121,7 @@ static void pm_runtime_cancel_pending(struct device *dev) * Compute the autosuspend-delay expiration time based on the device's * power.last_busy time. If the delay has already expired or is disabled * (negative) or the power.use_autosuspend flag isn't set, return 0. - * Otherwise return the expiration time in jiffies (adjusted to be nonzero). + * Otherwise return the expiration time in nanoseconds (adjusted to be nonzero). * * This function may be called either with or without dev->power.lock held. * Either way it can be racy, since power.last_busy may be updated at any time. @@ -905,7 +905,10 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) spin_lock_irqsave(&dev->power.lock, flags); expires = dev->power.timer_expires; - /* If 'expire' is after 'jiffies' we've been called too early. */ + /* + * If 'expires' is after the current time, we've been called + * too early. + */ if (expires > 0 && expires < ktime_to_ns(ktime_get())) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? From ca27e4cd0bdd87e33fda38e6e3d18d36d54356d4 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 10 Jan 2019 10:00:40 +0100 Subject: [PATCH 9/9] PM-runtime: Fix autosuspend_delay on 32bits arch Cast autosuspend_delay to u64 to make sure that the full computation of 'expires' or slack will be done in u64, even on 32bits arch. Otherwise, any delay greater than 2^31 nsec can overflow if signed 32bits is used when converting delay from msec to nsec. Fixes: 8234f6734c5d (PM-runtime: Switch autosuspend over to using hrtimers) Reported-by: Tony Lindgren Tested-by: Tony Lindgren Signed-off-by: Vincent Guittot Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a282e74d1a16..457be03b744d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -141,7 +141,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev) last_busy = READ_ONCE(dev->power.last_busy); - expires = last_busy + autosuspend_delay * NSEC_PER_MSEC; + expires = last_busy + (u64)autosuspend_delay * NSEC_PER_MSEC; if (expires <= now) expires = 0; /* Already expired. */ @@ -525,7 +525,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) * We add a slack of 25% to gather wakeups * without sacrificing the granularity. */ - u64 slack = READ_ONCE(dev->power.autosuspend_delay) * + u64 slack = (u64)READ_ONCE(dev->power.autosuspend_delay) * (NSEC_PER_MSEC >> 2); dev->power.timer_expires = expires;