0408497800
- Make the Energy Model cover non-CPU devices (Lukasz Luba). - Add Ice Lake server idle states table to the intel_idle driver and eliminate a redundant static variable from it (Chen Yu, Rafael Wysocki). - Eliminate all W=1 build warnings from cpufreq (Lee Jones). - Add support for Sapphire Rapids and for Power Limit 4 to the Intel RAPL power capping driver (Sumeet Pawnikar, Zhang Rui). - Fix function name in kerneldoc comments in the idle_inject power capping driver (Yangtao Li). - Fix locking issues with cpufreq governors and drop a redundant "weak" function definition from cpufreq (Viresh Kumar). - Rearrange cpufreq to register non-modular governors at the core_initcall level and allow the default cpufreq governor to be specified in the kernel command line (Quentin Perret). - Extend, fix and clean up the intel_pstate driver (Srinivas Pandruvada, Rafael Wysocki): * Add a new sysfs attribute for disabling/enabling CPU energy-efficiency optimizations in the processor. * Make the driver avoid enabling HWP if EPP is not supported. * Allow the driver to handle numeric EPP values in the sysfs interface and fix the setting of EPP via sysfs in the active mode. * Eliminate a static checker warning and clean up a kerneldoc comment. - Clean up some variable declarations in the powernv cpufreq driver (Wei Yongjun). - Fix up the ->enter_s2idle callback definition to cover the case when it points to the same function as ->idle correctly (Neal Liu). - Rearrange and clean up the PSCI cpuidle driver (Ulf Hansson). - Make the PM core emit "changed" uevent when adding/removing the "wakeup" sysfs attribute of devices (Abhishek Pandit-Subedi). - Add a helper macro for declaring PM callbacks and use it in the MMC jz4740 driver (Paul Cercueil). - Fix white space in some places in the hibernate code and make the system-wide PM code use "const char *" where appropriate (Xiang Chen, Alexey Dobriyan). - Add one more "unsafe" helper macro to the freezer to cover the NFS use case (He Zhe). 
- Change the language in the generic PM domains framework to use parent/child terminology and clean up a typo and some comment formatting in that code (Kees Cook, Geert Uytterhoeven). - Update the operating performance points OPP framework (Lukasz Luba, Andrew-sh.Cheng, Valdis Kletnieks): * Refactor dev_pm_opp_of_register_em() and update related drivers. * Add a missing function export. * Allow disabled OPPs in dev_pm_opp_get_freq(). - Update devfreq core and drivers (Chanwoo Choi, Lukasz Luba, Enric Balletbo i Serra, Dmitry Osipenko, Kieran Bingham, Marc Zyngier): * Add support for delayed timers to the devfreq core and make the Samsung exynos5422-dmc driver use it. * Unify sysfs interface to use "df-" as a prefix in instance names consistently. * Fix devfreq_summary debugfs node indentation. * Add the rockchip,pmu phandle to the rk3399_dmc driver DT bindings. * List Dmitry Osipenko as the Tegra devfreq driver maintainer. * Fix typos in the core devfreq code. - Update the pm-graph utility to version 5.7 including a number of fixes related to suspend-to-idle (Todd Brandt). - Fix coccicheck errors and warnings in the cpupower utility (Shuah Khan). - Replace HTTP links with HTTPs ones in multiple places (Alexander A. Klimov). 
-----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAl8oO24SHHJqd0Byand5 c29ja2kubmV0AAoJEILEb/54YlRx7ZQP/0lQ0yABnASnwomdOH6+K/m7rvc+e9FE zx5pTDQswhU5tM7SQAIKqe0uSI+okF2UrBrT5onA16F+JUbnrbexJLazBPfVTTGF AKpKEQ7Wh69Wz+Y6cQZjm1dTuRL+dlBJuBrzR2tLSnONPMMHuFcO3xd7lgE9UAxC oGEf393taA6OqcUNRQIa2gqbq+k1qhKjeDucGkbOaoJ6CL0ZyWI+Tfw1WWaBBGv0 /2wBd6V513OH8WtQCW6H3YpHmhYW6OwL8w19KyGcjPRGJaeaIP4W/Ng7mkvgL5ZB vZqg3XiufFV9uTe8W1NQaVv/NjlN256OteuK809aosTVjD0dhFkhBYg5TLu6HbQq C/NciZ+78oLedWLT73EUfw3NyS+V0jk6X2EIlBUwNi0Qw1B1pCifGOCKzWFFe5cr ci4xr4FG7dBkxScOxwFAU2s5TdPHLOkGkQtg4jZr0OYDrzkyLEdsnZEUjLPORo+0 6EBXGfTOSy2CBHcYswRtzJr/1pUTzj7oejhTAMCCuYW2r3VyQtnYcVjlehtp20if 6BfmGisk8nmtxlSm+/Y2FqKa4bNnSTMmr0UJQ+Rjp0tHs47QeucI0ORfZ5nPaBac +ptvIjWmn3xejT/+oAehpH9066Iuy66vzHdnj7x5+WAsmYS8n8OFtlBFkYELmLJB 3xI5hIl7WtGo =8cUO -----END PGP SIGNATURE----- Merge tag 'pm-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm Pull power management updates from Rafael Wysocki: "The most significant change here is the extension of the Energy Model to cover non-CPU devices (as well as CPUs) from Lukasz Luba. There is also some new hardware support (Ice Lake server idle states table for intel_idle, Sapphire Rapids and Power Limit 4 support in the RAPL driver), some new functionality in the existing drivers (eg. a new switch to disable/enable CPU energy-efficiency optimizations in intel_pstate, delayed timers in devfreq), some assorted fixes (cpufreq core, intel_pstate, intel_idle) and cleanups (eg. cpuidle-psci, devfreq), including the elimination of W=1 build warnings from cpufreq done by Lee Jones. Specifics: - Make the Energy Model cover non-CPU devices (Lukasz Luba). - Add Ice Lake server idle states table to the intel_idle driver and eliminate a redundant static variable from it (Chen Yu, Rafael Wysocki). - Eliminate all W=1 build warnings from cpufreq (Lee Jones). 
- Add support for Sapphire Rapids and for Power Limit 4 to the Intel RAPL power capping driver (Sumeet Pawnikar, Zhang Rui). - Fix function name in kerneldoc comments in the idle_inject power capping driver (Yangtao Li). - Fix locking issues with cpufreq governors and drop a redundant "weak" function definition from cpufreq (Viresh Kumar). - Rearrange cpufreq to register non-modular governors at the core_initcall level and allow the default cpufreq governor to be specified in the kernel command line (Quentin Perret). - Extend, fix and clean up the intel_pstate driver (Srinivas Pandruvada, Rafael Wysocki): * Add a new sysfs attribute for disabling/enabling CPU energy-efficiency optimizations in the processor. * Make the driver avoid enabling HWP if EPP is not supported. * Allow the driver to handle numeric EPP values in the sysfs interface and fix the setting of EPP via sysfs in the active mode. * Eliminate a static checker warning and clean up a kerneldoc comment. - Clean up some variable declarations in the powernv cpufreq driver (Wei Yongjun). - Fix up the ->enter_s2idle callback definition to cover the case when it points to the same function as ->idle correctly (Neal Liu). - Rearrange and clean up the PSCI cpuidle driver (Ulf Hansson). - Make the PM core emit "changed" uevent when adding/removing the "wakeup" sysfs attribute of devices (Abhishek Pandit-Subedi). - Add a helper macro for declaring PM callbacks and use it in the MMC jz4740 driver (Paul Cercueil). - Fix white space in some places in the hibernate code and make the system-wide PM code use "const char *" where appropriate (Xiang Chen, Alexey Dobriyan). - Add one more "unsafe" helper macro to the freezer to cover the NFS use case (He Zhe). - Change the language in the generic PM domains framework to use parent/child terminology and clean up a typo and some comment formatting in that code (Kees Cook, Geert Uytterhoeven). 
- Update the operating performance points OPP framework (Lukasz Luba, Andrew-sh.Cheng, Valdis Kletnieks): * Refactor dev_pm_opp_of_register_em() and update related drivers. * Add a missing function export. * Allow disabled OPPs in dev_pm_opp_get_freq(). - Update devfreq core and drivers (Chanwoo Choi, Lukasz Luba, Enric Balletbo i Serra, Dmitry Osipenko, Kieran Bingham, Marc Zyngier): * Add support for delayed timers to the devfreq core and make the Samsung exynos5422-dmc driver use it. * Unify sysfs interface to use "df-" as a prefix in instance names consistently. * Fix devfreq_summary debugfs node indentation. * Add the rockchip,pmu phandle to the rk3399_dmc driver DT bindings. * List Dmitry Osipenko as the Tegra devfreq driver maintainer. * Fix typos in the core devfreq code. - Update the pm-graph utility to version 5.7 including a number of fixes related to suspend-to-idle (Todd Brandt). - Fix coccicheck errors and warnings in the cpupower utility (Shuah Khan). - Replace HTTP links with HTTPs ones in multiple places (Alexander A. 
Klimov)" * tag 'pm-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (71 commits) cpuidle: ACPI: fix 'return' with no value build warning cpufreq: intel_pstate: Fix EPP setting via sysfs in active mode cpufreq: intel_pstate: Rearrange the storing of new EPP values intel_idle: Customize IceLake server support PM / devfreq: Fix the wrong end with semicolon PM / devfreq: Fix indentaion of devfreq_summary debugfs node PM / devfreq: Clean up the devfreq instance name in sysfs attr memory: samsung: exynos5422-dmc: Add module param to control IRQ mode memory: samsung: exynos5422-dmc: Adjust polling interval and uptreshold memory: samsung: exynos5422-dmc: Use delayed timer as default PM / devfreq: Add support delayed timer for polling mode dt-bindings: devfreq: rk3399_dmc: Add rockchip,pmu phandle PM / devfreq: tegra: Add Dmitry as a maintainer PM / devfreq: event: Fix trivial spelling PM / devfreq: rk3399_dmc: Fix kernel oops when rockchip,pmu is absent cpuidle: change enter_s2idle() prototype cpuidle: psci: Prevent domain idlestates until consumers are ready cpuidle: psci: Convert PM domain to platform driver cpuidle: psci: Fix error path via converting to a platform driver cpuidle: psci: Fail cpuidle registration if set OSI mode failed ...
276 lines
6.5 KiB
C
276 lines
6.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* System Control and Power Interface (SCMI) based CPUFreq Interface driver
|
|
*
|
|
* Copyright (C) 2018 ARM Ltd.
|
|
* Sudeep Holla <sudeep.holla@arm.com>
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpufreq.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/energy_model.h>
|
|
#include <linux/export.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pm_opp.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/scmi_protocol.h>
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Per-policy driver state; scmi_cpufreq_init() allocates one instance and
 * stores it in cpufreq_policy::driver_data.
 */
struct scmi_data {
	/* Value returned by perf_ops->device_domain_id() for cpu_dev. */
	int domain_id;
	/* CPU device the policy was initialised with. */
	struct device *cpu_dev;
};
|
|
|
|
/*
 * SCMI handle shared by every callback in this file; assigned from
 * sdev->handle in scmi_cpufreq_probe().
 */
static const struct scmi_handle *handle;
|
|
|
|
static unsigned int scmi_cpufreq_get_rate(unsigned int cpu)
|
|
{
|
|
struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
|
|
struct scmi_perf_ops *perf_ops = handle->perf_ops;
|
|
struct scmi_data *priv = policy->driver_data;
|
|
unsigned long rate;
|
|
int ret;
|
|
|
|
ret = perf_ops->freq_get(handle, priv->domain_id, &rate, false);
|
|
if (ret)
|
|
return 0;
|
|
return rate / 1000;
|
|
}
|
|
|
|
/*
|
|
* perf_ops->freq_set is not a synchronous, the actual OPP change will
|
|
* happen asynchronously and can get notified if the events are
|
|
* subscribed for by the SCMI firmware
|
|
*/
|
|
static int
|
|
scmi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index)
|
|
{
|
|
int ret;
|
|
struct scmi_data *priv = policy->driver_data;
|
|
struct scmi_perf_ops *perf_ops = handle->perf_ops;
|
|
u64 freq = policy->freq_table[index].frequency;
|
|
|
|
ret = perf_ops->freq_set(handle, priv->domain_id, freq * 1000, false);
|
|
if (!ret)
|
|
arch_set_freq_scale(policy->related_cpus, freq,
|
|
policy->cpuinfo.max_freq);
|
|
return ret;
|
|
}
|
|
|
|
static unsigned int scmi_cpufreq_fast_switch(struct cpufreq_policy *policy,
|
|
unsigned int target_freq)
|
|
{
|
|
struct scmi_data *priv = policy->driver_data;
|
|
struct scmi_perf_ops *perf_ops = handle->perf_ops;
|
|
|
|
if (!perf_ops->freq_set(handle, priv->domain_id,
|
|
target_freq * 1000, true)) {
|
|
arch_set_freq_scale(policy->related_cpus, target_freq,
|
|
policy->cpuinfo.max_freq);
|
|
return target_freq;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
|
|
{
|
|
int cpu, domain, tdomain;
|
|
struct device *tcpu_dev;
|
|
|
|
domain = handle->perf_ops->device_domain_id(cpu_dev);
|
|
if (domain < 0)
|
|
return domain;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
if (cpu == cpu_dev->id)
|
|
continue;
|
|
|
|
tcpu_dev = get_cpu_device(cpu);
|
|
if (!tcpu_dev)
|
|
continue;
|
|
|
|
tdomain = handle->perf_ops->device_domain_id(tcpu_dev);
|
|
if (tdomain == domain)
|
|
cpumask_set_cpu(cpu, cpumask);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __maybe_unused
|
|
scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
|
|
struct device *cpu_dev)
|
|
{
|
|
unsigned long Hz;
|
|
int ret, domain;
|
|
|
|
domain = handle->perf_ops->device_domain_id(cpu_dev);
|
|
if (domain < 0)
|
|
return domain;
|
|
|
|
/* Get the power cost of the performance domain. */
|
|
Hz = *KHz * 1000;
|
|
ret = handle->perf_ops->est_power_get(handle, domain, &Hz, power);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/* The EM framework specifies the frequency in KHz. */
|
|
*KHz = Hz / 1000;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int scmi_cpufreq_init(struct cpufreq_policy *policy)
|
|
{
|
|
int ret, nr_opp;
|
|
unsigned int latency;
|
|
struct device *cpu_dev;
|
|
struct scmi_data *priv;
|
|
struct cpufreq_frequency_table *freq_table;
|
|
struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
|
|
|
|
cpu_dev = get_cpu_device(policy->cpu);
|
|
if (!cpu_dev) {
|
|
pr_err("failed to get cpu%d device\n", policy->cpu);
|
|
return -ENODEV;
|
|
}
|
|
|
|
ret = handle->perf_ops->device_opps_add(handle, cpu_dev);
|
|
if (ret) {
|
|
dev_warn(cpu_dev, "failed to add opps to the device\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus);
|
|
if (ret) {
|
|
dev_warn(cpu_dev, "failed to get sharing cpumask\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
|
|
if (ret) {
|
|
dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
|
|
__func__, ret);
|
|
return ret;
|
|
}
|
|
|
|
nr_opp = dev_pm_opp_get_opp_count(cpu_dev);
|
|
if (nr_opp <= 0) {
|
|
dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
|
|
ret = -EPROBE_DEFER;
|
|
goto out_free_opp;
|
|
}
|
|
|
|
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
|
if (!priv) {
|
|
ret = -ENOMEM;
|
|
goto out_free_opp;
|
|
}
|
|
|
|
ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
|
|
if (ret) {
|
|
dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
|
|
goto out_free_priv;
|
|
}
|
|
|
|
priv->cpu_dev = cpu_dev;
|
|
priv->domain_id = handle->perf_ops->device_domain_id(cpu_dev);
|
|
|
|
policy->driver_data = priv;
|
|
policy->freq_table = freq_table;
|
|
|
|
/* SCMI allows DVFS request for any domain from any CPU */
|
|
policy->dvfs_possible_from_any_cpu = true;
|
|
|
|
latency = handle->perf_ops->transition_latency_get(handle, cpu_dev);
|
|
if (!latency)
|
|
latency = CPUFREQ_ETERNAL;
|
|
|
|
policy->cpuinfo.transition_latency = latency;
|
|
|
|
policy->fast_switch_possible =
|
|
handle->perf_ops->fast_switch_possible(handle, cpu_dev);
|
|
|
|
em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus);
|
|
|
|
return 0;
|
|
|
|
out_free_priv:
|
|
kfree(priv);
|
|
out_free_opp:
|
|
dev_pm_opp_remove_all_dynamic(cpu_dev);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int scmi_cpufreq_exit(struct cpufreq_policy *policy)
|
|
{
|
|
struct scmi_data *priv = policy->driver_data;
|
|
|
|
dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
|
|
dev_pm_opp_remove_all_dynamic(priv->cpu_dev);
|
|
kfree(priv);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct cpufreq_driver scmi_cpufreq_driver = {
|
|
.name = "scmi",
|
|
.flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
|
|
CPUFREQ_NEED_INITIAL_FREQ_CHECK |
|
|
CPUFREQ_IS_COOLING_DEV,
|
|
.verify = cpufreq_generic_frequency_table_verify,
|
|
.attr = cpufreq_generic_attr,
|
|
.target_index = scmi_cpufreq_set_target,
|
|
.fast_switch = scmi_cpufreq_fast_switch,
|
|
.get = scmi_cpufreq_get_rate,
|
|
.init = scmi_cpufreq_init,
|
|
.exit = scmi_cpufreq_exit,
|
|
};
|
|
|
|
static int scmi_cpufreq_probe(struct scmi_device *sdev)
|
|
{
|
|
int ret;
|
|
|
|
handle = sdev->handle;
|
|
|
|
if (!handle || !handle->perf_ops)
|
|
return -ENODEV;
|
|
|
|
ret = cpufreq_register_driver(&scmi_cpufreq_driver);
|
|
if (ret) {
|
|
dev_err(&sdev->dev, "%s: registering cpufreq failed, err: %d\n",
|
|
__func__, ret);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void scmi_cpufreq_remove(struct scmi_device *sdev)
|
|
{
|
|
cpufreq_unregister_driver(&scmi_cpufreq_driver);
|
|
}
|
|
|
|
static const struct scmi_device_id scmi_id_table[] = {
|
|
{ SCMI_PROTOCOL_PERF, "cpufreq" },
|
|
{ },
|
|
};
|
|
MODULE_DEVICE_TABLE(scmi, scmi_id_table);
|
|
|
|
static struct scmi_driver scmi_cpufreq_drv = {
|
|
.name = "scmi-cpufreq",
|
|
.probe = scmi_cpufreq_probe,
|
|
.remove = scmi_cpufreq_remove,
|
|
.id_table = scmi_id_table,
|
|
};
|
|
module_scmi_driver(scmi_cpufreq_drv);
|
|
|
|
/* Module metadata. */
MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
MODULE_DESCRIPTION("ARM SCMI CPUFreq interface driver");
MODULE_LICENSE("GPL v2");
|