From bb8c26d9387fe428068dcab35b1873ea3b881de1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Aug 2021 10:22:55 +0530 Subject: [PATCH 01/23] cpufreq: vexpress: Set CPUFREQ_IS_COOLING_DEV flag Reuse the cpufreq core's registration of cooling device by setting the CPUFREQ_IS_COOLING_DEV flag. Set this only if bL switcher isn't enabled. Signed-off-by: Viresh Kumar --- drivers/cpufreq/vexpress-spc-cpufreq.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 51dfa9ae6cf5..ab56813b7256 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -47,7 +46,6 @@ static bool bL_switching_enabled; #define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) #define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) -static struct thermal_cooling_device *cdev[MAX_CLUSTERS]; static struct clk *clk[MAX_CLUSTERS]; static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; static atomic_t cluster_usage[MAX_CLUSTERS + 1]; @@ -457,11 +455,6 @@ static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy) struct device *cpu_dev; int cur_cluster = cpu_to_cluster(policy->cpu); - if (cur_cluster < MAX_CLUSTERS) { - cpufreq_cooling_unregister(cdev[cur_cluster]); - cdev[cur_cluster] = NULL; - } - cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { pr_err("%s: failed to get cpu%d device\n", __func__, @@ -473,17 +466,6 @@ static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy) return 0; } -static void ve_spc_cpufreq_ready(struct cpufreq_policy *policy) -{ - int cur_cluster = cpu_to_cluster(policy->cpu); - - /* Do not register a cpu_cooling device if we are in IKS mode */ - if (cur_cluster >= MAX_CLUSTERS) - return; - - cdev[cur_cluster] = of_cpufreq_cooling_register(policy); -} - 
static struct cpufreq_driver ve_spc_cpufreq_driver = { .name = "vexpress-spc", .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | @@ -493,7 +475,6 @@ static struct cpufreq_driver ve_spc_cpufreq_driver = { .get = ve_spc_cpufreq_get_rate, .init = ve_spc_cpufreq_init, .exit = ve_spc_cpufreq_exit, - .ready = ve_spc_cpufreq_ready, .attr = cpufreq_generic_attr, }; @@ -553,6 +534,9 @@ static int ve_spc_cpufreq_probe(struct platform_device *pdev) for (i = 0; i < MAX_CLUSTERS; i++) mutex_init(&cluster_lock[i]); + if (!is_bL_switching_enabled()) + ve_spc_cpufreq_driver.flags |= CPUFREQ_IS_COOLING_DEV; + ret = cpufreq_register_driver(&ve_spc_cpufreq_driver); if (ret) { pr_info("%s: Failed registering platform driver: %s, err: %d\n", From c17495b01b72b53bd290f442d39b060e015c7aea Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:04:33 +0530 Subject: [PATCH 02/23] cpufreq: Add callback to register with energy model Many cpufreq drivers register with the energy model for each policy and do exactly the same thing. Follow the footsteps of thermal-cooling, to get it done from the cpufreq core itself. Provide a new callback, which will be called, if present, by the cpufreq core at the right moment (more on that in the code's comment). Also provide a generic implementation that uses dev_pm_opp_of_register_em(). This also allows us to register with the EM at a later point of time, compared to ->init(), from where the EM core can access cpufreq policy directly using cpufreq_cpu_get() type of helpers and perform other work, like marking few frequencies inefficient, this will be done separately. 
Reviewed-by: Quentin Perret Reviewed-by: Lukasz Luba Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 13 +++++++++++++ include/linux/cpufreq.h | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 45f3416988f1..d301f39248a0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1491,6 +1491,19 @@ static int cpufreq_online(unsigned int cpu) write_lock_irqsave(&cpufreq_driver_lock, flags); list_add(&policy->policy_list, &cpufreq_policy_list); write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + /* + * Register with the energy model before + * sched_cpufreq_governor_change() is called, which will result + * in rebuilding of the sched domains, which should only be done + * once the energy model is properly initialized for the policy + * first. + * + * Also, this should be called before the policy is registered + * with cooling framework. + */ + if (cpufreq_driver->register_em) + cpufreq_driver->register_em(policy); } ret = cpufreq_init_policy(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9fd719475fcd..c65a1d7385f8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -9,10 +9,12 @@ #define _LINUX_CPUFREQ_H #include +#include #include #include #include #include +#include #include #include #include @@ -373,6 +375,12 @@ struct cpufreq_driver { /* platform specific boost support code */ bool boost_enabled; int (*set_boost)(struct cpufreq_policy *policy, int state); + + /* + * Set by drivers that want to register with the energy model after the + * policy is properly initialized, but before the governor is started. 
+ */ + void (*register_em)(struct cpufreq_policy *policy); }; /* flags */ @@ -1046,4 +1054,10 @@ unsigned int cpufreq_generic_get(unsigned int cpu); void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); + +static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy) +{ + dev_pm_opp_of_register_em(get_cpu_device(policy->cpu), + policy->related_cpus); +} #endif /* _LINUX_CPUFREQ_H */ From 94ab4c3c259c7d00746e5cafb55b5f5125f34b71 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 03/23] cpufreq: dt: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index ece52863ba62..8fcaba541539 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -143,8 +143,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); - return 0; out_clk_put: @@ -184,6 +182,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { .exit = cpufreq_exit, .online = cpufreq_online, .offline = cpufreq_offline, + .register_em = cpufreq_register_em_with_opp, .name = "cpufreq-dt", .attr = cpufreq_dt_attr, .suspend = cpufreq_generic_suspend, From fcd300c685d5152e76a811c492b0e6eccde29717 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 04/23] cpufreq: imx6q: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. 
Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx6q-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 5bf5fc759881..90beb26ed34e 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -192,7 +192,6 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy) policy->clk = clks[ARM].clk; cpufreq_generic_init(policy, freq_table, transition_latency); policy->suspend_freq = max_freq; - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; } @@ -204,6 +203,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = { .target_index = imx6q_set_target, .get = cpufreq_generic_get, .init = imx6q_cpufreq_init, + .register_em = cpufreq_register_em_with_opp, .name = "imx6q-cpufreq", .attr = cpufreq_generic_attr, .suspend = cpufreq_generic_suspend, From 3701fd64a3fb947fc805ca0d108ab87562a9659b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 05/23] cpufreq: mediatek: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. 
Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 87019d5a9547..866163883b48 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -448,8 +448,6 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) policy->driver_data = info; policy->clk = info->cpu_clk; - dev_pm_opp_of_register_em(info->cpu_dev, policy->cpus); - return 0; } @@ -471,6 +469,7 @@ static struct cpufreq_driver mtk_cpufreq_driver = { .get = cpufreq_generic_get, .init = mtk_cpufreq_init, .exit = mtk_cpufreq_exit, + .register_em = cpufreq_register_em_with_opp, .name = "mtk-cpufreq", .attr = cpufreq_generic_attr, }; From 361a172d230964807c0b479738749c50d95d7b50 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 06/23] cpufreq: omap: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. Signed-off-by: Viresh Kumar --- drivers/cpufreq/omap-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index e035ee216b0f..1b50df06c6bc 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -131,7 +131,6 @@ static int omap_cpu_init(struct cpufreq_policy *policy) /* FIXME: what's the actual transition time? 
*/ cpufreq_generic_init(policy, freq_table, 300 * 1000); - dev_pm_opp_of_register_em(mpu_dev, policy->cpus); return 0; } @@ -150,6 +149,7 @@ static struct cpufreq_driver omap_driver = { .get = cpufreq_generic_get, .init = omap_cpu_init, .exit = omap_cpu_exit, + .register_em = cpufreq_register_em_with_opp, .name = "omap", .attr = cpufreq_generic_attr, }; From e96c2153d0fc0a1c218bf5ba149ccdf75d19a275 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 07/23] cpufreq: qcom-cpufreq-hw: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index f86859bf76f1..c2e71c430fbf 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -362,8 +362,6 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) goto error; } - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); - if (policy_has_boost_freq(policy)) { ret = cpufreq_enable_boost_support(); if (ret) @@ -412,6 +410,7 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .get = qcom_cpufreq_hw_get, .init = qcom_cpufreq_hw_cpu_init, .exit = qcom_cpufreq_hw_cpu_exit, + .register_em = cpufreq_register_em_with_opp, .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", .attr = qcom_cpufreq_hw_attr, From d00aa8061e04da5c570a54283462e47fab01bd3c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 20 Jul 2021 08:41:19 -0600 Subject: [PATCH 08/23] ARM: dts: omap: Drop references to opp.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit opp.txt is getting removed with the OPP binding converted to DT schema. 
As it is unusual to reference a binding doc from a dts file, let's just remove the reference. Cc: "Benoît Cousson" Cc: Tony Lindgren Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- arch/arm/boot/dts/omap34xx.dtsi | 1 - arch/arm/boot/dts/omap36xx.dtsi | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi index feaa43b78535..8b8451399784 100644 --- a/arch/arm/boot/dts/omap34xx.dtsi +++ b/arch/arm/boot/dts/omap34xx.dtsi @@ -24,7 +24,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi index 20844dbc002e..22b33098b1a2 100644 --- a/arch/arm/boot/dts/omap36xx.dtsi +++ b/arch/arm/boot/dts/omap36xx.dtsi @@ -29,7 +29,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; From 29fc76957a9754768cfa3884b413ffc10f82d3b4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 20 Jul 2021 08:41:20 -0600 Subject: [PATCH 09/23] dt-bindings: Clean-up OPP binding node names in examples In preparation to convert OPP bindings to DT schema, clean-up a few OPP binding node names in the binding examples. 
Cc: Georgi Djakov Cc: Shawn Guo Cc: Sascha Hauer Cc: Leonard Crestez Acked-by: Viresh Kumar Signed-off-by: Rob Herring Acked-by: Georgi Djakov Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml | 2 +- Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml | 2 +- .../devicetree/bindings/interconnect/fsl,imx8m-noc.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml index 0f73f436bea7..4bea51d1e7ea 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml +++ b/Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml @@ -136,7 +136,7 @@ examples: resets = <&reset 0>, <&reset 1>; }; - gpu_opp_table: opp_table0 { + gpu_opp_table: opp-table { compatible = "operating-points-v2"; opp-533000000 { diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml index 696c17aedbbe..d209f272625d 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml +++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml @@ -160,7 +160,7 @@ examples: #cooling-cells = <2>; }; - gpu_opp_table: opp_table0 { + gpu_opp_table: opp-table { compatible = "operating-points-v2"; opp-533000000 { diff --git a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml index a8873739d61a..b8204ed22dd5 100644 --- a/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml +++ b/Documentation/devicetree/bindings/interconnect/fsl,imx8m-noc.yaml @@ -81,10 +81,10 @@ examples: noc_opp_table: opp-table { compatible = "operating-points-v2"; - opp-133M { + opp-133333333 { opp-hz = /bits/ 64 <133333333>; }; - opp-800M { + opp-800000000 { opp-hz = /bits/ 64 <800000000>; }; }; From 94274f20f6bf5eb0099bbf7e133aac1f5cd087e8 Mon Sep 17 
00:00:00 2001 From: Rob Herring Date: Tue, 20 Jul 2021 08:41:21 -0600 Subject: [PATCH 10/23] dt-bindings: opp: Convert to DT schema Convert the OPP v1 and v2 bindings to DT schema format. As the OPPv2 binding can be extended by vendors, we need to split the common part out from the "operating-points-v2" conforming compatible. Cc: Yangtao Li Cc: Nishanth Menon Cc: Stephen Boyd Cc: Maxime Ripard Cc: Chen-Yu Tsai Acked-by: Viresh Kumar Signed-off-by: Rob Herring Acked-by: Maxime Ripard Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-dt.txt | 2 +- .../bindings/cpufreq/cpufreq-mediatek.txt | 2 +- .../bindings/cpufreq/cpufreq-st.txt | 6 +- .../cpufreq/nvidia,tegra20-cpufreq.txt | 2 +- .../bindings/devfreq/rk3399_dmc.txt | 2 +- .../allwinner,sun50i-h6-operating-points.yaml | 4 + .../devicetree/bindings/opp/opp-v1.yaml | 51 ++ .../devicetree/bindings/opp/opp-v2-base.yaml | 214 ++++++ .../devicetree/bindings/opp/opp-v2.yaml | 475 +++++++++++++ Documentation/devicetree/bindings/opp/opp.txt | 622 ------------------ .../devicetree/bindings/opp/qcom-opp.txt | 2 +- .../bindings/opp/ti-omap5-opp-supply.txt | 2 +- .../bindings/power/power-domain.yaml | 2 +- 13 files changed, 753 insertions(+), 633 deletions(-) create mode 100644 Documentation/devicetree/bindings/opp/opp-v1.yaml create mode 100644 Documentation/devicetree/bindings/opp/opp-v2-base.yaml create mode 100644 Documentation/devicetree/bindings/opp/opp-v2.yaml delete mode 100644 Documentation/devicetree/bindings/opp/opp.txt diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt index 56f442374383..1d7e49167666 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt @@ -11,7 +11,7 @@ Required properties: - None Optional properties: -- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt for +- operating-points: Refer to 
Documentation/devicetree/bindings/opp/opp-v1.yaml for details. OPPs *must* be supplied either via DT, i.e. this property, or populated at runtime. - clock-latency: Specify the possible maximum transition latency for clock, diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt index ef68711716fb..b8233ec91d3d 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt @@ -10,7 +10,7 @@ Required properties: transition and not stable yet. Please refer to Documentation/devicetree/bindings/clock/clock-bindings.txt for generic clock consumer properties. -- operating-points-v2: Please refer to Documentation/devicetree/bindings/opp/opp.txt +- operating-points-v2: Please refer to Documentation/devicetree/bindings/opp/opp-v2.yaml for detail. - proc-supply: Regulator for Vproc of CPU cluster. diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt index d91a02a3b6b0..6b0b452acef0 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt @@ -6,8 +6,6 @@ from the SoC, then supplies the OPP framework with 'prop' and 'supported hardware' information respectively. The framework is then able to read the DT and operate in the usual way. -For more information about the expected DT format [See: ../opp/opp.txt]. - Frequency Scaling only ---------------------- @@ -15,7 +13,7 @@ No vendor specific driver required for this. Located in CPU's node: -- operating-points : [See: ../power/opp.txt] +- operating-points : [See: ../opp/opp-v1.yaml] Example [safe] -------------- @@ -37,7 +35,7 @@ This requires the ST CPUFreq driver to supply 'process' and 'version' info. 
Located in CPU's node: -- operating-points-v2 : [See ../power/opp.txt] +- operating-points-v2 : [See ../opp/opp-v2.yaml] Example [unsafe] ---------------- diff --git a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt index 52a24b82fd86..bdbfd7c36101 100644 --- a/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt +++ b/Documentation/devicetree/bindings/cpufreq/nvidia,tegra20-cpufreq.txt @@ -4,7 +4,7 @@ Binding for NVIDIA Tegra20 CPUFreq Required properties: - clocks: Must contain an entry for the CPU clock. See ../clocks/clock-bindings.txt for details. -- operating-points-v2: See ../bindings/opp/opp.txt for details. +- operating-points-v2: See ../bindings/opp/opp-v2.yaml for details. - #cooling-cells: Should be 2. See ../thermal/thermal-cooling-devices.yaml for details. For each opp entry in 'operating-points-v2' table: diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt index ac189dd82b08..3fbeb3733c48 100644 --- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt +++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt @@ -8,7 +8,7 @@ Required properties: - clocks: Phandles for clock specified in "clock-names" property - clock-names : The name of clock used by the DFI, must be "pclk_ddr_mon"; -- operating-points-v2: Refer to Documentation/devicetree/bindings/opp/opp.txt +- operating-points-v2: Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml for details. - center-supply: DMC supply node. - status: Marks the node enabled/disabled. 
diff --git a/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml index aeff2bd774dd..729ae97b63d9 100644 --- a/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml +++ b/Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml @@ -18,6 +18,9 @@ description: | sun50i-cpufreq-nvmem driver reads the efuse value from the SoC to provide the OPP framework with required information. +allOf: + - $ref: opp-v2-base.yaml# + properties: compatible: const: allwinner,sun50i-h6-operating-points @@ -43,6 +46,7 @@ patternProperties: properties: opp-hz: true + clock-latency-ns: true patternProperties: "opp-microvolt-.*": true diff --git a/Documentation/devicetree/bindings/opp/opp-v1.yaml b/Documentation/devicetree/bindings/opp/opp-v1.yaml new file mode 100644 index 000000000000..d585d536a3fb --- /dev/null +++ b/Documentation/devicetree/bindings/opp/opp-v1.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/opp-v1.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic OPP (Operating Performance Points) v1 Bindings + +maintainers: + - Viresh Kumar + +description: |+ + Devices work at voltage-current-frequency combinations and some implementations + have the liberty of choosing these. These combinations are called Operating + Performance Points aka OPPs. This document defines bindings for these OPPs + applicable across wide range of devices. For illustration purpose, this document + uses CPU as a device. + + This binding only supports voltage-frequency pairs. 
+ +select: true + +properties: + operating-points: + $ref: /schemas/types.yaml#/definitions/uint32-matrix + items: + items: + - description: Frequency in kHz + - description: Voltage for OPP in uV + + +additionalProperties: true +examples: + - | + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a9"; + device_type = "cpu"; + reg = <0>; + next-level-cache = <&L2>; + operating-points = + /* kHz uV */ + <792000 1100000>, + <396000 950000>, + <198000 850000>; + }; + }; +... diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml new file mode 100644 index 000000000000..ae3ae4d39843 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml @@ -0,0 +1,214 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/opp-v2-base.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic OPP (Operating Performance Points) Common Binding + +maintainers: + - Viresh Kumar + +description: | + Devices work at voltage-current-frequency combinations and some implementations + have the liberty of choosing these. These combinations are called Operating + Performance Points aka OPPs. This document defines bindings for these OPPs + applicable across wide range of devices. For illustration purpose, this document + uses CPU as a device. + + This describes the OPPs belonging to a device. + +select: false + +properties: + $nodename: + pattern: '^opp-table(-[a-z0-9]+)?$' + + opp-shared: + description: + Indicates that device nodes using this OPP Table Node's phandle switch + their DVFS state together, i.e. they share clock/voltage/current lines. + Missing property means devices have independent clock/voltage/current + lines, but they share OPP tables. 
+ type: boolean + +patternProperties: + '^opp-?[0-9]+$': + type: object + description: + One or more OPP nodes describing voltage-current-frequency combinations. + Their name isn't significant but their phandle can be used to reference an + OPP. These are mandatory except for the case where the OPP table is + present only to indicate dependency between devices using the opp-shared + property. + + properties: + opp-hz: + description: + Frequency in Hz, expressed as a 64-bit big-endian integer. This is a + required property for all device nodes, unless another "required" + property to uniquely identify the OPP nodes exists. Devices like power + domains must have another (implementation dependent) property. + + opp-microvolt: + description: | + Voltage for the OPP + + A single regulator's voltage is specified with an array of size one or three. + Single entry is for target voltage and three entries are for <target min max> + voltages. + + Entries for multiple regulators shall be provided in the same field separated + by angular brackets <>. The OPP binding doesn't provide any provisions to + relate the values to their power supplies or the order in which the supplies + need to be configured and that is left for the implementation specific + binding. + + Entries for all regulators shall be of the same size, i.e. either all use a + single value or triplets. + minItems: 1 + maxItems: 8 # Should be enough regulators + items: + minItems: 1 + maxItems: 3 + + opp-microamp: + description: | + The maximum current drawn by the device in microamperes considering + system specific parameters (such as transients, process, aging, + maximum operating temperature range etc.) as necessary. This may be + used to set the most efficient regulator operating mode. + + Should only be set if opp-microvolt or opp-microvolt-<name> is set for + the OPP. + + Entries for multiple regulators shall be provided in the same field + separated by angular brackets <>. 
If current values aren't required + for a regulator, then it shall be filled with 0. If current values + aren't required for any of the regulators, then this field is not + required. The OPP binding doesn't provide any provisions to relate the + values to their power supplies or the order in which the supplies need + to be configured and that is left for the implementation specific + binding. + minItems: 1 + maxItems: 8 # Should be enough regulators + + opp-level: + description: + A value representing the performance level of the device. + $ref: /schemas/types.yaml#/definitions/uint32 + + opp-peak-kBps: + description: + Peak bandwidth in kilobytes per second, expressed as an array of + 32-bit big-endian integers. Each element of the array represents the + peak bandwidth value of each interconnect path. The number of elements + should match the number of interconnect paths. + minItems: 1 + maxItems: 32 # Should be enough + + opp-avg-kBps: + description: + Average bandwidth in kilobytes per second, expressed as an array + of 32-bit big-endian integers. Each element of the array represents the + average bandwidth value of each interconnect path. The number of elements + should match the number of interconnect paths. This property is only + meaningful in OPP tables where opp-peak-kBps is present. + minItems: 1 + maxItems: 32 # Should be enough + + clock-latency-ns: + description: + Specifies the maximum possible transition latency (in nanoseconds) for + switching to this OPP from any other OPP. + + turbo-mode: + description: + Marks the OPP to be used only for turbo modes. Turbo mode is available + on some platforms, where the device can run over its operating + frequency for a short duration of time limited by the device's power, + current and thermal limits. + type: boolean + + opp-suspend: + description: + Marks the OPP to be used during device suspend. If multiple OPPs in + the table have this, the OPP with highest opp-hz will be used. 
+ type: boolean + + opp-supported-hw: + description: | + This property allows a platform to enable only a subset of the OPPs + from the larger set present in the OPP table, based on the current + version of the hardware (already known to the operating system). + + Each block present in the array of blocks in this property, represents + a sub-group of hardware versions supported by the OPP. i.e. <sub-group + A>, <sub-group-B>, etc. The OPP will be enabled if _any_ of these + sub-groups match the hardware's version. + + Each sub-group is a platform defined array representing the hierarchy + of hardware versions supported by the platform. For a platform with + three hierarchical levels of version (X.Y.Z), this field shall look + like + + opp-supported-hw = <X1 Y1 Z1>, <X2 Y2 Z2>, <X3 Y3 Z3>. + + Each level (eg. X1) in version hierarchy is represented by a 32 bit + value, one bit per version and so there can be maximum 32 versions per + level. Logical AND (&) operation is performed for each level with the + hardware's level version and a non-zero output for _all_ the levels in + a sub-group means the OPP is supported by hardware. A value of + 0xFFFFFFFF for each level in the sub-group will enable the OPP for all + versions for the hardware. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + maxItems: 32 + items: + minItems: 1 + maxItems: 4 + + required-opps: + description: + This contains phandle to an OPP node in another device's OPP table. It + may contain an array of phandles, where each phandle points to an OPP + of a different device. It should not contain multiple phandles to the + OPP nodes in the same OPP table. This specifies the minimum required + OPP of the device(s), whose OPP's phandle is present in this property, + for the functioning of the current device at the current OPP (where + this property is present). + $ref: /schemas/types.yaml#/definitions/phandle-array + + patternProperties: + '^opp-microvolt-': + description: + Named opp-microvolt property. 
This is exactly similar to the above + opp-microvolt property, but allows multiple voltage ranges to be + provided for the same OPP. At runtime, the platform can pick a <name> + and matching opp-microvolt-<name> property will be enabled for all + OPPs. If the platform doesn't pick a specific <name> or the <name> + doesn't match with any opp-microvolt-<name> properties, then + opp-microvolt property shall be used, if present. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 1 + maxItems: 8 # Should be enough regulators + items: + minItems: 1 + maxItems: 3 + + '^opp-microamp-': + description: + Named opp-microamp property. Similar to opp-microvolt-<name> property, + but for microamp instead. + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 # Should be enough regulators + + dependencies: + opp-avg-kBps: [ opp-peak-kBps ] + +required: + - compatible + +additionalProperties: true + +... diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml new file mode 100644 index 000000000000..eaf8fba2c691 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml @@ -0,0 +1,475 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/opp-v2.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic OPP (Operating Performance Points) Bindings + +maintainers: + - Viresh Kumar + +allOf: + - $ref: opp-v2-base.yaml# + +properties: + compatible: + const: operating-points-v2 + +unevaluatedProperties: false + +examples: + - | + /* + * Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states + * together. 
+ */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a9"; + device_type = "cpu"; + reg = <0>; + next-level-cache = <&L2>; + clocks = <&clk_controller 0>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply0>; + operating-points-v2 = <&cpu0_opp_table0>; + }; + + cpu@1 { + compatible = "arm,cortex-a9"; + device_type = "cpu"; + reg = <1>; + next-level-cache = <&L2>; + clocks = <&clk_controller 0>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply0>; + operating-points-v2 = <&cpu0_opp_table0>; + }; + }; + + cpu0_opp_table0: opp-table { + compatible = "operating-points-v2"; + opp-shared; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <975000 970000 985000>; + opp-microamp = <70000>; + clock-latency-ns = <300000>; + opp-suspend; + }; + opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1000000 980000 1010000>; + opp-microamp = <80000>; + clock-latency-ns = <310000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <1025000>; + clock-latency-ns = <290000>; + turbo-mode; + }; + }; + + - | + /* + * Example 2: Single cluster, Quad-core Qualcomm Krait, switches DVFS states + * independently. 
+ */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "qcom,krait"; + device_type = "cpu"; + reg = <0>; + next-level-cache = <&L2>; + clocks = <&clk_controller 0>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply0>; + operating-points-v2 = <&cpu_opp_table>; + }; + + cpu@1 { + compatible = "qcom,krait"; + device_type = "cpu"; + reg = <1>; + next-level-cache = <&L2>; + clocks = <&clk_controller 1>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply1>; + operating-points-v2 = <&cpu_opp_table>; + }; + + cpu@2 { + compatible = "qcom,krait"; + device_type = "cpu"; + reg = <2>; + next-level-cache = <&L2>; + clocks = <&clk_controller 2>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply2>; + operating-points-v2 = <&cpu_opp_table>; + }; + + cpu@3 { + compatible = "qcom,krait"; + device_type = "cpu"; + reg = <3>; + next-level-cache = <&L2>; + clocks = <&clk_controller 3>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply3>; + operating-points-v2 = <&cpu_opp_table>; + }; + }; + + cpu_opp_table: opp-table { + compatible = "operating-points-v2"; + + /* + * Missing opp-shared property means CPUs switch DVFS states + * independently. + */ + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <975000 970000 985000>; + opp-microamp = <70000>; + clock-latency-ns = <300000>; + opp-suspend; + }; + opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1000000 980000 1010000>; + opp-microamp = <80000>; + clock-latency-ns = <310000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <1025000>; + opp-microamp = <90000>; + clock-latency-ns = <290000>; + turbo-mode; + }; + }; + + - | + /* + * Example 3: Dual-cluster, Dual-core per cluster. CPUs within a cluster switch + * DVFS state together. 
+ */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a7"; + device_type = "cpu"; + reg = <0>; + next-level-cache = <&L2>; + clocks = <&clk_controller 0>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply0>; + operating-points-v2 = <&cluster0_opp>; + }; + + cpu@1 { + compatible = "arm,cortex-a7"; + device_type = "cpu"; + reg = <1>; + next-level-cache = <&L2>; + clocks = <&clk_controller 0>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply0>; + operating-points-v2 = <&cluster0_opp>; + }; + + cpu@100 { + compatible = "arm,cortex-a15"; + device_type = "cpu"; + reg = <100>; + next-level-cache = <&L2>; + clocks = <&clk_controller 1>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply1>; + operating-points-v2 = <&cluster1_opp>; + }; + + cpu@101 { + compatible = "arm,cortex-a15"; + device_type = "cpu"; + reg = <101>; + next-level-cache = <&L2>; + clocks = <&clk_controller 1>; + clock-names = "cpu"; + cpu-supply = <&cpu_supply1>; + operating-points-v2 = <&cluster1_opp>; + }; + }; + + cluster0_opp: opp-table-0 { + compatible = "operating-points-v2"; + opp-shared; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <975000 970000 985000>; + opp-microamp = <70000>; + clock-latency-ns = <300000>; + opp-suspend; + }; + opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1000000 980000 1010000>; + opp-microamp = <80000>; + clock-latency-ns = <310000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <1025000>; + opp-microamp = <90000>; + clock-latency-ns = <290000>; + turbo-mode; + }; + }; + + cluster1_opp: opp-table-1 { + compatible = "operating-points-v2"; + opp-shared; + + opp-1300000000 { + opp-hz = /bits/ 64 <1300000000>; + opp-microvolt = <1050000 1045000 1055000>; + opp-microamp = <95000>; + clock-latency-ns = <400000>; + opp-suspend; + }; + opp-1400000000 { + opp-hz = /bits/ 64 <1400000000>; + opp-microvolt = <1075000>; + opp-microamp = <100000>; + 
clock-latency-ns = <400000>; + }; + opp-1500000000 { + opp-hz = /bits/ 64 <1500000000>; + opp-microvolt = <1100000 1010000 1110000>; + opp-microamp = <95000>; + clock-latency-ns = <400000>; + turbo-mode; + }; + }; + + - | + /* Example 4: Handling multiple regulators */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "foo,cpu-type"; + device_type = "cpu"; + reg = <0>; + + vcc0-supply = <&cpu_supply0>; + vcc1-supply = <&cpu_supply1>; + vcc2-supply = <&cpu_supply2>; + operating-points-v2 = <&cpu0_opp_table4>; + }; + }; + + cpu0_opp_table4: opp-table-0 { + compatible = "operating-points-v2"; + opp-shared; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <970000>, /* Supply 0 */ + <960000>, /* Supply 1 */ + <960000>; /* Supply 2 */ + opp-microamp = <70000>, /* Supply 0 */ + <70000>, /* Supply 1 */ + <70000>; /* Supply 2 */ + clock-latency-ns = <300000>; + }; + + /* OR */ + + opp-1000000001 { + opp-hz = /bits/ 64 <1000000001>; + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ + opp-microamp = <70000>, /* Supply 0 */ + <70000>, /* Supply 1 */ + <70000>; /* Supply 2 */ + clock-latency-ns = <300000>; + }; + + /* OR */ + + opp-1000000002 { + opp-hz = /bits/ 64 <1000000002>; + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ + opp-microamp = <70000>, /* Supply 0 */ + <0>, /* Supply 1 doesn't need this */ + <70000>; /* Supply 2 */ + clock-latency-ns = <300000>; + }; + }; + + - | + /* + * Example 5: opp-supported-hw + * (example: three level hierarchy of versions: cuts, substrate and process) + */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a7"; + device_type = "cpu"; + reg = <0>; + cpu-supply = <&cpu_supply>; + operating-points-v2 = <&cpu0_opp_table_slow>; + }; + }; + + cpu0_opp_table_slow: 
opp-table { + compatible = "operating-points-v2"; + opp-shared; + + opp-600000000 { + /* + * Supports all substrate and process versions for 0xF + * cuts, i.e. only first four cuts. + */ + opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>; + opp-hz = /bits/ 64 <600000000>; + }; + + opp-800000000 { + /* + * Supports: + * - cuts: only one, 6th cut (represented by 6th bit). + * - substrate: supports 16 different substrate versions + * - process: supports 9 different process versions + */ + opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>; + opp-hz = /bits/ 64 <800000000>; + }; + + opp-900000000 { + /* + * Supports: + * - All cuts and substrate where process version is 0x2. + * - All cuts and process where substrate version is 0x2. + */ + opp-supported-hw = <0xFFFFFFFF 0xFFFFFFFF 0x02>, + <0xFFFFFFFF 0x01 0xFFFFFFFF>; + opp-hz = /bits/ 64 <900000000>; + }; + }; + + - | + /* + * Example 6: opp-microvolt-, opp-microamp-: + * (example: device with two possible microvolt ranges: slow and fast) + */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a7"; + device_type = "cpu"; + reg = <0>; + operating-points-v2 = <&cpu0_opp_table6>; + }; + }; + + cpu0_opp_table6: opp-table-0 { + compatible = "operating-points-v2"; + opp-shared; + + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt-slow = <915000 900000 925000>; + opp-microvolt-fast = <975000 970000 985000>; + opp-microamp-slow = <70000>; + opp-microamp-fast = <71000>; + }; + + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */ + <925000 910000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */ + <965000 960000 975000>; /* Supply vcc1 */ + opp-microamp = <70000>; /* Will be used for both slow/fast */ + }; + }; + + - | + /* + * Example 7: Single cluster Quad-core ARM cortex A53, OPP points from firmware, + * distinct clock controls but two sets of 
clock/voltage/current lines. + */ + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <0x0 0x100>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 0>; + operating-points-v2 = <&cpu_opp0_table>; + }; + cpu@1 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <0x0 0x101>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 1>; + operating-points-v2 = <&cpu_opp0_table>; + }; + cpu@2 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <0x0 0x102>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 2>; + operating-points-v2 = <&cpu_opp1_table>; + }; + cpu@3 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <0x0 0x103>; + next-level-cache = <&A53_L2>; + clocks = <&dvfs_controller 3>; + operating-points-v2 = <&cpu_opp1_table>; + }; + + }; + + cpu_opp0_table: opp-table-0 { + compatible = "operating-points-v2"; + opp-shared; + }; + + cpu_opp1_table: opp-table-1 { + compatible = "operating-points-v2"; + opp-shared; + }; +... diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt deleted file mode 100644 index 08b3da4736cf..000000000000 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ /dev/null @@ -1,622 +0,0 @@ -Generic OPP (Operating Performance Points) Bindings ----------------------------------------------------- - -Devices work at voltage-current-frequency combinations and some implementations -have the liberty of choosing these. These combinations are called Operating -Performance Points aka OPPs. This document defines bindings for these OPPs -applicable across wide range of devices. For illustration purpose, this document -uses CPU as a device. - -This document contain multiple versions of OPP binding and only one of them -should be used per device. 
- -Binding 1: operating-points -============================ - -This binding only supports voltage-frequency pairs. - -Properties: -- operating-points: An array of 2-tuples items, and each item consists - of frequency and voltage like . - freq: clock frequency in kHz - vol: voltage in microvolt - -Examples: - -cpu@0 { - compatible = "arm,cortex-a9"; - reg = <0>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 1100000 - 396000 950000 - 198000 850000 - >; -}; - - -Binding 2: operating-points-v2 -============================ - -* Property: operating-points-v2 - -Devices supporting OPPs must set their "operating-points-v2" property with -phandle to a OPP table in their DT node. The OPP core will use this phandle to -find the operating points for the device. - -This can contain more than one phandle for power domain providers that provide -multiple power domains. That is, one phandle for each power domain. If only one -phandle is available, then the same OPP table will be used for all power domains -provided by the power domain provider. - -If required, this can be extended for SoC vendor specific bindings. Such bindings -should be documented as Documentation/devicetree/bindings/power/-opp.txt -and should have a compatible description like: "operating-points-v2-". - -* OPP Table Node - -This describes the OPPs belonging to a device. This node can have following -properties: - -Required properties: -- compatible: Allow OPPs to express their compatibility. It should be: - "operating-points-v2". - -- OPP nodes: One or more OPP nodes describing voltage-current-frequency - combinations. Their name isn't significant but their phandle can be used to - reference an OPP. These are mandatory except for the case where the OPP table - is present only to indicate dependency between devices using the opp-shared - property. 
- -Optional properties: -- opp-shared: Indicates that device nodes using this OPP Table Node's phandle - switch their DVFS state together, i.e. they share clock/voltage/current lines. - Missing property means devices have independent clock/voltage/current lines, - but they share OPP tables. - -- status: Marks the OPP table enabled/disabled. - - -* OPP Node - -This defines voltage-current-frequency combinations along with other related -properties. - -Required properties: -- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. This is a - required property for all device nodes, unless another "required" property to - uniquely identify the OPP nodes exists. Devices like power domains must have - another (implementation dependent) property. - -- opp-peak-kBps: Peak bandwidth in kilobytes per second, expressed as an array - of 32-bit big-endian integers. Each element of the array represents the - peak bandwidth value of each interconnect path. The number of elements should - match the number of interconnect paths. - -Optional properties: -- opp-microvolt: voltage in micro Volts. - - A single regulator's voltage is specified with an array of size one or three. - Single entry is for target voltage and three entries are for - voltages. - - Entries for multiple regulators shall be provided in the same field separated - by angular brackets <>. The OPP binding doesn't provide any provisions to - relate the values to their power supplies or the order in which the supplies - need to be configured and that is left for the implementation specific - binding. - - Entries for all regulators shall be of the same size, i.e. either all use a - single value or triplets. - -- opp-microvolt-: Named opp-microvolt property. This is exactly similar to - the above opp-microvolt property, but allows multiple voltage ranges to be - provided for the same OPP. At runtime, the platform can pick a and - matching opp-microvolt- property will be enabled for all OPPs. 
If the - platform doesn't pick a specific or the doesn't match with any - opp-microvolt- properties, then opp-microvolt property shall be used, if - present. - -- opp-microamp: The maximum current drawn by the device in microamperes - considering system specific parameters (such as transients, process, aging, - maximum operating temperature range etc.) as necessary. This may be used to - set the most efficient regulator operating mode. - - Should only be set if opp-microvolt is set for the OPP. - - Entries for multiple regulators shall be provided in the same field separated - by angular brackets <>. If current values aren't required for a regulator, - then it shall be filled with 0. If current values aren't required for any of - the regulators, then this field is not required. The OPP binding doesn't - provide any provisions to relate the values to their power supplies or the - order in which the supplies need to be configured and that is left for the - implementation specific binding. - -- opp-microamp-: Named opp-microamp property. Similar to - opp-microvolt- property, but for microamp instead. - -- opp-level: A value representing the performance level of the device, - expressed as a 32-bit integer. - -- opp-avg-kBps: Average bandwidth in kilobytes per second, expressed as an array - of 32-bit big-endian integers. Each element of the array represents the - average bandwidth value of each interconnect path. The number of elements - should match the number of interconnect paths. This property is only - meaningful in OPP tables where opp-peak-kBps is present. - -- clock-latency-ns: Specifies the maximum possible transition latency (in - nanoseconds) for switching to this OPP from any other OPP. - -- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is - available on some platforms, where the device can run over its operating - frequency for a short duration of time limited by the device's power, current - and thermal limits. 
- -- opp-suspend: Marks the OPP to be used during device suspend. If multiple OPPs - in the table have this, the OPP with highest opp-hz will be used. - -- opp-supported-hw: This property allows a platform to enable only a subset of - the OPPs from the larger set present in the OPP table, based on the current - version of the hardware (already known to the operating system). - - Each block present in the array of blocks in this property, represents a - sub-group of hardware versions supported by the OPP. i.e. , - , etc. The OPP will be enabled if _any_ of these sub-groups match - the hardware's version. - - Each sub-group is a platform defined array representing the hierarchy of - hardware versions supported by the platform. For a platform with three - hierarchical levels of version (X.Y.Z), this field shall look like - - opp-supported-hw = , , . - - Each level (eg. X1) in version hierarchy is represented by a 32 bit value, one - bit per version and so there can be maximum 32 versions per level. Logical AND - (&) operation is performed for each level with the hardware's level version - and a non-zero output for _all_ the levels in a sub-group means the OPP is - supported by hardware. A value of 0xFFFFFFFF for each level in the sub-group - will enable the OPP for all versions for the hardware. - -- status: Marks the node enabled/disabled. - -- required-opps: This contains phandle to an OPP node in another device's OPP - table. It may contain an array of phandles, where each phandle points to an - OPP of a different device. It should not contain multiple phandles to the OPP - nodes in the same OPP table. This specifies the minimum required OPP of the - device(s), whose OPP's phandle is present in this property, for the - functioning of the current device at the current OPP (where this property is - present). - -Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. 
- -/ { - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a9"; - reg = <0>; - next-level-cache = <&L2>; - clocks = <&clk_controller 0>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply0>; - operating-points-v2 = <&cpu0_opp_table>; - }; - - cpu@1 { - compatible = "arm,cortex-a9"; - reg = <1>; - next-level-cache = <&L2>; - clocks = <&clk_controller 0>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply0>; - operating-points-v2 = <&cpu0_opp_table>; - }; - }; - - cpu0_opp_table: opp_table0 { - compatible = "operating-points-v2"; - opp-shared; - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <975000 970000 985000>; - opp-microamp = <70000>; - clock-latency-ns = <300000>; - opp-suspend; - }; - opp-1100000000 { - opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <1000000 980000 1010000>; - opp-microamp = <80000>; - clock-latency-ns = <310000>; - }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; - opp-microvolt = <1025000>; - clock-latency-ns = <290000>; - turbo-mode; - }; - }; -}; - -Example 2: Single cluster, Quad-core Qualcom-krait, switches DVFS states -independently. 
- -/ { - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "qcom,krait"; - reg = <0>; - next-level-cache = <&L2>; - clocks = <&clk_controller 0>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply0>; - operating-points-v2 = <&cpu_opp_table>; - }; - - cpu@1 { - compatible = "qcom,krait"; - reg = <1>; - next-level-cache = <&L2>; - clocks = <&clk_controller 1>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply1>; - operating-points-v2 = <&cpu_opp_table>; - }; - - cpu@2 { - compatible = "qcom,krait"; - reg = <2>; - next-level-cache = <&L2>; - clocks = <&clk_controller 2>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply2>; - operating-points-v2 = <&cpu_opp_table>; - }; - - cpu@3 { - compatible = "qcom,krait"; - reg = <3>; - next-level-cache = <&L2>; - clocks = <&clk_controller 3>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply3>; - operating-points-v2 = <&cpu_opp_table>; - }; - }; - - cpu_opp_table: opp_table { - compatible = "operating-points-v2"; - - /* - * Missing opp-shared property means CPUs switch DVFS states - * independently. - */ - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <975000 970000 985000>; - opp-microamp = <70000>; - clock-latency-ns = <300000>; - opp-suspend; - }; - opp-1100000000 { - opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <1000000 980000 1010000>; - opp-microamp = <80000>; - clock-latency-ns = <310000>; - }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; - opp-microvolt = <1025000>; - opp-microamp = <90000; - lock-latency-ns = <290000>; - turbo-mode; - }; - }; -}; - -Example 3: Dual-cluster, Dual-core per cluster. CPUs within a cluster switch -DVFS state together. 
- -/ { - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a7"; - reg = <0>; - next-level-cache = <&L2>; - clocks = <&clk_controller 0>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply0>; - operating-points-v2 = <&cluster0_opp>; - }; - - cpu@1 { - compatible = "arm,cortex-a7"; - reg = <1>; - next-level-cache = <&L2>; - clocks = <&clk_controller 0>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply0>; - operating-points-v2 = <&cluster0_opp>; - }; - - cpu@100 { - compatible = "arm,cortex-a15"; - reg = <100>; - next-level-cache = <&L2>; - clocks = <&clk_controller 1>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply1>; - operating-points-v2 = <&cluster1_opp>; - }; - - cpu@101 { - compatible = "arm,cortex-a15"; - reg = <101>; - next-level-cache = <&L2>; - clocks = <&clk_controller 1>; - clock-names = "cpu"; - cpu-supply = <&cpu_supply1>; - operating-points-v2 = <&cluster1_opp>; - }; - }; - - cluster0_opp: opp_table0 { - compatible = "operating-points-v2"; - opp-shared; - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <975000 970000 985000>; - opp-microamp = <70000>; - clock-latency-ns = <300000>; - opp-suspend; - }; - opp-1100000000 { - opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <1000000 980000 1010000>; - opp-microamp = <80000>; - clock-latency-ns = <310000>; - }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; - opp-microvolt = <1025000>; - opp-microamp = <90000>; - clock-latency-ns = <290000>; - turbo-mode; - }; - }; - - cluster1_opp: opp_table1 { - compatible = "operating-points-v2"; - opp-shared; - - opp-1300000000 { - opp-hz = /bits/ 64 <1300000000>; - opp-microvolt = <1050000 1045000 1055000>; - opp-microamp = <95000>; - clock-latency-ns = <400000>; - opp-suspend; - }; - opp-1400000000 { - opp-hz = /bits/ 64 <1400000000>; - opp-microvolt = <1075000>; - opp-microamp = <100000>; - clock-latency-ns = <400000>; - }; - opp-1500000000 { - opp-hz = /bits/ 64 <1500000000>; - 
opp-microvolt = <1100000 1010000 1110000>; - opp-microamp = <95000>; - clock-latency-ns = <400000>; - turbo-mode; - }; - }; -}; - -Example 4: Handling multiple regulators - -/ { - cpus { - cpu@0 { - compatible = "vendor,cpu-type"; - ... - - vcc0-supply = <&cpu_supply0>; - vcc1-supply = <&cpu_supply1>; - vcc2-supply = <&cpu_supply2>; - operating-points-v2 = <&cpu0_opp_table>; - }; - }; - - cpu0_opp_table: opp_table0 { - compatible = "operating-points-v2"; - opp-shared; - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000>, /* Supply 0 */ - <960000>, /* Supply 1 */ - <960000>; /* Supply 2 */ - opp-microamp = <70000>, /* Supply 0 */ - <70000>, /* Supply 1 */ - <70000>; /* Supply 2 */ - clock-latency-ns = <300000>; - }; - - /* OR */ - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <975000 970000 985000>, /* Supply 0 */ - <965000 960000 975000>, /* Supply 1 */ - <965000 960000 975000>; /* Supply 2 */ - opp-microamp = <70000>, /* Supply 0 */ - <70000>, /* Supply 1 */ - <70000>; /* Supply 2 */ - clock-latency-ns = <300000>; - }; - - /* OR */ - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <975000 970000 985000>, /* Supply 0 */ - <965000 960000 975000>, /* Supply 1 */ - <965000 960000 975000>; /* Supply 2 */ - opp-microamp = <70000>, /* Supply 0 */ - <0>, /* Supply 1 doesn't need this */ - <70000>; /* Supply 2 */ - clock-latency-ns = <300000>; - }; - }; -}; - -Example 5: opp-supported-hw -(example: three level hierarchy of versions: cuts, substrate and process) - -/ { - cpus { - cpu@0 { - compatible = "arm,cortex-a7"; - ... - - cpu-supply = <&cpu_supply> - operating-points-v2 = <&cpu0_opp_table_slow>; - }; - }; - - opp_table { - compatible = "operating-points-v2"; - opp-shared; - - opp-600000000 { - /* - * Supports all substrate and process versions for 0xF - * cuts, i.e. only first four cuts. - */ - opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> - opp-hz = /bits/ 64 <600000000>; - ... 
- }; - - opp-800000000 { - /* - * Supports: - * - cuts: only one, 6th cut (represented by 6th bit). - * - substrate: supports 16 different substrate versions - * - process: supports 9 different process versions - */ - opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> - opp-hz = /bits/ 64 <800000000>; - ... - }; - - opp-900000000 { - /* - * Supports: - * - All cuts and substrate where process version is 0x2. - * - All cuts and process where substrate version is 0x2. - */ - opp-supported-hw = <0xFFFFFFFF 0xFFFFFFFF 0x02>, <0xFFFFFFFF 0x01 0xFFFFFFFF> - opp-hz = /bits/ 64 <900000000>; - ... - }; - }; -}; - -Example 6: opp-microvolt-, opp-microamp-: -(example: device with two possible microvolt ranges: slow and fast) - -/ { - cpus { - cpu@0 { - compatible = "arm,cortex-a7"; - ... - - operating-points-v2 = <&cpu0_opp_table>; - }; - }; - - cpu0_opp_table: opp_table0 { - compatible = "operating-points-v2"; - opp-shared; - - opp-1000000000 { - opp-hz = /bits/ 64 <1000000000>; - opp-microvolt-slow = <915000 900000 925000>; - opp-microvolt-fast = <975000 970000 985000>; - opp-microamp-slow = <70000>; - opp-microamp-fast = <71000>; - }; - - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; - opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */ - <925000 910000 935000>; /* Supply vcc1 */ - opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */ - <965000 960000 975000>; /* Supply vcc1 */ - opp-microamp = <70000>; /* Will be used for both slow/fast */ - }; - }; -}; - -Example 7: Single cluster Quad-core ARM cortex A53, OPP points from firmware, -distinct clock controls but two sets of clock/voltage/current lines. 
- -/ { - cpus { - #address-cells = <2>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a53"; - reg = <0x0 0x100>; - next-level-cache = <&A53_L2>; - clocks = <&dvfs_controller 0>; - operating-points-v2 = <&cpu_opp0_table>; - }; - cpu@1 { - compatible = "arm,cortex-a53"; - reg = <0x0 0x101>; - next-level-cache = <&A53_L2>; - clocks = <&dvfs_controller 1>; - operating-points-v2 = <&cpu_opp0_table>; - }; - cpu@2 { - compatible = "arm,cortex-a53"; - reg = <0x0 0x102>; - next-level-cache = <&A53_L2>; - clocks = <&dvfs_controller 2>; - operating-points-v2 = <&cpu_opp1_table>; - }; - cpu@3 { - compatible = "arm,cortex-a53"; - reg = <0x0 0x103>; - next-level-cache = <&A53_L2>; - clocks = <&dvfs_controller 3>; - operating-points-v2 = <&cpu_opp1_table>; - }; - - }; - - cpu_opp0_table: opp0_table { - compatible = "operating-points-v2"; - opp-shared; - }; - - cpu_opp1_table: opp1_table { - compatible = "operating-points-v2"; - opp-shared; - }; -}; diff --git a/Documentation/devicetree/bindings/opp/qcom-opp.txt b/Documentation/devicetree/bindings/opp/qcom-opp.txt index 32eb0793c7e6..41d3e4ff2dc3 100644 --- a/Documentation/devicetree/bindings/opp/qcom-opp.txt +++ b/Documentation/devicetree/bindings/opp/qcom-opp.txt @@ -1,7 +1,7 @@ Qualcomm OPP bindings to describe OPP nodes The bindings are based on top of the operating-points-v2 bindings -described in Documentation/devicetree/bindings/opp/opp.txt +described in Documentation/devicetree/bindings/opp/opp-v2-base.yaml Additional properties are described below. 
* OPP Table Node diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt index 832346e489a3..b70d326117cd 100644 --- a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt +++ b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt @@ -13,7 +13,7 @@ regulators to the device that will undergo OPP transitions we can make use of the multi regulator binding that is part of the OPP core described here [1] to describe both regulators needed by the platform. -[1] Documentation/devicetree/bindings/opp/opp.txt +[1] Documentation/devicetree/bindings/opp/opp-v2.yaml Required Properties for Device Node: - vdd-supply: phandle to regulator controlling VDD supply diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml index aed51e9dcb11..3143ed9a3313 100644 --- a/Documentation/devicetree/bindings/power/power-domain.yaml +++ b/Documentation/devicetree/bindings/power/power-domain.yaml @@ -46,7 +46,7 @@ properties: Phandles to the OPP tables of power domains provided by a power domain provider. If the provider provides a single power domain only or all the power domains provided by the provider have identical OPP tables, - then this shall contain a single phandle. Refer to ../opp/opp.txt + then this shall contain a single phandle. Refer to ../opp/opp-v2-base.yaml for more information. "#power-domain-cells": From 4d584efae0b28928011dd5b3b9b3a3ac4324bcac Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 11/23] cpufreq: scpi: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. 
Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scpi-cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index d6a698a1b5d1..bda3e7d42964 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -163,8 +163,6 @@ static int scpi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = false; - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); - return 0; out_free_cpufreq_table: @@ -200,6 +198,7 @@ static struct cpufreq_driver scpi_cpufreq_driver = { .init = scpi_cpufreq_init, .exit = scpi_cpufreq_exit, .target_index = scpi_cpufreq_set_target, + .register_em = cpufreq_register_em_with_opp, }; static int scpi_cpufreq_probe(struct platform_device *pdev) From 3fd23111185d2167696547d59235bda8d307425c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 12/23] cpufreq: vexpress: Use .register_em() to register with energy model Set the newly added .register_em() callback with cpufreq_register_em_with_opp() to register with the EM core. 
Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/vexpress-spc-cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index ab56813b7256..284b6bd040b1 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -440,8 +440,6 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy) policy->freq_table = freq_table[cur_cluster]; policy->cpuinfo.transition_latency = 1000000; /* 1 ms */ - dev_pm_opp_of_register_em(cpu_dev, policy->cpus); - if (is_bL_switching_enabled()) per_cpu(cpu_last_req_freq, policy->cpu) = clk_get_cpu_rate(policy->cpu); @@ -475,6 +473,7 @@ static struct cpufreq_driver ve_spc_cpufreq_driver = { .get = ve_spc_cpufreq_get_rate, .init = ve_spc_cpufreq_init, .exit = ve_spc_cpufreq_exit, + .register_em = cpufreq_register_em_with_opp, .attr = cpufreq_generic_attr, }; From 37f188318ea3f1da75b32df3b1a19f45d9840652 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Aug 2021 12:24:36 +0530 Subject: [PATCH 13/23] cpufreq: scmi: Use .register_em() to register with energy model Set the newly added .register_em() callback to register with the EM after the cpufreq policy is properly initialized. 
Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 65 ++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 75f818d04b48..1e0cd4d165f0 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -22,7 +22,9 @@ struct scmi_data { int domain_id; + int nr_opp; struct device *cpu_dev; + cpumask_var_t opp_shared_cpus; }; static struct scmi_protocol_handle *ph; @@ -123,9 +125,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) struct device *cpu_dev; struct scmi_data *priv; struct cpufreq_frequency_table *freq_table; - struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power); - cpumask_var_t opp_shared_cpus; - bool power_scale_mw; cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -133,9 +132,15 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) return -ENODEV; } - if (!zalloc_cpumask_var(&opp_shared_cpus, GFP_KERNEL)) + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; + if (!zalloc_cpumask_var(&priv->opp_shared_cpus, GFP_KERNEL)) { + ret = -ENOMEM; + goto out_free_priv; + } + /* Obtain CPUs that share SCMI performance controls */ ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus); if (ret) { @@ -148,14 +153,14 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) * The OPP 'sharing cpus' info may come from DT through an empty opp * table and opp-shared. */ - ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, opp_shared_cpus); - if (ret || !cpumask_weight(opp_shared_cpus)) { + ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->opp_shared_cpus); + if (ret || !cpumask_weight(priv->opp_shared_cpus)) { /* * Either opp-table is not set or no opp-shared was found. * Use the CPU mask from SCMI to designate CPUs sharing an OPP * table. 
*/ - cpumask_copy(opp_shared_cpus, policy->cpus); + cpumask_copy(priv->opp_shared_cpus, policy->cpus); } /* @@ -180,7 +185,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } - ret = dev_pm_opp_set_sharing_cpus(cpu_dev, opp_shared_cpus); + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->opp_shared_cpus); if (ret) { dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); @@ -188,21 +193,13 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) goto out_free_opp; } - power_scale_mw = perf_ops->power_scale_mw_get(ph); - em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, - opp_shared_cpus, power_scale_mw); - } - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto out_free_opp; + priv->nr_opp = nr_opp; } ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto out_free_priv; + goto out_free_opp; } priv->cpu_dev = cpu_dev; @@ -223,17 +220,16 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = perf_ops->fast_switch_possible(ph, cpu_dev); - free_cpumask_var(opp_shared_cpus); return 0; -out_free_priv: - kfree(priv); - out_free_opp: dev_pm_opp_remove_all_dynamic(cpu_dev); out_free_cpumask: - free_cpumask_var(opp_shared_cpus); + free_cpumask_var(priv->opp_shared_cpus); + +out_free_priv: + kfree(priv); return ret; } @@ -244,11 +240,33 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + free_cpumask_var(priv->opp_shared_cpus); kfree(priv); return 0; } +static void scmi_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power); + bool power_scale_mw = perf_ops->power_scale_mw_get(ph); + struct scmi_data *priv = policy->driver_data; + + /* + * This callback will be called for each 
policy, but we don't need to + * register with EM every time. Despite not being part of the same + * policy, some CPUs may still share their perf-domains, and a CPU from + * another policy may already have registered with EM on behalf of CPUs + * of this policy. + */ + if (!priv->nr_opp) + return; + + em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp, + &em_cb, priv->opp_shared_cpus, + power_scale_mw); +} + static struct cpufreq_driver scmi_cpufreq_driver = { .name = "scmi", .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | @@ -261,6 +279,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = { .get = scmi_cpufreq_get_rate, .init = scmi_cpufreq_init, .exit = scmi_cpufreq_exit, + .register_em = scmi_cpufreq_register_em, }; static int scmi_cpufreq_probe(struct scmi_device *sdev) From 275157b367f479334f3e2df7be93a3dd772f359c Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Mon, 9 Aug 2021 15:16:01 -0400 Subject: [PATCH 14/23] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support Add interrupt support to notify the kernel of h/w initiated frequency throttling by LMh. Convey this to scheduler via thermal pressure interface. 
Signed-off-by: Thara Gopinath [Viresh: Added changes for arch_topology.c to fix build errors ] Signed-off-by: Viresh Kumar --- drivers/base/arch_topology.c | 2 + drivers/cpufreq/qcom-cpufreq-hw.c | 147 ++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 921312a8d957..43407665918f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -149,6 +149,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, } DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale); void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) { @@ -165,6 +166,7 @@ void topology_set_thermal_pressure(const struct cpumask *cpus, for_each_cpu(cpu, cpus) WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); } +EXPORT_SYMBOL_GPL(topology_set_thermal_pressure); static ssize_t cpu_capacity_show(struct device *dev, struct device_attribute *attr, diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index c2e71c430fbf..6c2086c6de0c 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -7,12 +7,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #define LUT_MAX_ENTRIES 40U #define LUT_SRC GENMASK(31, 30) @@ -22,10 +24,13 @@ #define CLK_HW_DIV 2 #define LUT_TURBO_IND 1 +#define HZ_PER_KHZ 1000 + struct qcom_cpufreq_soc_data { u32 reg_enable; u32 reg_freq_lut; u32 reg_volt_lut; + u32 reg_current_vote; u32 reg_perf_state; u8 lut_row_size; }; @@ -34,6 +39,16 @@ struct qcom_cpufreq_data { void __iomem *base; struct resource *res; const struct qcom_cpufreq_soc_data *soc_data; + + /* + * Mutex to synchronize between de-init sequence and re-starting LMh + * polling/interrupts + */ + struct mutex throttle_lock; + int throttle_irq; + bool cancel_throttle; + struct delayed_work 
throttle_work; + struct cpufreq_policy *policy; }; static unsigned long cpu_hw_rate, xo_rate; @@ -251,10 +266,92 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } +static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +{ + unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote); + + return (val & 0x3FF) * 19200; +} + +static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) +{ + unsigned long max_capacity, capacity, freq_hz, throttled_freq; + struct cpufreq_policy *policy = data->policy; + int cpu = cpumask_first(policy->cpus); + struct device *dev = get_cpu_device(cpu); + struct dev_pm_opp *opp; + unsigned int freq; + + /* + * Get the h/w throttled frequency, normalize it using the + * registered opp table and use it to calculate thermal pressure. + */ + freq = qcom_lmh_get_throttle_freq(data); + freq_hz = freq * HZ_PER_KHZ; + + opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); + if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) + dev_pm_opp_find_freq_ceil(dev, &freq_hz); + + throttled_freq = freq_hz / HZ_PER_KHZ; + + /* Update thermal pressure */ + + max_capacity = arch_scale_cpu_capacity(cpu); + capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq); + + /* Don't pass boost capacity to scheduler */ + if (capacity > max_capacity) + capacity = max_capacity; + + arch_set_thermal_pressure(policy->cpus, max_capacity - capacity); + + /* + * In the unlikely case policy is unregistered do not enable + * polling or h/w interrupt + */ + mutex_lock(&data->throttle_lock); + if (data->cancel_throttle) + goto out; + + /* + * If h/w throttled frequency is higher than what cpufreq has requested + * for, then stop polling and switch back to interrupt mechanism. 
+ */ + if (throttled_freq >= qcom_cpufreq_hw_get(cpu)) + enable_irq(data->throttle_irq); + else + mod_delayed_work(system_highpri_wq, &data->throttle_work, + msecs_to_jiffies(10)); + +out: + mutex_unlock(&data->throttle_lock); +} + +static void qcom_lmh_dcvs_poll(struct work_struct *work) +{ + struct qcom_cpufreq_data *data; + + data = container_of(work, struct qcom_cpufreq_data, throttle_work.work); + qcom_lmh_dcvs_notify(data); +} + +static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) +{ + struct qcom_cpufreq_data *c_data = data; + + /* Disable interrupt and enable polling */ + disable_irq_nosync(c_data->throttle_irq); + qcom_lmh_dcvs_notify(c_data); + + return IRQ_HANDLED; +} + static const struct qcom_cpufreq_soc_data qcom_soc_data = { .reg_enable = 0x0, .reg_freq_lut = 0x110, .reg_volt_lut = 0x114, + .reg_current_vote = 0x704, .reg_perf_state = 0x920, .lut_row_size = 32, }; @@ -274,6 +371,51 @@ static const struct of_device_id qcom_cpufreq_hw_match[] = { }; MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match); +static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + struct platform_device *pdev = cpufreq_get_driver_data(); + char irq_name[15]; + int ret; + + /* + * Look for LMh interrupt. If no interrupt line is specified / + * if there is an error, allow cpufreq to be enabled as usual. + */ + data->throttle_irq = platform_get_irq(pdev, index); + if (data->throttle_irq <= 0) + return data->throttle_irq == -EPROBE_DEFER ? 
-EPROBE_DEFER : 0; + + data->cancel_throttle = false; + data->policy = policy; + + mutex_init(&data->throttle_lock); + INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll); + + snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu); + ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq, + IRQF_ONESHOT, irq_name, data); + if (ret) { + dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret); + return 0; + } + + return 0; +} + +static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +{ + if (data->throttle_irq <= 0) + return; + + mutex_lock(&data->throttle_lock); + data->cancel_throttle = true; + mutex_unlock(&data->throttle_lock); + + cancel_delayed_work_sync(&data->throttle_work); + free_irq(data->throttle_irq, data); +} + static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) { struct platform_device *pdev = cpufreq_get_driver_data(); @@ -368,6 +510,10 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); } + ret = qcom_cpufreq_hw_lmh_init(policy, index); + if (ret) + goto error; + return 0; error: kfree(data); @@ -387,6 +533,7 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + qcom_cpufreq_hw_lmh_exit(data); kfree(policy->freq_table); kfree(data); iounmap(base); From 5e79d6d9ea00c273f3efbced841212de85a384d1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 20 Aug 2021 14:57:00 -0700 Subject: [PATCH 15/23] cpufreq: blocklist more Qualcomm platforms in cpufreq-dt-platdev The Qualcomm sa8155p, sm6350, sm8250 and sm8350 platforms also uses the qcom-cpufreq-hw driver, so add them to the cpufreq-dt-platdev driver's blocklist. 
Signed-off-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 231e585f6ba2..ca1d103ec449 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -137,11 +137,15 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, { .compatible = "qcom,qcs404", }, + { .compatible = "qcom,sa8155p" }, { .compatible = "qcom,sc7180", }, { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm8150", }, + { .compatible = "qcom,sm8250", }, + { .compatible = "qcom,sm8350", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, From f0712ace7fe0723b40733c3b98591d34c1b0bfb9 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Mon, 23 Aug 2021 15:45:46 +0530 Subject: [PATCH 16/23] cpufreq: qcom-hw: Set dvfs_possible_from_any_cpu cpufreq driver flag As remote cpufreq updates are supported on QCOM platforms, set dvfs_possible_from_any_cpu cpufreq driver flag. 
Signed-off-by: Taniya Das Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 6c2086c6de0c..a2be0df7e174 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -490,6 +490,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) } policy->driver_data = data; + policy->dvfs_possible_from_any_cpu = true; ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy); if (ret) { From 692a3b9a89947b27fc76d40b2613b33286a1690b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 1 Sep 2021 14:41:55 +0530 Subject: [PATCH 17/23] cpufreq: acpi: Remove acpi_cpufreq_cpu_ready() The ready() callback was implemented earlier for acpi-cpufreq driver as we wanted to use policy->cpuinfo.max_freq for which the policy was required to be verified. That is no longer the case and we can do the pr_warn() right from ->init() callback now. Remove acpi_cpufreq_cpu_ready(). Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index b49612895c78..28467d83c745 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -889,6 +889,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->fast_switch_possible = !acpi_pstate_strict && !(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY); + if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) + pr_warn(FW_WARN "P-state 0 is not max freq\n"); + return result; err_unreg: @@ -918,16 +921,6 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) return 0; } -static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy) -{ - struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, - policy->cpu); - unsigned int freq = policy->freq_table[0].frequency; - - if (perf->states[0].core_frequency * 1000 != freq) - pr_warn(FW_WARN "P-state 0 is not max freq\n"); -} - static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; @@ -955,7 +948,6 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .bios_limit = acpi_processor_get_bios_limit, .init = acpi_cpufreq_cpu_init, .exit = acpi_cpufreq_cpu_exit, - .ready = acpi_cpufreq_cpu_ready, .resume = acpi_cpufreq_resume, .name = "acpi-cpufreq", .attr = acpi_cpufreq_attr, From 9ab0a6cb76b998f5f2231e1c428bee3771893002 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 1 Sep 2021 14:41:56 +0530 Subject: [PATCH 18/23] cpufreq: sh: Remove sh_cpufreq_cpu_ready() The ->ready() callback is going away and since we don't do any important stuff in sh_cpufreq_cpu_ready(), remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/sh-cpufreq.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 1a251e635ebd..b8704232c27b 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -145,16 +145,6 @@ static int sh_cpufreq_cpu_exit(struct cpufreq_policy *policy) return 0; } -static void sh_cpufreq_cpu_ready(struct cpufreq_policy *policy) -{ - struct device *dev = get_cpu_device(policy->cpu); - - dev_info(dev, "CPU Frequencies - Minimum %u.%03u MHz, " - "Maximum %u.%03u MHz.\n", - policy->min / 1000, policy->min % 1000, - policy->max / 1000, policy->max % 1000); -} - static struct cpufreq_driver sh_cpufreq_driver = { .name = "sh", .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, @@ -163,7 +153,6 @@ static struct cpufreq_driver sh_cpufreq_driver = { .verify = sh_cpufreq_verify, .init = sh_cpufreq_cpu_init, .exit = sh_cpufreq_cpu_exit, - .ready = sh_cpufreq_cpu_ready, .attr = cpufreq_generic_attr, }; From 4bf8e582119ed9767f907abb6dc62ef9dddf10df Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 1 Sep 2021 14:41:57 +0530 Subject: [PATCH 19/23] cpufreq: Remove ready() callback This isn't used anymore, get rid of it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.rst | 3 --- Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst | 2 -- drivers/cpufreq/cpufreq.c | 4 ---- include/linux/cpufreq.h | 3 --- 4 files changed, 12 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index d84ededb66f9..3b32336a7803 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -75,9 +75,6 @@ And optionally .resume - A pointer to a per-policy resume function which is called with interrupts disabled and _before_ the governor is started again. 
- .ready - A pointer to a per-policy ready function which is called after - the policy is fully initialized. - .attr - A pointer to a NULL-terminated list of "struct freq_attr" which allow to export values to sysfs. diff --git a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst index 5ae9cfa2ec55..334f30ae198b 100644 --- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst @@ -80,8 +80,6 @@ CPUfreq核心层注册一个cpufreq_driver结构体。 .resume - 一个指向per-policy恢复函数的指针,该函数在关中断且在调节器再一次开始前被 调用。 - .ready - 一个指向per-policy准备函数的指针,该函数在策略完全初始化之后被调用。 - .attr - 一个指向NULL结尾的"struct freq_attr"列表的指针,该函数允许导出值到 sysfs。 diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7d5f170ecad1..5782b15a8caa 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1517,10 +1517,6 @@ static int cpufreq_online(unsigned int cpu) kobject_uevent(&policy->kobj, KOBJ_ADD); - /* Callback for handling stuff after policy is ready */ - if (cpufreq_driver->ready) - cpufreq_driver->ready(policy); - if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c65a1d7385f8..fe6acc04e5e5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -367,9 +367,6 @@ struct cpufreq_driver { int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); - /* Will be called after the driver is fully initialized */ - void (*ready)(struct cpufreq_policy *policy); - struct freq_attr **attr; /* platform specific boost support code */ From a8bbe0c9440561cb407cefc0b1def808c2c38431 Mon Sep 17 00:00:00 2001 From: "Hector.Yuan" Date: Fri, 3 Sep 2021 16:39:22 +0800 Subject: [PATCH 20/23] dt-bindings: cpufreq: add bindings for MediaTek cpufreq HW Add devicetree bindings for MediaTek HW driver. 
Signed-off-by: Hector.Yuan Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-mediatek-hw.yaml | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml new file mode 100644 index 000000000000..9cd42a64b13e --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/cpufreq/cpufreq-mediatek-hw.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek's CPUFREQ Bindings + +maintainers: + - Hector Yuan + +description: + CPUFREQ HW is a hardware engine used by MediaTek SoCs to + manage frequency in hardware. It is capable of controlling + frequency for multiple clusters. + +properties: + compatible: + const: mediatek,cpufreq-hw + + reg: + minItems: 1 + maxItems: 2 + description: + Addresses and sizes for the memory of the HW bases in + each frequency domain. Each entry corresponds to + a register bank for each frequency domain present. + + "#performance-domain-cells": + description: + Number of cells in a performance domain specifier. + Set const to 1 here for nodes providing multiple + performance domains. + const: 1 + +required: + - compatible + - reg + - "#performance-domain-cells" + +additionalProperties: false + +examples: + - | + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a55"; + enable-method = "psci"; + performance-domains = <&performance 0>; + reg = <0x000>; + }; + }; + + /* ... 
*/ + + soc { + #address-cells = <2>; + #size-cells = <2>; + + performance: performance-controller@11bc00 { + compatible = "mediatek,cpufreq-hw"; + reg = <0 0x0011bc10 0 0x120>, <0 0x0011bd30 0 0x120>; + + #performance-domain-cells = <1>; + }; + }; From 8486a32dd484a7d7ec25295c7439094608f54915 Mon Sep 17 00:00:00 2001 From: "Hector.Yuan" Date: Fri, 3 Sep 2021 16:39:23 +0800 Subject: [PATCH 21/23] cpufreq: Add of_perf_domain_get_sharing_cpumask Add of_perf_domain_get_sharing_cpumask function to group cpu to specific performance domain. Signed-off-by: Hector.Yuan [ Viresh: create separate routine parse_perf_domain() and always set the cpumask. ] Signed-off-by: Viresh Kumar --- include/linux/cpufreq.h | 58 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c65a1d7385f8..acd3ee5b8b0a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -1003,6 +1005,55 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy return count; } + +static inline int parse_perf_domain(int cpu, const char *list_name, + const char *cell_name) +{ + struct device_node *cpu_np; + struct of_phandle_args args; + int ret; + + cpu_np = of_cpu_device_node_get(cpu); + if (!cpu_np) + return -ENODEV; + + ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, + &args); + of_node_put(cpu_np); + + if (ret < 0) + return ret; + + return args.args[0]; +} + +static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, + const char *cell_name, struct cpumask *cpumask) +{ + int target_idx; + int cpu, ret; + + ret = parse_perf_domain(pcpu, list_name, cell_name); + if (ret < 0) + return ret; + + target_idx = ret; + cpumask_set_cpu(pcpu, cpumask); + + for_each_possible_cpu(cpu) { + if (cpu == pcpu) + continue; + + ret = parse_perf_domain(cpu, 
list_name, cell_name); + if (ret < 0) + continue; + + if (target_idx == ret) + cpumask_set_cpu(cpu, cpumask); + } + + return target_idx; +} #else static inline int cpufreq_boost_trigger_state(int state) { @@ -1022,6 +1073,12 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) { return false; } + +static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, + const char *cell_name, struct cpumask *cpumask) +{ + return -EOPNOTSUPP; +} #endif #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) @@ -1043,7 +1100,6 @@ void arch_set_freq_scale(const struct cpumask *cpus, { } #endif - /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; From 4855e26bcf4d28956f3e33231b961610a0d4a72d Mon Sep 17 00:00:00 2001 From: "Hector.Yuan" Date: Fri, 3 Sep 2021 16:39:24 +0800 Subject: [PATCH 22/23] cpufreq: mediatek-hw: Add support for CPUFREQ HW Introduce cpufreq HW driver which can support CPU frequency adjust in MT6779 platform. Signed-off-by: Hector.Yuan [ Viresh: Massaged the patch and cleaned some stuff. ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 12 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/mediatek-cpufreq-hw.c | 308 ++++++++++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 drivers/cpufreq/mediatek-cpufreq-hw.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index a5c5f70acfc9..954749afb5fe 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -133,6 +133,18 @@ config ARM_MEDIATEK_CPUFREQ help This adds the CPUFreq driver support for MediaTek SoCs. +config ARM_MEDIATEK_CPUFREQ_HW + tristate "MediaTek CPUFreq HW driver" + depends on ARCH_MEDIATEK || COMPILE_TEST + default m + help + Support for the CPUFreq HW driver. 
+ Some MediaTek chipsets have a HW engine to offload the steps + necessary for changing the frequency of the CPUs. Firmware loaded + in this engine exposes a programming interface to the OS. + The driver implements the cpufreq interface for this HW engine. + Say Y if you want to support CPUFreq HW. + config ARM_OMAP2PLUS_CPUFREQ bool "TI OMAP2+" depends on ARCH_OMAP2PLUS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 27d3bd7ea9d4..48ee5859030c 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_IMX_CPUFREQ_DT) += imx-cpufreq-dt.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ) += mediatek-cpufreq.o +obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ_HW) += mediatek-cpufreq-hw.o obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c new file mode 100644 index 000000000000..0cf18dd46b92 --- /dev/null +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 MediaTek Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LUT_MAX_ENTRIES 32U +#define LUT_FREQ GENMASK(11, 0) +#define LUT_ROW_SIZE 0x4 +#define CPUFREQ_HW_STATUS BIT(0) +#define SVS_HW_STATUS BIT(1) +#define POLL_USEC 1000 +#define TIMEOUT_USEC 300000 + +enum { + REG_FREQ_LUT_TABLE, + REG_FREQ_ENABLE, + REG_FREQ_PERF_STATE, + REG_FREQ_HW_STATE, + REG_EM_POWER_TBL, + REG_FREQ_LATENCY, + + REG_ARRAY_SIZE, +}; + +struct mtk_cpufreq_data { + struct cpufreq_frequency_table *table; + void __iomem *reg_bases[REG_ARRAY_SIZE]; + int nr_opp; +}; + +static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { + [REG_FREQ_LUT_TABLE] = 0x0, + [REG_FREQ_ENABLE] = 0x84, + [REG_FREQ_PERF_STATE] = 0x88, + [REG_FREQ_HW_STATE] = 0x8c, + [REG_EM_POWER_TBL] = 0x90, + [REG_FREQ_LATENCY] = 0x110, +}; + +static int __maybe_unused +mtk_cpufreq_get_cpu_power(unsigned long *mW, + unsigned long *KHz, struct device *cpu_dev) +{ + struct mtk_cpufreq_data *data; + struct cpufreq_policy *policy; + int i; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + if (!policy) + return 0; + + data = policy->driver_data; + + for (i = 0; i < data->nr_opp; i++) { + if (data->table[i].frequency < *KHz) + break; + } + i--; + + *KHz = data->table[i].frequency; + *mW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] + + i * LUT_ROW_SIZE) / 1000; + + return 0; +} + +static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy, + unsigned int index) +{ + struct mtk_cpufreq_data *data = policy->driver_data; + + writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); + + return 0; +} + +static unsigned int mtk_cpufreq_hw_get(unsigned int cpu) +{ + struct mtk_cpufreq_data *data; + struct cpufreq_policy *policy; + unsigned int index; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + index = readl_relaxed(data->reg_bases[REG_FREQ_PERF_STATE]); + index = min(index, LUT_MAX_ENTRIES - 1); + + return 
data->table[index].frequency; +} + +static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct mtk_cpufreq_data *data = policy->driver_data; + unsigned int index; + + index = cpufreq_table_find_index_dl(policy, target_freq); + + writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); + + return policy->freq_table[index].frequency; +} + +static int mtk_cpu_create_freq_table(struct platform_device *pdev, + struct mtk_cpufreq_data *data) +{ + struct device *dev = &pdev->dev; + u32 temp, i, freq, prev_freq = 0; + void __iomem *base_table; + + data->table = devm_kcalloc(dev, LUT_MAX_ENTRIES + 1, + sizeof(*data->table), GFP_KERNEL); + if (!data->table) + return -ENOMEM; + + base_table = data->reg_bases[REG_FREQ_LUT_TABLE]; + + for (i = 0; i < LUT_MAX_ENTRIES; i++) { + temp = readl_relaxed(base_table + (i * LUT_ROW_SIZE)); + freq = FIELD_GET(LUT_FREQ, temp) * 1000; + + if (freq == prev_freq) + break; + + data->table[i].frequency = freq; + + dev_dbg(dev, "index=%d freq=%d\n", i, data->table[i].frequency); + + prev_freq = freq; + } + + data->table[i].frequency = CPUFREQ_TABLE_END; + data->nr_opp = i; + + return 0; +} + +static int mtk_cpu_resources_init(struct platform_device *pdev, + struct cpufreq_policy *policy, + const u16 *offsets) +{ + struct mtk_cpufreq_data *data; + struct device *dev = &pdev->dev; + void __iomem *base; + int ret, i; + int index; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains", + "#performance-domain-cells", + policy->cpus); + if (index < 0) + return index; + + base = devm_platform_ioremap_resource(pdev, index); + if (IS_ERR(base)) + return PTR_ERR(base); + + for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) + data->reg_bases[i] = base + offsets[i]; + + ret = mtk_cpu_create_freq_table(pdev, data); + if (ret) { + dev_info(dev, "Domain-%d failed to create freq 
table\n", index); + return ret; + } + + policy->freq_table = data->table; + policy->driver_data = data; + + return 0; +} + +static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) +{ + struct platform_device *pdev = cpufreq_get_driver_data(); + int sig, pwr_hw = CPUFREQ_HW_STATUS | SVS_HW_STATUS; + struct mtk_cpufreq_data *data; + unsigned int latency; + int ret; + + /* Get the bases of cpufreq for domains */ + ret = mtk_cpu_resources_init(pdev, policy, platform_get_drvdata(pdev)); + if (ret) { + dev_info(&pdev->dev, "CPUFreq resource init failed\n"); + return ret; + } + + data = policy->driver_data; + + latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000; + if (!latency) + latency = CPUFREQ_ETERNAL; + + policy->cpuinfo.transition_latency = latency; + policy->fast_switch_possible = true; + + /* HW should be in enabled state to proceed now */ + writel_relaxed(0x1, data->reg_bases[REG_FREQ_ENABLE]); + if (readl_poll_timeout(data->reg_bases[REG_FREQ_HW_STATE], sig, + (sig & pwr_hw) == pwr_hw, POLL_USEC, + TIMEOUT_USEC)) { + if (!(sig & CPUFREQ_HW_STATUS)) { + pr_info("cpufreq hardware of CPU%d is not enabled\n", + policy->cpu); + return -ENODEV; + } + + pr_info("SVS of CPU%d is not enabled\n", policy->cpu); + } + + return 0; +} + +static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) +{ + struct mtk_cpufreq_data *data = policy->driver_data; + + /* HW should be in paused state now */ + writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]); + + return 0; +} + +static void mtk_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct em_data_callback em_cb = EM_DATA_CB(mtk_cpufreq_get_cpu_power); + struct mtk_cpufreq_data *data = policy->driver_data; + + em_dev_register_perf_domain(get_cpu_device(policy->cpu), data->nr_opp, + &em_cb, policy->cpus, true); +} + +static struct cpufreq_driver cpufreq_mtk_hw_driver = { + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | + CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_IS_COOLING_DEV, + .verify 
= cpufreq_generic_frequency_table_verify, + .target_index = mtk_cpufreq_hw_target_index, + .get = mtk_cpufreq_hw_get, + .init = mtk_cpufreq_hw_cpu_init, + .exit = mtk_cpufreq_hw_cpu_exit, + .register_em = mtk_cpufreq_register_em, + .fast_switch = mtk_cpufreq_hw_fast_switch, + .name = "mtk-cpufreq-hw", + .attr = cpufreq_generic_attr, +}; + +static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) +{ + const void *data; + int ret; + + data = of_device_get_match_data(&pdev->dev); + if (!data) + return -EINVAL; + + platform_set_drvdata(pdev, (void *) data); + cpufreq_mtk_hw_driver.driver_data = pdev; + + ret = cpufreq_register_driver(&cpufreq_mtk_hw_driver); + if (ret) + dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); + + return ret; +} + +static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) +{ + return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver); +} + +static const struct of_device_id mtk_cpufreq_hw_match[] = { + { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets }, + {} +}; + +static struct platform_driver mtk_cpufreq_hw_driver = { + .probe = mtk_cpufreq_hw_driver_probe, + .remove = mtk_cpufreq_hw_driver_remove, + .driver = { + .name = "mtk-cpufreq-hw", + .of_match_table = mtk_cpufreq_hw_match, + }, +}; +module_platform_driver(mtk_cpufreq_hw_driver); + +MODULE_AUTHOR("Hector Yuan "); +MODULE_DESCRIPTION("Mediatek cpufreq-hw driver"); +MODULE_LICENSE("GPL v2"); From dd7c46d6e58e8737b0ac3ba21e8584c2632dba65 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Sep 2021 15:32:48 +0200 Subject: [PATCH 23/23] Revert "cpufreq: intel_pstate: Process HWP Guaranteed change notification" Revert commit d0e936adbd22 ("cpufreq: intel_pstate: Process HWP Guaranteed change notification"), because it causes a NULL pointer dereference to occur on Lenovo X1 gen9 laptops due to an HWP guaranteed performance change interrupt arriving prematurely. This feature will be revisited in the next cycle. 
Reported-by: Jens Axboe Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 39 ---------------------------------- 1 file changed, 39 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b4ffe6c8a0d0..2d83a9f9651b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -32,7 +32,6 @@ #include #include #include -#include "../drivers/thermal/intel/thermal_interrupt.h" #define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) @@ -220,7 +219,6 @@ struct global_params { * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. - * @hwp_notify_work: workqueue for HWP notifications. * * This structure stores per CPU instance data for all CPUs. */ @@ -259,7 +257,6 @@ struct cpudata { unsigned int sched_flags; u32 hwp_boost_min; bool suspended; - struct delayed_work hwp_notify_work; }; static struct cpudata **all_cpu_data; @@ -1628,40 +1625,6 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void) /************************** sysfs end ************************/ -static void intel_pstate_notify_work(struct work_struct *work) -{ - mutex_lock(&intel_pstate_driver_lock); - cpufreq_update_policy(smp_processor_id()); - wrmsrl(MSR_HWP_STATUS, 0); - mutex_unlock(&intel_pstate_driver_lock); -} - -void notify_hwp_interrupt(void) -{ - unsigned int this_cpu = smp_processor_id(); - struct cpudata *cpudata; - u64 value; - - if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) - return; - - rdmsrl(MSR_HWP_STATUS, value); - if (!(value & 0x01)) - return; - - cpudata = all_cpu_data[this_cpu]; - schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10)); -} - -static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) -{ - /* Enable HWP notification interrupt for guaranteed performance change */ - if 
(boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) { - INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work); - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01); - } -} - static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ @@ -1671,8 +1634,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); if (cpudata->epp_default == -EINVAL) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); - - intel_pstate_enable_hwp_interrupt(cpudata); } static int atom_get_min_pstate(void)