From 6e9643a864aa4d532b0d467bacc18a15adf5ca82 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Fri, 23 Oct 2020 17:58:39 +0800
Subject: [PATCH 1/9] cpufreq: e_powersaver: remove unreachable break

A 'break' following a 'return' statement is pointless, so remove it.

Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/e_powersaver.c | 1 -
 drivers/cpufreq/longhaul.c     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 776a58bab0ff..ab93bce8ae77 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -223,7 +223,6 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	case EPS_BRAND_C3:
 		pr_cont("C3\n");
 		return -ENODEV;
-		break;
 	}
 	/* Enable Enhanced PowerSaver */
 	rdmsrl(MSR_IA32_MISC_ENABLE, val);
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 123fb006810d..182a4dbca095 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -593,7 +593,6 @@ static void longhaul_setup_voltagescaling(void)
 		break;
 	default:
 		return;
-		break;
 	}
 	if (min_vid_speed >= highest_speed)
 		return;

From db865272d9c4687520dc29f77e701a1b2669872f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:15:56 +0200
Subject: [PATCH 2/9] cpufreq: Avoid configuring old governors as default with
 intel_pstate

Commit 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by
default without HWP") was meant to cause intel_pstate to be used
in the passive mode with the schedutil governor on top of it, but
it missed the case in which either "ondemand" or "conservative"
was selected as the default governor in the existing kernel config,
in which case the previous old governor configuration would be used,
causing the default legacy governor to be used on top of intel_pstate
instead of schedutil.

Address this by preventing "ondemand" and "conservative" from being
configured as the default cpufreq governor in the case when schedutil
is the default choice for the default governor setting.

[Note that the default cpufreq governor can still be set via the
 kernel command line if need be and that choice is not limited,
 so if anyone really wants to use one of the legacy governors by
 default, it can be achieved this way.]

Fixes: 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by default without HWP")
Reported-by: Julia Lawall <julia.lawall@inria.fr>
Cc: 5.8+ <stable@vger.kernel.org> # 5.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c7171e0b001..85de313ddec2 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -71,6 +71,7 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
 
 config CPU_FREQ_DEFAULT_GOV_ONDEMAND
 	bool "ondemand"
+	depends on !(X86_INTEL_PSTATE && SMP)
 	select CPU_FREQ_GOV_ONDEMAND
 	select CPU_FREQ_GOV_PERFORMANCE
 	help
@@ -83,6 +84,7 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
 
 config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 	bool "conservative"
+	depends on !(X86_INTEL_PSTATE && SMP)
 	select CPU_FREQ_GOV_CONSERVATIVE
 	select CPU_FREQ_GOV_PERFORMANCE
 	help

From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:35:19 +0200
Subject: [PATCH 3/9] cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag

Generally, a cpufreq driver may need to update some internal upper
and lower frequency boundaries on policy max and min changes,
respectively, but currently this does not work if the target
frequency does not change along with the policy limit.

Namely, if the target frequency does not change along with the
policy min or max, the "target_freq == policy->cur" check in
__cpufreq_driver_target() prevents driver callbacks from being
invoked and they do not even have a chance to update the
corresponding internal boundary.

This particularly affects the "powersave" and "performance"
governors that always set the target frequency to one of the
policy limits and it never changes when the other limit is updated.

To allow cpufreq the drivers needing to update internal frequency
boundaries on policy limits changes to avoid this issue, introduce
a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will
neutralize the check mentioned above.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c |  3 ++-
 include/linux/cpufreq.h   | 10 +++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index f4b60663efe6..ea58337fb65f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2187,7 +2187,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	 * exactly same freq is called again and so we can save on few function
 	 * calls.
 	 */
-	if (target_freq == policy->cur)
+	if (target_freq == policy->cur &&
+	    !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS))
 		return 0;
 
 	/* Save last value to restore later on errors */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index fa37b1c66443..038ed83aab41 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 
 struct cpufreq_driver {
 	char		name[CPUFREQ_NAME_LEN];
-	u8		flags;
+	u16		flags;
 	void		*driver_data;
 
 	/* needed by all drivers */
@@ -422,6 +422,14 @@ struct cpufreq_driver {
  */
 #define CPUFREQ_IS_COOLING_DEV			BIT(7)
 
+/*
+ * Set by drivers that need to update internale upper and lower boundaries along
+ * with the target frequency and so the core and governors should also invoke
+ * the diver if the target frequency does not change, but the policy min or max
+ * may have changed.
+ */
+#define CPUFREQ_NEED_UPDATE_LIMITS		BIT(8)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 

From e0be38ed4ab413ddd492118cf146369b86ee0ab5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:35:32 +0200
Subject: [PATCH 4/9] cpufreq: intel_pstate: Avoid missing HWP max updates in
 passive mode

If the cpufreq policy max limit is changed when intel_pstate operates
in the passive mode with HWP enabled and the "powersave" governor is
used on top of it, the HWP max limit is not updated as appropriate.

Namely, in the "powersave" governor case, the target P-state
is always equal to the policy min limit, so if the latter does
not change, intel_cpufreq_adjust_hwp() is not invoked to update
the HWP Request MSR due to the "target_pstate != old_pstate" check
in intel_cpufreq_update_pstate(), so the HWP max limit is not
updated as a result.

Also, if the CPUFREQ_NEED_UPDATE_LIMITS flag is not set for the
driver and the target frequency does not change along with the
policy max limit, the "target_freq == policy->cur" check in
__cpufreq_driver_target() prevents the driver's ->target() callback
from being invoked at all, so the HWP max limit is not updated.

To prevent that occurring, set the CPUFREQ_NEED_UPDATE_LIMITS flag
in the intel_cpufreq driver structure if HWP is enabled and modify
intel_cpufreq_update_pstate() to do the "target_pstate != old_pstate"
check only in the non-HWP case and let intel_cpufreq_adjust_hwp()
always run in the HWP case (it will update HWP Request only if the
cached value of the register is different from the new one including
the limits, so if neither the target P-state value nor the max limit
changes, the register write will still be avoided).

Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled")
Reported-by: Zhang Rui <rui.zhang@intel.com>
Cc: 5.9+ <stable@vger.kernel.org> # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ...
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3c1455518738..b7a9779250aa 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2568,14 +2568,12 @@ static int intel_cpufreq_update_pstate(struct cpudata *cpu, int target_pstate,
 	int old_pstate = cpu->pstate.current_pstate;
 
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
-	if (target_pstate != old_pstate) {
+	if (hwp_active) {
+		intel_cpufreq_adjust_hwp(cpu, target_pstate, fast_switch);
+		cpu->pstate.current_pstate = target_pstate;
+	} else if (target_pstate != old_pstate) {
+		intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch);
 		cpu->pstate.current_pstate = target_pstate;
-		if (hwp_active)
-			intel_cpufreq_adjust_hwp(cpu, target_pstate,
-						 fast_switch);
-		else
-			intel_cpufreq_adjust_perf_ctl(cpu, target_pstate,
-						      fast_switch);
 	}
 
 	intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH :
@@ -3032,6 +3030,7 @@ static int __init intel_pstate_init(void)
 			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
 			intel_cpufreq.attr = hwp_cpufreq_attrs;
+			intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
 			if (!default_driver)
 				default_driver = &intel_pstate;
 

From 4e0ba5577dba686f96c1c10ef4166380667fdec7 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Sun, 25 Oct 2020 00:29:53 +0800
Subject: [PATCH 5/9] intel_idle: Fix max_cstate for processor models without
 C-state tables

Currently intel_idle driver gets the c-state information from ACPI
_CST if the processor model is not recognized by it. However the
c-state in _CST starts with index 1 which is different from the
index in intel_idle driver's internal c-state table.

While intel_idle_max_cstate_reached() was previously introduced to
deal with intel_idle driver's internal c-state table, re-using
this function directly on _CST is incorrect.

Fix this by subtracting 1 from the index when checking max_cstate
in the _CST case.

For example, append intel_idle.max_cstate=1 in boot command line,
Before the patch:
grep . /sys/devices/system/cpu/cpu0/cpuidle/state*/name
POLL
After the patch:
grep . /sys/devices/system/cpu/cpu0/cpuidle/state*/name
/sys/devices/system/cpu/cpu0/cpuidle/state0/name:POLL
/sys/devices/system/cpu/cpu0/cpuidle/state1/name:C1_ACPI

Fixes: 18734958e9bf ("intel_idle: Use ACPI _CST for processor models without C-state tables")
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Cc: 5.6+ <stable@vger.kernel.org> # 5.6+
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 56f5b8077cba..01bace49a962 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1239,7 +1239,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
 		struct acpi_processor_cx *cx;
 		struct cpuidle_state *state;
 
-		if (intel_idle_max_cstate_reached(cstate))
+		if (intel_idle_max_cstate_reached(cstate - 1))
 			break;
 
 		cx = &acpi_state_table.states[cstate];

From 4d4ce8053bfac9a72b9094c6879119938efaa05d Mon Sep 17 00:00:00 2001
From: Jackie Zamow <jackie.zamow@gmail.com>
Date: Tue, 27 Oct 2020 07:43:19 -0500
Subject: [PATCH 6/9] PM: sleep: fix typo in kernel/power/process.c

Fix a typo in a comment in freeze_processes().

Signed-off-by: Jackie Zamow <jackie.zamow@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 4b6a54da7e65..45b054b7b5ec 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -146,7 +146,7 @@ int freeze_processes(void)
 	BUG_ON(in_atomic());
 
 	/*
-	 * Now that the whole userspace is frozen we need to disbale
+	 * Now that the whole userspace is frozen we need to disable
 	 * the OOM killer to disallow any further interference with
 	 * killable tasks. There is no guarantee oom victims will
 	 * ever reach a point they go away we have to wait with a timeout.

From 00d4394792418f8fe968f0cb22557053c6310010 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 27 Oct 2020 11:59:34 -0700
Subject: [PATCH 7/9] cpufreq: speedstep: remove unneeded semicolon

A semicolon is not needed after a switch statement.

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/speedstep-lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index a13a2d1e444e..0b66df4ed513 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -240,7 +240,7 @@ unsigned int speedstep_get_frequency(enum speedstep_processor processor)
 		return pentium3_get_frequency(processor);
 	default:
 		return 0;
-	};
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(speedstep_get_frequency);

From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:35:46 +0200
Subject: [PATCH 8/9] cpufreq: Introduce cpufreq_driver_test_flags()

Add a helper function to test the flags of the cpufreq driver in use
againt a given flags mask.

In particular, this will be needed to test the
CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil
governor.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 12 ++++++++++++
 include/linux/cpufreq.h   |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ea58337fb65f..336b5e94cbc8 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1907,6 +1907,18 @@ void cpufreq_resume(void)
 	}
 }
 
+/**
+ * cpufreq_driver_test_flags - Test cpufreq driver's flags against given ones.
+ * @flags: Flags to test against the current cpufreq driver's flags.
+ *
+ * Assumes that the driver is there, so callers must ensure that this is the
+ * case.
+ */
+bool cpufreq_driver_test_flags(u16 flags)
+{
+	return !!(cpufreq_driver->flags & flags);
+}
+
 /**
  *	cpufreq_get_current_driver - return current driver's name
  *
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 038ed83aab41..1eaa04f1bae6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -433,6 +433,7 @@ struct cpufreq_driver {
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
+bool cpufreq_driver_test_flags(u16 flags);
 const char *cpufreq_get_current_driver(void);
 void *cpufreq_get_driver_data(void);
 

From d1e7c2996e988866e7ceceb4641a0886885b7889 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 29 Oct 2020 12:12:46 +0100
Subject: [PATCH 9/9] cpufreq: schedutil: Always call driver if
 CPUFREQ_NEED_UPDATE_LIMITS is set

Because sugov_update_next_freq() may skip a frequency update even if
the need_freq_update flag has been set for the policy at hand, policy
limits updates may not take effect as expected.

For example, if the intel_pstate driver operates in the passive mode
with HWP enabled, it needs to update the HWP min and max limits when
the policy min and max limits change, respectively, but that may not
happen if the target frequency does not change along with the limit
at hand.  In particular, if the policy min is changed first, causing
the target frequency to be adjusted to it, and the policy max limit
is changed later to the same value, the HWP max limit will not be
updated to follow it as expected, because the target frequency is
still equal to the policy min limit and it will not change until
that limit is updated.

To address this issue, modify get_next_freq() to let the driver
callback run if the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag
is set regardless of whether or not the new frequency to set is
equal to the previous one.

Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled")
Reported-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Cc: 5.9+ <stable@vger.kernel.org> # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ...
Cc: 5.9+ <stable@vger.kernel.org> # 5.9+: a62f68f5ca53 cpufreq: Introduce cpufreq_driver_test_flags()
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e254745a82cb..c03a5775d019 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -102,7 +102,8 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
 				   unsigned int next_freq)
 {
-	if (sg_policy->next_freq == next_freq)
+	if (sg_policy->next_freq == next_freq &&
+	    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
 		return false;
 
 	sg_policy->next_freq = next_freq;
@@ -161,7 +162,8 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 
 	freq = map_util_freq(util, freq, max);
 
-	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
+	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update &&
+	    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
 		return sg_policy->next_freq;
 
 	sg_policy->need_freq_update = false;