Merge branches 'pm-cpuidle' and 'pm-em'

Merge cpuidle and Energy Model changes for 6.13-rc1:

 - Add a built-in idle states table for Granite Rapids Xeon D to the
   intel_idle driver (Artem Bityutskiy).

 - Fix some typos in comments in the cpuidle core and drivers (Shen Lichuan).

 - Remove iowait influence from the menu cpuidle governor (Christian Loehle).

 - Add min/max available performance state limits to the Energy Model
   management code (Lukasz Luba).

* pm-cpuidle:
  intel_idle: add Granite Rapids Xeon D support
  cpuidle: Correct some typos in comments
  cpuidle: menu: Remove iowait influence

* pm-em:
  PM: EM: Add min/max available performance state limits
commit 923c256e37
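Not part of the commit itself: a minimal sketch, assuming a hypothetical platform driver, of how a caller might use the new pm-em limits interface introduced in the diff below. Only em_update_performance_limits() comes from this merge; em_pd_get() is the pre-existing Energy Model lookup helper, and the function and variable names here are made up for the example.

/*
 * Illustrative sketch only -- not part of this commit.  A hypothetical
 * platform driver feeding new frequency limits into the Energy Model
 * through the interface added on the pm-em branch below.
 */
#include <linux/device.h>
#include <linux/energy_model.h>

static int example_apply_em_limits(struct device *dev,
				   unsigned long min_khz,
				   unsigned long max_khz)
{
	struct em_perf_domain *pd = em_pd_get(dev);

	if (!pd)
		return -ENODEV;

	/*
	 * Both frequencies must exactly match entries of the EM table,
	 * otherwise the call below returns -EINVAL.
	 */
	return em_update_performance_limits(pd, min_khz, max_khz);
}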
@@ -139,7 +139,7 @@ out_kfree_drv:
  *
  * Initializes arm cpuidle driver for all CPUs, if any CPU fails
  * to register cpuidle driver then rollback to cancel all CPUs
- * registeration.
+ * registration.
  */
 static int __init arm_idle_init(void)
 {
@@ -48,7 +48,7 @@ static int qcom_cpu_spc(struct spm_driver_data *drv)
 	ret = cpu_suspend(0, qcom_pm_collapse);
 	/*
 	 * ARM common code executes WFI without calling into our driver and
-	 * if the SPM mode is not reset, then we may accidently power down the
+	 * if the SPM mode is not reset, then we may accidentally power down the
 	 * cpu when we intended only to gate the cpu clock.
 	 * Ensure the state is set to standby before returning.
 	 */
@@ -406,7 +406,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
 	 * Min polling interval of 10usec is a guess. It is assuming that
 	 * for most users, the time for a single ping-pong workload like
 	 * perf bench pipe would generally complete within 10usec but
-	 * this is hardware dependant. Actual time can be estimated with
+	 * this is hardware dependent. Actual time can be estimated with
 	 *
 	 * perf bench sched pipe -l 10000
 	 *
@@ -261,7 +261,7 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
  * @drv: a pointer to a valid struct cpuidle_driver
  *
  * Register the driver under a lock to prevent concurrent attempts to
- * [un]register the driver from occuring at the same time.
+ * [un]register the driver from occurring at the same time.
  *
  * Returns 0 on success, a negative error code (returned by
  * __cpuidle_register_driver()) otherwise.
@@ -296,7 +296,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver);
  * @drv: a pointer to a valid struct cpuidle_driver
  *
  * Unregisters the cpuidle driver under a lock to prevent concurrent attempts
- * to [un]register the driver from occuring at the same time. @drv has to
+ * to [un]register the driver from occurring at the same time. @drv has to
  * match the currently registered driver.
  */
 void cpuidle_unregister_driver(struct cpuidle_driver *drv)
@@ -19,7 +19,7 @@
 
 #include "gov.h"
 
-#define BUCKETS 12
+#define BUCKETS 6
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
 #define RESOLUTION 1024
@@ -29,12 +29,11 @@
 /*
  * Concepts and ideas behind the menu governor
  *
- * For the menu governor, there are 3 decision factors for picking a C
+ * For the menu governor, there are 2 decision factors for picking a C
  * state:
  * 1) Energy break even point
- * 2) Performance impact
- * 3) Latency tolerance (from pmqos infrastructure)
- * These three factors are treated independently.
+ * 2) Latency tolerance (from pmqos infrastructure)
+ * These two factors are treated independently.
  *
  * Energy break even point
  * -----------------------
@@ -75,30 +74,6 @@
  * intervals and if the stand deviation of these 8 intervals is below a
  * threshold value, we use the average of these intervals as prediction.
  *
- * Limiting Performance Impact
- * ---------------------------
- * C states, especially those with large exit latencies, can have a real
- * noticeable impact on workloads, which is not acceptable for most sysadmins,
- * and in addition, less performance has a power price of its own.
- *
- * As a general rule of thumb, menu assumes that the following heuristic
- * holds:
- *     The busier the system, the less impact of C states is acceptable
- *
- * This rule-of-thumb is implemented using a performance-multiplier:
- * If the exit latency times the performance multiplier is longer than
- * the predicted duration, the C state is not considered a candidate
- * for selection due to a too high performance impact. So the higher
- * this multiplier is, the longer we need to be idle to pick a deep C
- * state, and thus the less likely a busy CPU will hit such a deep
- * C state.
- *
- * Currently there is only one value determining the factor:
- * 10 points are added for each process that is waiting for IO on this CPU.
- * (This value was experimentally determined.)
- * Utilization is no longer a factor as it was shown that it never contributed
- * significantly to the performance multiplier in the first place.
- *
  */
 
 struct menu_device {
@@ -112,19 +87,10 @@ struct menu_device {
 	int interval_ptr;
 };
 
-static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
+static inline int which_bucket(u64 duration_ns)
 {
 	int bucket = 0;
 
-	/*
-	 * We keep two groups of stats; one with no
-	 * IO pending, one without.
-	 * This allows us to calculate
-	 * E(duration)|iowait
-	 */
-	if (nr_iowaiters)
-		bucket = BUCKETS/2;
-
 	if (duration_ns < 10ULL * NSEC_PER_USEC)
 		return bucket;
 	if (duration_ns < 100ULL * NSEC_PER_USEC)
@@ -138,19 +104,6 @@ static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
 	return bucket + 5;
 }
 
-/*
- * Return a multiplier for the exit latency that is intended
- * to take performance requirements into account.
- * The more performance critical we estimate the system
- * to be, the higher this multiplier, and thus the higher
- * the barrier to go to an expensive C state.
- */
-static inline int performance_multiplier(unsigned int nr_iowaiters)
-{
-	/* for IO wait tasks (per cpu!) we add 10x each */
-	return 1 + 10 * nr_iowaiters;
-}
-
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	u64 predicted_ns;
-	u64 interactivity_req;
-	unsigned int nr_iowaiters;
 	ktime_t delta, delta_tick;
 	int i, idx;
 
@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		data->needs_update = 0;
 	}
 
-	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-
 	/* Find the shortest expected idle interval. */
 	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
 	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		}
 
 		data->next_timer_ns = delta;
-		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+		data->bucket = which_bucket(data->next_timer_ns);
 
 		/* Round up the result for half microseconds. */
 		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 */
 		data->next_timer_ns = KTIME_MAX;
 		delta_tick = TICK_NSEC / 2;
-		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+		data->bucket = which_bucket(KTIME_MAX);
 	}
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 */
 		if (predicted_ns < TICK_NSEC)
 			predicted_ns = data->next_timer_ns;
-	} else {
-		/*
-		 * Use the performance multiplier and the user-configurable
-		 * latency_req to determine the maximum exit latency.
-		 */
-		interactivity_req = div64_u64(predicted_ns,
-					      performance_multiplier(nr_iowaiters));
-		if (latency_req > interactivity_req)
-			latency_req = interactivity_req;
+	} else if (latency_req > predicted_ns) {
+		latency_req = predicted_ns;
 	}
 
 	/*
@@ -1069,6 +1069,47 @@ static struct cpuidle_state gnr_cstates[] __initdata = {
 		.enter = NULL }
 };
 
+static struct cpuidle_state gnrd_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 1,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+		.exit_latency = 4,
+		.target_residency = 4,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
+			 CPUIDLE_FLAG_INIT_XSTATE |
+			 CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
+		.exit_latency = 220,
+		.target_residency = 650,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6P",
+		.desc = "MWAIT 0x21",
+		.flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED |
+			 CPUIDLE_FLAG_INIT_XSTATE |
+			 CPUIDLE_FLAG_PARTIAL_HINT_MATCH,
+		.exit_latency = 240,
+		.target_residency = 750,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
 static struct cpuidle_state atom_cstates[] __initdata = {
 	{
 		.name = "C1E",
@@ -1508,6 +1549,12 @@ static const struct idle_cpu idle_cpu_gnr __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_gnrd __initconst = {
+	.state_table = gnrd_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct idle_cpu idle_cpu_avn __initconst = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -1593,6 +1640,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr),
 	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr),
 	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &idle_cpu_gnr),
+	X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &idle_cpu_gnrd),
 	X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &idle_cpu_knl),
 	X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &idle_cpu_knl),
 	X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &idle_cpu_bxt),
@@ -55,6 +55,8 @@ struct em_perf_table {
  * struct em_perf_domain - Performance domain
  * @em_table: Pointer to the runtime modifiable em_perf_table
  * @nr_perf_states: Number of performance states
+ * @min_perf_state: Minimum allowed Performance State index
+ * @max_perf_state: Maximum allowed Performance State index
  * @flags: See "em_perf_domain flags"
  * @cpus: Cpumask covering the CPUs of the domain. It's here
  *	  for performance reasons to avoid potential cache
@@ -70,6 +72,8 @@ struct em_perf_table {
 struct em_perf_domain {
 	struct em_perf_table __rcu *em_table;
 	int nr_perf_states;
+	int min_perf_state;
+	int max_perf_state;
 	unsigned long flags;
 	unsigned long cpus[];
 };
@@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table);
 int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
 			 int nr_states);
 int em_dev_update_chip_binning(struct device *dev);
+int em_update_performance_limits(struct em_perf_domain *pd,
+				 unsigned long freq_min_khz, unsigned long freq_max_khz);
 
 /**
  * em_pd_get_efficient_state() - Get an efficient performance state from the EM
  * @table: List of performance states, in ascending order
- * @nr_perf_states: Number of performance states
+ * @pd: performance domain for which this must be done
  * @max_util: Max utilization to map with the EM
- * @pd_flags: Performance Domain flags
  *
  * It is called from the scheduler code quite frequently and as a consequence
  * doesn't implement any check.
@@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev);
  * requirement.
  */
 static inline int
-em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
-			  unsigned long max_util, unsigned long pd_flags)
+em_pd_get_efficient_state(struct em_perf_state *table,
+			  struct em_perf_domain *pd, unsigned long max_util)
 {
+	unsigned long pd_flags = pd->flags;
+	int min_ps = pd->min_perf_state;
+	int max_ps = pd->max_perf_state;
 	struct em_perf_state *ps;
 	int i;
 
-	for (i = 0; i < nr_perf_states; i++) {
+	for (i = min_ps; i <= max_ps; i++) {
 		ps = &table[i];
 		if (ps->performance >= max_util) {
 			if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
@@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
 		}
 	}
 
-	return nr_perf_states - 1;
+	return max_ps;
 }
 
 /**
@@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * requested performance.
 	 */
 	em_table = rcu_dereference(pd->em_table);
-	i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
-				      max_util, pd->flags);
+	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
 	/*
@@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev)
 {
 	return -EINVAL;
 }
+static inline
+int em_update_performance_limits(struct em_perf_domain *pd,
+				 unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
@@ -628,6 +628,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 		goto unlock;
 
 	dev->em_pd->flags |= flags;
+	dev->em_pd->min_perf_state = 0;
+	dev->em_pd->max_perf_state = nr_states - 1;
 
 	em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
 
@@ -856,3 +858,53 @@ int em_dev_update_chip_binning(struct device *dev)
 	return em_recalc_and_update(dev, pd, em_table);
 }
 EXPORT_SYMBOL_GPL(em_dev_update_chip_binning);
+
+
+/**
+ * em_update_performance_limits() - Update Energy Model with performance
+ *				limits information.
+ * @pd : Performance Domain with EM that has to be updated.
+ * @freq_min_khz : New minimum allowed frequency for this device.
+ * @freq_max_khz : New maximum allowed frequency for this device.
+ *
+ * This function allows to update the EM with information about available
+ * performance levels. It takes the minimum and maximum frequency in kHz
+ * and does internal translation to performance levels.
+ * Returns 0 on success or -EINVAL when failed.
+ */
+int em_update_performance_limits(struct em_perf_domain *pd,
+		unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+	struct em_perf_state *table;
+	int min_ps = -1;
+	int max_ps = -1;
+	int i;
+
+	if (!pd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+		if (freq_min_khz == table[i].frequency)
+			min_ps = i;
+		if (freq_max_khz == table[i].frequency)
+			max_ps = i;
+	}
+	rcu_read_unlock();
+
+	/* Only update when both are found and sane */
+	if (min_ps < 0 || max_ps < 0 || max_ps < min_ps)
+		return -EINVAL;
+
+
+	/* Guard simultaneous updates and make them atomic */
+	mutex_lock(&em_pd_mutex);
+	pd->min_perf_state = min_ps;
+	pd->max_perf_state = max_ps;
+	mutex_unlock(&em_pd_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(em_update_performance_limits);
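Side note, again not part of the commit: a self-contained toy model (plain userspace C with made-up numbers and simplified structures, not the kernel types) of how the new min_perf_state/max_perf_state bounds narrow the search that em_pd_get_efficient_state() performs over the EM table; the inefficiency-skipping flag handling of the real helper is omitted here.

#include <stdio.h>

/* Simplified stand-in for struct em_perf_state; not the kernel layout. */
struct toy_perf_state {
	unsigned long frequency;	/* kHz */
	unsigned long performance;
};

/* Mirrors the narrowed loop: scan only states min_ps..max_ps, lowest first. */
static int toy_get_efficient_state(const struct toy_perf_state *table,
				   int min_ps, int max_ps,
				   unsigned long max_util)
{
	int i;

	for (i = min_ps; i <= max_ps; i++) {
		if (table[i].performance >= max_util)
			return i;
	}

	/* Nothing satisfies the request: fall back to the highest allowed state. */
	return max_ps;
}

int main(void)
{
	const struct toy_perf_state table[] = {
		{  500000,  256 },
		{ 1000000,  512 },
		{ 1500000,  768 },
		{ 2000000, 1024 },
	};

	/*
	 * With limits covering indices 1..2 (1.0-1.5 GHz), a request for
	 * performance 600 resolves to index 2, while a request above 768
	 * is now capped at index 2 instead of reaching index 3.
	 */
	printf("selected state: %d\n", toy_get_efficient_state(table, 1, 2, 600));
	printf("capped request: %d\n", toy_get_efficient_state(table, 1, 2, 900));
	return 0;
}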