From 521b512b157a1315ff2bf11c11ab184c79515aea Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:47 +0100 Subject: [PATCH 01/71] PM / EM: change naming convention from 'capacity' to 'performance' The Energy Model uses the concept of performance domains and capacity states in order to calculate the power used by CPUs. Changing the naming convention from capacity to performance state enables wider usage in the future, e.g. the upcoming support for devices other than CPUs. Acked-by: Daniel Lezcano Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/thermal/cpufreq_cooling.c | 12 ++--- include/linux/energy_model.h | 86 +++++++++++++++++-------------- kernel/power/energy_model.c | 44 ++++++++-------- kernel/sched/topology.c | 20 +++---- 4 files changed, 84 insertions(+), 78 deletions(-) diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 9e124020519f..641995ebc107 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -333,18 +333,18 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev, return false; policy = cpufreq_cdev->policy; - if (!cpumask_equal(policy->related_cpus, to_cpumask(em->cpus))) { + if (!cpumask_equal(policy->related_cpus, em_span_cpus(em))) { pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n", - cpumask_pr_args(to_cpumask(em->cpus)), + cpumask_pr_args(em_span_cpus(em)), cpumask_pr_args(policy->related_cpus)); return false; } nr_levels = cpufreq_cdev->max_level + 1; - if (em->nr_cap_states != nr_levels) { - pr_err("The number of cap states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n", - cpumask_pr_args(to_cpumask(em->cpus)), - em->nr_cap_states, nr_levels); + if (em_pd_nr_perf_states(em) != nr_levels) { + pr_err("The number of performance states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n", + cpumask_pr_args(em_span_cpus(em)), + em_pd_nr_perf_states(em), nr_levels); return false; } diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index ade6486a3382..fe336a9eb5d4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -10,13 +10,13 @@ #include /** - * em_cap_state - Capacity state of a performance domain + * em_perf_state - Performance state of a performance domain * @frequency: The CPU frequency in KHz, for consistency with CPUFreq * @power: The power consumed by 1 CPU at this level, in milli-watts * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency */ -struct em_cap_state { +struct em_perf_state { unsigned long frequency; unsigned long power; unsigned long cost; @@ -24,8 +24,8 @@ struct em_cap_state { /** * em_perf_domain - Performance domain - * @table: List of capacity states, in ascending order - * @nr_cap_states: Number of capacity states + * @table: List of performance states, in ascending order + * @nr_perf_states: Number of performance states * @cpus: Cpumask covering the CPUs of the domain * * A "performance domain" represents a group of CPUs whose performance is * scaled together. All CPUs of a performance domain must have the same * micro-architecture. Performance domains often have a 1-to-1 mapping with * CPUFreq policies. 
*/ struct em_perf_domain { - struct em_cap_state *table; - int nr_cap_states; + struct em_perf_state *table; + int nr_perf_states; unsigned long cpus[]; }; +#define em_span_cpus(em) (to_cpumask((em)->cpus)) + #ifdef CONFIG_ENERGY_MODEL #define EM_CPU_MAX_POWER 0xFFFF struct em_data_callback { /** - * active_power() - Provide power at the next capacity state of a CPU - * @power : Active power at the capacity state in mW (modified) - * @freq : Frequency at the capacity state in kHz (modified) + * active_power() - Provide power at the next performance state of + * a CPU + * @power : Active power at the performance state in mW + * (modified) + * @freq : Frequency at the performance state in kHz + * (modified) * @cpu : CPU for which we do this operation * - * active_power() must find the lowest capacity state of 'cpu' above + * active_power() must find the lowest performance state of 'cpu' above * 'freq' and update 'power' and 'freq' to the matching active power * and frequency. * @@ -80,46 +85,46 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { unsigned long freq, scale_cpu; - struct em_cap_state *cs; + struct em_perf_state *ps; int i, cpu; /* - * In order to predict the capacity state, map the utilization of the - * most utilized CPU of the performance domain to a requested frequency, - * like schedutil. + * In order to predict the performance state, map the utilization of + * the most utilized CPU of the performance domain to a requested + * frequency, like schedutil. */ cpu = cpumask_first(to_cpumask(pd->cpus)); scale_cpu = arch_scale_cpu_capacity(cpu); - cs = &pd->table[pd->nr_cap_states - 1]; - freq = map_util_freq(max_util, cs->frequency, scale_cpu); + ps = &pd->table[pd->nr_perf_states - 1]; + freq = map_util_freq(max_util, ps->frequency, scale_cpu); /* - * Find the lowest capacity state of the Energy Model above the + * Find the lowest performance state of the Energy Model above the * requested frequency. */ - for (i = 0; i < pd->nr_cap_states; i++) { - cs = &pd->table[i]; - if (cs->frequency >= freq) + for (i = 0; i < pd->nr_perf_states; i++) { + ps = &pd->table[i]; + if (ps->frequency >= freq) break; } /* - * The capacity of a CPU in the domain at that capacity state (cs) + * The capacity of a CPU in the domain at the performance state (ps) * can be computed as: * - * cs->freq * scale_cpu - * cs->cap = -------------------- (1) + * ps->freq * scale_cpu + * ps->cap = -------------------- (1) * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in - * the EM), the energy consumed by this CPU at that capacity state is - * estimated as: + * the EM), the energy consumed by this CPU at that performance state + * is estimated as: * - * cs->power * cpu_util + * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * cs->cap + * ps->cap * - * since 'cpu_util / cs->cap' represents its percentage of busy time. + * since 'cpu_util / ps->cap' represents its percentage of busy time. 
* * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -129,34 +134,35 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * cs->power * cpu_max_freq cpu_util + * ps->power * cpu_max_freq cpu_util * cpu_nrg = ------------------------ * --------- (3) - * cs->freq scale_cpu + * ps->freq scale_cpu * - * The first term is static, and is stored in the em_cap_state struct - * as 'cs->cost'. + * The first term is static, and is stored in the em_perf_state struct + * as 'ps->cost'. * * Since all CPUs of the domain have the same micro-architecture, they - * share the same 'cs->cost', and the same CPU capacity. Hence, the + * share the same 'ps->cost', and the same CPU capacity. Hence, the * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * cs->cost * \Sum cpu_util + * ps->cost * \Sum cpu_util * pd_nrg = ------------------------ (4) * scale_cpu */ - return cs->cost * sum_util / scale_cpu; + return ps->cost * sum_util / scale_cpu; } /** - * em_pd_nr_cap_states() - Get the number of capacity states of a perf. domain + * em_pd_nr_perf_states() - Get the number of performance states of a perf. + * domain * @pd : performance domain for which this must be done * - * Return: the number of capacity states in the performance domain table + * Return: the number of performance states in the performance domain table */ -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { - return pd->nr_cap_states; + return pd->nr_perf_states; } #else @@ -177,7 +183,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, { return 0; } -static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) +static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0a9326f5f421..9892d548a0fa 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -27,18 +27,18 @@ static DEFINE_MUTEX(em_pd_mutex); #ifdef CONFIG_DEBUG_FS static struct dentry *rootdir; -static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd) +static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) { struct dentry *d; char name[24]; - snprintf(name, sizeof(name), "cs:%lu", cs->frequency); + snprintf(name, sizeof(name), "ps:%lu", ps->frequency); - /* Create per-cs directory */ + /* Create per-ps directory */ d = debugfs_create_dir(name, pd); - debugfs_create_ulong("frequency", 0444, d, &cs->frequency); - debugfs_create_ulong("power", 0444, d, &cs->power); - debugfs_create_ulong("cost", 0444, d, &cs->cost); + debugfs_create_ulong("frequency", 0444, d, &ps->frequency); + debugfs_create_ulong("power", 0444, d, &ps->power); + debugfs_create_ulong("cost", 0444, d, &ps->cost); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -62,9 +62,9 @@ static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops); - /* Create a sub-directory for each capacity state */ - for (i = 0; i < pd->nr_cap_states; i++) - em_debug_create_cs(&pd->table[i], d); + /* Create a sub-directory for each performance state */ + for (i = 0; i < pd->nr_perf_states; i++) + em_debug_create_ps(&pd->table[i], d); } static int 
__init em_debug_init(void) @@ -84,7 +84,7 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, unsigned long opp_eff, prev_opp_eff = ULONG_MAX; unsigned long power, freq, prev_freq = 0; int i, ret, cpu = cpumask_first(span); - struct em_cap_state *table; + struct em_perf_state *table; struct em_perf_domain *pd; u64 fmax; @@ -99,26 +99,26 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, if (!table) goto free_pd; - /* Build the list of capacity states for this performance domain */ + /* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* * active_power() is a driver callback which ceils 'freq' to - * lowest capacity state of 'cpu' above 'freq' and updates + * lowest performance state of 'cpu' above 'freq' and updates * 'power' and 'freq' accordingly. */ ret = cb->active_power(&power, &freq, cpu); if (ret) { - pr_err("pd%d: invalid cap. state: %d\n", cpu, ret); - goto free_cs_table; + pr_err("pd%d: invalid perf. state: %d\n", cpu, ret); + goto free_ps_table; } /* * We expect the driver callback to increase the frequency for - * higher capacity states. + * higher performance states. */ if (freq <= prev_freq) { pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq); - goto free_cs_table; + goto free_ps_table; } /* @@ -127,7 +127,7 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, */ if (!power || power > EM_CPU_MAX_POWER) { pr_err("pd%d: invalid power: %lu\n", cpu, power); - goto free_cs_table; + goto free_ps_table; } table[i].power = power; @@ -141,12 +141,12 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, */ opp_eff = freq / power; if (opp_eff >= prev_opp_eff) - pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n", + pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", cpu, i, i - 1); prev_opp_eff = opp_eff; } - /* Compute the cost of each capacity_state. */ + /* Compute the cost of each performance state. 
*/ fmax = (u64) table[nr_states - 1].frequency; for (i = 0; i < nr_states; i++) { table[i].cost = div64_u64(fmax * table[i].power, @@ -154,14 +154,14 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, } pd->table = table; - pd->nr_cap_states = nr_states; + pd->nr_perf_states = nr_states; cpumask_copy(to_cpumask(pd->cpus), span); em_debug_create_pd(pd, cpu); return pd; -free_cs_table: +free_ps_table: kfree(table); free_pd: kfree(pd); @@ -185,7 +185,7 @@ EXPORT_SYMBOL_GPL(em_cpu_get); /** * em_register_perf_domain() - Register the Energy Model of a performance domain * @span : Mask of CPUs in the performance domain - * @nr_states : Number of capacity states to register + * @nr_states : Number of performance states to register * @cb : Callback functions providing the data of the Energy Model * * Create Energy Model tables for a performance domain using the callbacks diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index ba81187bb7af..2f91d3126365 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -272,10 +272,10 @@ static void perf_domain_debug(const struct cpumask *cpu_map, printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map)); while (pd) { - printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }", + printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_pstate=%d }", cpumask_first(perf_domain_span(pd)), cpumask_pr_args(perf_domain_span(pd)), - em_pd_nr_cap_states(pd->em_pd)); + em_pd_nr_perf_states(pd->em_pd)); pd = pd->next; } @@ -313,26 +313,26 @@ static void sched_energy_set(bool has_eas) * * The complexity of the Energy Model is defined as: * - * C = nr_pd * (nr_cpus + nr_cs) + * C = nr_pd * (nr_cpus + nr_ps) * * with parameters defined as: * - nr_pd: the number of performance domains * - nr_cpus: the number of CPUs - * - nr_cs: the sum of the number of capacity states of all performance + * - nr_ps: the sum of the number of performance states of all performance * domains (for example, on a system with 2 performance domains, - * with 10 capacity states each, nr_cs = 2 * 10 = 20). + * with 10 performance states each, nr_ps = 2 * 10 = 20). * * It is generally not a good idea to use such a model in the wake-up path on * very complex platforms because of the associated scheduling overheads. The * arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs - * with per-CPU DVFS and less than 8 capacity states each, for example. + * with per-CPU DVFS and less than 8 performance states each, for example. */ #define EM_MAX_COMPLEXITY 2048 extern struct cpufreq_governor schedutil_gov; static bool build_perf_domains(const struct cpumask *cpu_map) { - int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map); + int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map); struct perf_domain *pd = NULL, *tmp; int cpu = cpumask_first(cpu_map); struct root_domain *rd = cpu_rq(cpu)->rd; @@ -384,15 +384,15 @@ static bool build_perf_domains(const struct cpumask *cpu_map) pd = tmp; /* - * Count performance domains and capacity states for the + * Count performance domains and performance states for the * complexity check. */ nr_pd++; - nr_cs += em_pd_nr_cap_states(pd->em_pd); + nr_ps += em_pd_nr_perf_states(pd->em_pd); } /* Bail out if the Energy Model complexity is too high. 
*/ - if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) { + if (nr_pd * (nr_ps + nr_cpus) > EM_MAX_COMPLEXITY) { WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n", cpumask_pr_args(cpu_map)); goto free; From 7d9895c7fbfc9c70afce7029b7de0f3f974adb88 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:48 +0100 Subject: [PATCH 02/71] PM / EM: introduce em_dev_register_perf_domain function Add a new function to the Energy Model framework which is going to support new devices. This function will help in the transition and make it smoother. For now it still checks whether the cpumask is a valid pointer; this check will be removed later when the new structures and infrastructure are ready. Acked-by: Daniel Lezcano Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 13 ++++++++++-- kernel/power/energy_model.c | 40 ++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index fe336a9eb5d4..7c048df98447 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -2,6 +2,7 @@ #ifndef _LINUX_ENERGY_MODEL_H #define _LINUX_ENERGY_MODEL_H #include +#include #include #include #include @@ -42,7 +43,7 @@ struct em_perf_domain { #define em_span_cpus(em) (to_cpumask((em)->cpus)) #ifdef CONFIG_ENERGY_MODEL -#define EM_CPU_MAX_POWER 0xFFFF +#define EM_MAX_POWER 0xFFFF struct em_data_callback { /** @@ -59,7 +60,7 @@ struct em_data_callback { * and frequency. * * The power is the one of a single CPU in the domain, expressed in - * milli-watts. It is expected to fit in the [0, EM_CPU_MAX_POWER] + * milli-watts. It is expected to fit in the [0, EM_MAX_POWER] * range. * * Return 0 on success. @@ -71,6 +72,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, struct em_data_callback *cb); +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *span); /** * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain @@ -174,6 +177,12 @@ static inline int em_register_perf_domain(cpumask_t *span, { return -EINVAL; } +static inline +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *span) +{ + return -EINVAL; +} static inline struct em_perf_domain *em_cpu_get(int cpu) { return NULL; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 9892d548a0fa..875b163e54ab 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -125,7 +125,7 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, * The power returned by active_state() is expected to be * positive, in milli-watts and to fit into 16 bits. 
*/ - if (!power || power > EM_CPU_MAX_POWER) { + if (!power || power > EM_MAX_POWER) { pr_err("pd%d: invalid power: %lu\n", cpu, power); goto free_ps_table; } @@ -183,10 +183,13 @@ struct em_perf_domain *em_cpu_get(int cpu) EXPORT_SYMBOL_GPL(em_cpu_get); /** - * em_register_perf_domain() - Register the Energy Model of a performance domain - * @span : Mask of CPUs in the performance domain + * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device + * @dev : Device for which the EM is to register * @nr_states : Number of performance states to register * @cb : Callback functions providing the data of the Energy Model + * @span : Pointer to cpumask_t, which in case of a CPU device is + * obligatory. It can be taken from i.e. 'policy->cpus'. For other + * type of devices this should be set to NULL. * * Create Energy Model tables for a performance domain using the callbacks * defined in cb. @@ -196,14 +199,14 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * * Return 0 on success */ -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb) +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *span) { unsigned long cap, prev_cap = 0; struct em_perf_domain *pd; int cpu, ret = 0; - if (!span || !nr_states || !cb) + if (!dev || !span || !nr_states || !cb) return -EINVAL; /* @@ -255,4 +258,29 @@ unlock: return ret; } +EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); + +/** + * em_register_perf_domain() - Register the Energy Model of a performance domain + * @span : Mask of CPUs in the performance domain + * @nr_states : Number of capacity states to register + * @cb : Callback functions providing the data of the Energy Model + * + * Create Energy Model tables for a performance domain using the callbacks + * defined in cb. + * + * If multiple clients register the same performance domain, all but the first + * registration will be ignored. + * + * Return 0 on success + */ +int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, + struct em_data_callback *cb) +{ + struct device *cpu_dev; + + cpu_dev = get_cpu_device(cpumask_first(span)); + + return em_dev_register_perf_domain(cpu_dev, nr_states, cb, span); +} EXPORT_SYMBOL_GPL(em_register_perf_domain); From d0351cc3b0f57214d157e4d589564730af2aedae Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:49 +0100 Subject: [PATCH 03/71] PM / EM: update callback structure and add device pointer The Energy Model framework is going to support devices other than CPUs. In order to make this happen, change the callback function and add a pointer to a device as an argument. Update the related users to use the new function and the new callback from the Energy Model. Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Acked-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/scmi-cpufreq.c | 11 +++-------- drivers/opp/of.c | 9 ++------- include/linux/energy_model.h | 15 ++++++++------- kernel/power/energy_model.c | 9 +++++---- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 61623e2ff149..11ee24e06d12 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -103,17 +103,12 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) } static int __maybe_unused -scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, int cpu) +scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, + struct device *cpu_dev) { - struct device *cpu_dev = get_cpu_device(cpu); unsigned long Hz; int ret, domain; - if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); - return -ENODEV; - } - domain = handle->perf_ops->device_domain_id(cpu_dev); if (domain < 0) return domain; @@ -200,7 +195,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = true; - em_register_perf_domain(policy->cpus, nr_opp, &em_cb); + em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); return 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 9a5873591a40..e273f419a4bf 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1216,9 +1216,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); * calculation failed because of missing parameters, 0 otherwise. */ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, - int cpu) + struct device *cpu_dev) { - struct device *cpu_dev; struct dev_pm_opp *opp; struct device_node *np; unsigned long mV, Hz; @@ -1226,10 +1225,6 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, u64 tmp; int ret; - cpu_dev = get_cpu_device(cpu); - if (!cpu_dev) - return -ENODEV; - np = of_node_get(cpu_dev->of_node); if (!np) return -EINVAL; @@ -1297,6 +1292,6 @@ void dev_pm_opp_of_register_em(struct cpumask *cpus) if (ret || !cap) return; - em_register_perf_domain(cpus, nr_opp, &em_cb); + em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, cpus); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em); diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 7c048df98447..7076cb22b247 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -48,24 +48,25 @@ struct em_perf_domain { struct em_data_callback { /** * active_power() - Provide power at the next performance state of - * a CPU + * a device * @power : Active power at the performance state in mW * (modified) * @freq : Frequency at the performance state in kHz * (modified) - * @cpu : CPU for which we do this operation + * @dev : Device for which we do this operation (can be a CPU) * - * active_power() must find the lowest performance state of 'cpu' above + * active_power() must find the lowest performance state of 'dev' above * 'freq' and update 'power' and 'freq' to the matching active power * and frequency. * - * The power is the one of a single CPU in the domain, expressed in - * milli-watts. It is expected to fit in the [0, EM_MAX_POWER] - * range. + * In case of CPUs, the power is the one of a single CPU in the domain, + * expressed in milli-watts. It is expected to fit in the + * [0, EM_MAX_POWER] range. * * Return 0 on success. 
*/ - int (*active_power)(unsigned long *power, unsigned long *freq, int cpu); + int (*active_power)(unsigned long *power, unsigned long *freq, + struct device *dev); }; #define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 875b163e54ab..5b8a1566526a 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -78,8 +78,9 @@ core_initcall(em_debug_init); #else /* CONFIG_DEBUG_FS */ static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {} #endif -static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, - struct em_data_callback *cb) +static struct em_perf_domain * +em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, + cpumask_t *span) { unsigned long opp_eff, prev_opp_eff = ULONG_MAX; unsigned long power, freq, prev_freq = 0; @@ -106,7 +107,7 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, * lowest performance state of 'cpu' above 'freq' and updates * 'power' and 'freq' accordingly. */ - ret = cb->active_power(&power, &freq, cpu); + ret = cb->active_power(&power, &freq, dev); if (ret) { pr_err("pd%d: invalid perf. state: %d\n", cpu, ret); goto free_ps_table; @@ -237,7 +238,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, } /* Create the performance domain and add it to the Energy Model. */ - pd = em_create_pd(span, nr_states, cb); + pd = em_create_pd(dev, nr_states, cb, span); if (!pd) { ret = -EINVAL; goto unlock; From 1bc138c622959979eb547be2d3bbc6442a5c80b0 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 10 Jun 2020 11:12:23 +0100 Subject: [PATCH 04/71] PM / EM: add support for other devices than CPUs in Energy Model Add support for devices other than CPUs. The registration function does not require a valid cpumask pointer and is ready to handle new devices. Some of the internal structures have been reorganized in order to keep a consistent view (like removing the per_cpu pd pointers). Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 5 + include/linux/energy_model.h | 29 +++- kernel/power/energy_model.c | 254 ++++++++++++++++++++++++----------- 3 files changed, 199 insertions(+), 89 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024..b72e6f9ad845 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -13,6 +13,7 @@ #define _DEVICE_H_ #include +#include #include #include #include @@ -559,6 +560,10 @@ struct device { struct dev_pm_info power; struct dev_pm_domain *pm_domain; +#ifdef CONFIG_ENERGY_MODEL + struct em_perf_domain *em_pd; +#endif + #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN struct irq_domain *msi_domain; #endif diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 7076cb22b247..2d4689964029 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -12,8 +12,10 @@ /** * em_perf_state - Performance state of a performance domain - * @frequency: The CPU frequency in KHz, for consistency with CPUFreq - * @power: The power consumed by 1 CPU at this level, in milli-watts + * @frequency: The frequency in KHz, for consistency with CPUFreq + * @power: The power consumed at this level, in milli-watts (by 1 CPU or + by a registered device). It can be a total power: static and + dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. 
Equal to: power * max_frequency / frequency */ @@ -27,12 +29,16 @@ struct em_perf_state { * em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states - * @cpus: Cpumask covering the CPUs of the domain + * @cpus: Cpumask covering the CPUs of the domain. It's here + * for performance reasons to avoid potential cache + * misses during energy calculations in the scheduler + * and simplifies allocating/freeing that memory region. * - * A "performance domain" represents a group of CPUs whose performance is - * scaled together. All CPUs of a performance domain must have the same - * micro-architecture. Performance domains often have a 1-to-1 mapping with - * CPUFreq policies. + * In case of CPU device, a "performance domain" represents a group of CPUs + * whose performance is scaled together. All CPUs of a performance domain + * must have the same micro-architecture. Performance domains often have + * a 1-to-1 mapping with CPUFreq policies. In case of other devices the @cpus + * field is unused. */ struct em_perf_domain { struct em_perf_state *table; @@ -71,10 +77,12 @@ struct em_data_callback { #define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb } struct em_perf_domain *em_cpu_get(int cpu); +struct em_perf_domain *em_pd_get(struct device *dev); int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, struct em_data_callback *cb); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span); +void em_dev_unregister_perf_domain(struct device *dev); /** * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain @@ -184,10 +192,17 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, { return -EINVAL; } +static inline void em_dev_unregister_perf_domain(struct device *dev) +{ +} static inline struct em_perf_domain *em_cpu_get(int cpu) { return NULL; } +static inline struct em_perf_domain *em_pd_get(struct device *dev) +{ + return NULL; +} static inline unsigned long em_pd_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 5b8a1566526a..32d76e78f992 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Energy Model of CPUs + * Energy Model of devices * - * Copyright (c) 2018, Arm ltd. + * Copyright (c) 2018-2020, Arm ltd. * Written by: Quentin Perret, Arm ltd. + * Improvements provided by: Lukasz Luba, Arm ltd. */ #define pr_fmt(fmt) "energy_model: " fmt @@ -15,15 +16,17 @@ #include #include -/* Mapping of each CPU to the performance domain to which it belongs. */ -static DEFINE_PER_CPU(struct em_perf_domain *, em_data); - /* * Mutex serializing the registrations of performance domains and letting * callbacks defined by drivers sleep. 
*/ static DEFINE_MUTEX(em_pd_mutex); +static bool _is_cpu_device(struct device *dev) +{ + return (dev->bus == &cpu_subsys); +} + #ifdef CONFIG_DEBUG_FS static struct dentry *rootdir; @@ -49,22 +52,30 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); -static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) +static void em_debug_create_pd(struct device *dev) { struct dentry *d; - char name[8]; int i; - snprintf(name, sizeof(name), "pd%d", cpu); - /* Create the directory of the performance domain */ - d = debugfs_create_dir(name, rootdir); + d = debugfs_create_dir(dev_name(dev), rootdir); - debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops); + if (_is_cpu_device(dev)) + debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, + &em_debug_cpus_fops); /* Create a sub-directory for each performance state */ - for (i = 0; i < pd->nr_perf_states; i++) - em_debug_create_ps(&pd->table[i], d); + for (i = 0; i < dev->em_pd->nr_perf_states; i++) + em_debug_create_ps(&dev->em_pd->table[i], d); + +} + +static void em_debug_remove_pd(struct device *dev) +{ + struct dentry *debug_dir; + + debug_dir = debugfs_lookup(dev_name(dev), rootdir); + debugfs_remove_recursive(debug_dir); } static int __init em_debug_init(void) @@ -76,40 +87,34 @@ static int __init em_debug_init(void) } core_initcall(em_debug_init); #else /* CONFIG_DEBUG_FS */ -static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {} +static void em_debug_create_pd(struct device *dev) {} +static void em_debug_remove_pd(struct device *dev) {} #endif -static struct em_perf_domain * -em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, - cpumask_t *span) + +static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, + int nr_states, struct em_data_callback *cb) { unsigned long opp_eff, prev_opp_eff = ULONG_MAX; unsigned long power, freq, prev_freq = 0; - int i, ret, cpu = cpumask_first(span); struct em_perf_state *table; - struct em_perf_domain *pd; + int i, ret; u64 fmax; - if (!cb->active_power) - return NULL; - - pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); - if (!pd) - return NULL; - table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); if (!table) - goto free_pd; + return -ENOMEM; /* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* * active_power() is a driver callback which ceils 'freq' to - * lowest performance state of 'cpu' above 'freq' and updates + * lowest performance state of 'dev' above 'freq' and updates * 'power' and 'freq' accordingly. */ ret = cb->active_power(&power, &freq, dev); if (ret) { - pr_err("pd%d: invalid perf. state: %d\n", cpu, ret); + dev_err(dev, "EM: invalid perf. state: %d\n", + ret); goto free_ps_table; } @@ -118,7 +123,8 @@ em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, * higher performance states. */ if (freq <= prev_freq) { - pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq); + dev_err(dev, "EM: non-increasing freq: %lu\n", + freq); goto free_ps_table; } @@ -127,7 +133,8 @@ em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, * positive, in milli-watts and to fit into 16 bits. 
*/ if (!power || power > EM_MAX_POWER) { - pr_err("pd%d: invalid power: %lu\n", cpu, power); + dev_err(dev, "EM: invalid power: %lu\n", + power); goto free_ps_table; } @@ -142,8 +149,8 @@ em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, */ opp_eff = freq / power; if (opp_eff >= prev_opp_eff) - pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", - cpu, i, i - 1); + dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", + i, i - 1); prev_opp_eff = opp_eff; } @@ -156,30 +163,82 @@ em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, pd->table = table; pd->nr_perf_states = nr_states; - cpumask_copy(to_cpumask(pd->cpus), span); - em_debug_create_pd(pd, cpu); - - return pd; + return 0; free_ps_table: kfree(table); -free_pd: - kfree(pd); - - return NULL; + return -EINVAL; } +static int em_create_pd(struct device *dev, int nr_states, + struct em_data_callback *cb, cpumask_t *cpus) +{ + struct em_perf_domain *pd; + struct device *cpu_dev; + int cpu, ret; + + if (_is_cpu_device(dev)) { + pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + cpumask_copy(em_span_cpus(pd), cpus); + } else { + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + } + + ret = em_create_perf_table(dev, pd, nr_states, cb); + if (ret) { + kfree(pd); + return ret; + } + + if (_is_cpu_device(dev)) + for_each_cpu(cpu, cpus) { + cpu_dev = get_cpu_device(cpu); + cpu_dev->em_pd = pd; + } + + dev->em_pd = pd; + + return 0; +} + +/** + * em_pd_get() - Return the performance domain for a device + * @dev : Device to find the performance domain for + * + * Returns the performance domain to which @dev belongs, or NULL if it doesn't + * exist. + */ +struct em_perf_domain *em_pd_get(struct device *dev) +{ + if (IS_ERR_OR_NULL(dev)) + return NULL; + + return dev->em_pd; +} +EXPORT_SYMBOL_GPL(em_pd_get); + /** * em_cpu_get() - Return the performance domain for a CPU * @cpu : CPU to find the performance domain for * - * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't + * Returns the performance domain to which @cpu belongs, or NULL if it doesn't * exist. */ struct em_perf_domain *em_cpu_get(int cpu) { - return READ_ONCE(per_cpu(em_data, cpu)); + struct device *cpu_dev; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return NULL; + + return em_pd_get(cpu_dev); } EXPORT_SYMBOL_GPL(em_cpu_get); @@ -188,7 +247,7 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * @dev : Device for which the EM is to register * @nr_states : Number of performance states to register * @cb : Callback functions providing the data of the Energy Model - * @span : Pointer to cpumask_t, which in case of a CPU device is + * @cpus : Pointer to cpumask_t, which in case of a CPU device is * obligatory. It can be taken from i.e. 'policy->cpus'. For other * type of devices this should be set to NULL. 
* @@ -201,13 +260,12 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * Return 0 on success */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span) + struct em_data_callback *cb, cpumask_t *cpus) { unsigned long cap, prev_cap = 0; - struct em_perf_domain *pd; - int cpu, ret = 0; + int cpu, ret; - if (!dev || !span || !nr_states || !cb) + if (!dev || !nr_states || !cb) return -EINVAL; /* @@ -216,47 +274,50 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, */ mutex_lock(&em_pd_mutex); - for_each_cpu(cpu, span) { - /* Make sure we don't register again an existing domain. */ - if (READ_ONCE(per_cpu(em_data, cpu))) { - ret = -EEXIST; - goto unlock; - } - - /* - * All CPUs of a domain must have the same micro-architecture - * since they all share the same table. - */ - cap = arch_scale_cpu_capacity(cpu); - if (prev_cap && prev_cap != cap) { - pr_err("CPUs of %*pbl must have the same capacity\n", - cpumask_pr_args(span)); - ret = -EINVAL; - goto unlock; - } - prev_cap = cap; - } - - /* Create the performance domain and add it to the Energy Model. */ - pd = em_create_pd(dev, nr_states, cb, span); - if (!pd) { - ret = -EINVAL; + if (dev->em_pd) { + ret = -EEXIST; goto unlock; } - for_each_cpu(cpu, span) { - /* - * The per-cpu array can be read concurrently from em_cpu_get(). - * The barrier enforces the ordering needed to make sure readers - * can only access well formed em_perf_domain structs. - */ - smp_store_release(per_cpu_ptr(&em_data, cpu), pd); + if (_is_cpu_device(dev)) { + if (!cpus) { + dev_err(dev, "EM: invalid CPU mask\n"); + ret = -EINVAL; + goto unlock; + } + + for_each_cpu(cpu, cpus) { + if (em_cpu_get(cpu)) { + dev_err(dev, "EM: exists for CPU%d\n", cpu); + ret = -EEXIST; + goto unlock; + } + /* + * All CPUs of a domain must have the same + * micro-architecture since they all share the same + * table. + */ + cap = arch_scale_cpu_capacity(cpu); + if (prev_cap && prev_cap != cap) { + dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n", + cpumask_pr_args(cpus)); + + ret = -EINVAL; + goto unlock; + } + prev_cap = cap; + } } - pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span)); + ret = em_create_pd(dev, nr_states, cb, cpus); + if (ret) + goto unlock; + + em_debug_create_pd(dev); + dev_info(dev, "EM: created perf domain\n"); + unlock: mutex_unlock(&em_pd_mutex); - return ret; } EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); @@ -285,3 +346,32 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, return em_dev_register_perf_domain(cpu_dev, nr_states, cb, span); } EXPORT_SYMBOL_GPL(em_register_perf_domain); + +/** + * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device + * @dev : Device for which the EM is registered + * + * Unregister the EM for the specified @dev (but not a CPU device). + */ +void em_dev_unregister_perf_domain(struct device *dev) +{ + if (IS_ERR_OR_NULL(dev) || !dev->em_pd) + return; + + if (_is_cpu_device(dev)) + return; + + /* + * The mutex separates all register/unregister requests and protects + * from potential clean-up/setup issues in the debugfs directories. + * The debugfs directory name is the same as device's name. 
+ */ + mutex_lock(&em_pd_mutex); + em_debug_remove_pd(dev); + + kfree(dev->em_pd->table); + kfree(dev->em_pd); + dev->em_pd = NULL; + mutex_unlock(&em_pd_mutex); +} +EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); From 07891f15d91317b2220a0b610a2d7e324a88105d Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:51 +0100 Subject: [PATCH 05/71] PM / EM: remove em_register_perf_domain Remove the old function em_register_perf_domain, which is no longer needed. em_dev_register_perf_domain covers both the old and the new use cases. Acked-by: Daniel Lezcano Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 7 ------- kernel/power/energy_model.c | 25 ------------------------- 2 files changed, 32 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 2d4689964029..0f94e871a202 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -78,8 +78,6 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span); void em_dev_unregister_perf_domain(struct device *dev); @@ -181,11 +179,6 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) struct em_data_callback {}; #define EM_DATA_CB(_active_power_cb) { } -static inline int em_register_perf_domain(cpumask_t *span, - unsigned int nr_states, struct em_data_callback *cb) -{ - return -EINVAL; -} static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 32d76e78f992..c1ff7fa030ab 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -322,31 +322,6 @@ unlock: } EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); -/** - * em_register_perf_domain() - Register the Energy Model of a performance domain - * @span : Mask of CPUs in the performance domain - * @nr_states : Number of capacity states to register - * @cb : Callback functions providing the data of the Energy Model - * - * Create Energy Model tables for a performance domain using the callbacks - * defined in cb. - * - * If multiple clients register the same performance domain, all but the first - * registration will be ignored. - * - * Return 0 on success - */ -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb) -{ - struct device *cpu_dev; - - cpu_dev = get_cpu_device(cpumask_first(span)); - - return em_dev_register_perf_domain(cpu_dev, nr_states, cb, span); -} -EXPORT_SYMBOL_GPL(em_register_perf_domain); From f0b5694791ce70dba16758c3b838d5ddc7731b02 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:52 +0100 Subject: [PATCH 06/71] PM / EM: change name of em_pd_energy to em_cpu_energy The Energy Model framework now supports devices other than CPUs. Refactor some of the functions in order to prevent wrong usage. The old function em_pd_energy has too generic a name. It must not be used without a proper cpumask pointer, which is only possible for CPU devices. 
Thus, rename it and add a proper description to warn against potential wrong usage for other devices. Acked-by: Daniel Lezcano Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 11 ++++++++--- kernel/sched/fair.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 0f94e871a202..b67a51c574b9 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -83,15 +83,20 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, void em_dev_unregister_perf_domain(struct device *dev); /** - * em_pd_energy() - Estimates the energy consumed by the CPUs of a perf. domain + * em_cpu_energy() - Estimates the energy consumed by the CPUs of a + performance domain * @pd : performance domain for which energy has to be estimated * @max_util : highest utilization among CPUs of the domain * @sum_util : sum of the utilization of all CPUs in the domain * + * This function must be used only for CPU devices. There is no validation, + * i.e. if the EM is a CPU type and has cpumask allocated. It is called from + * the scheduler code quite frequently and that is why there is not checks. + * * Return: the sum of the energy consumed by the CPUs of the domain assuming * a capacity state satisfying the max utilization of the domain. */ -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { unsigned long freq, scale_cpu; @@ -196,7 +201,7 @@ static inline struct em_perf_domain *em_pd_get(struct device *dev) { return NULL; } -static inline unsigned long em_pd_energy(struct em_perf_domain *pd, +static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util) { return 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cbcb2f71599b..6da601c8d383 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6497,7 +6497,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) max_util = max(max_util, cpu_util); } - return em_pd_energy(pd->em_pd, max_util, sum_util); + return em_cpu_energy(pd->em_pd, max_util, sum_util); } /* From 7b7570ad0d76410bdefe61d77aa624900e2396ce Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:53 +0100 Subject: [PATCH 07/71] Documentation: power: update Energy Model description The Energy Model framework now also supports devices other than CPUs. Update the related information and add a description of the new usage. Acked-by: Daniel Lezcano Acked-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- Documentation/power/energy-model.rst | 135 +++++++++++++++------------ 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 90a345d57ae9..a6fb986abe3c 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -1,15 +1,17 @@ -==================== -Energy Model of CPUs -==================== +.. SPDX-License-Identifier: GPL-2.0 + +======================= +Energy Model of devices +======================= 1. 
Overview ----------- The Energy Model (EM) framework serves as an interface between drivers knowing -the power consumed by CPUs at various performance levels, and the kernel +the power consumed by devices at various performance levels, and the kernel subsystems willing to use that information to make energy-aware decisions. -The source of the information about the power consumed by CPUs can vary greatly +The source of the information about the power consumed by devices can vary greatly from one platform to another. These power costs can be estimated using devicetree data in some cases. In others, the firmware will know better. Alternatively, userspace might be best positioned. And so on. In order to avoid @@ -25,7 +27,7 @@ framework, and interested clients reading the data from it:: +---------------+ +-----------------+ +---------------+ | Thermal (IPA) | | Scheduler (EAS) | | Other | +---------------+ +-----------------+ +---------------+ - | | em_pd_energy() | + | | em_cpu_energy() | | | em_cpu_get() | +---------+ | +---------+ | | | @@ -35,7 +37,7 @@ framework, and interested clients reading the data from it:: | Framework | +---------------------+ ^ ^ ^ - | | | em_register_perf_domain() + | | | em_dev_register_perf_domain() +----------+ | +---------+ | | | +---------------+ +---------------+ +--------------+ @@ -47,12 +49,12 @@ framework, and interested clients reading the data from it:: | Device Tree | | Firmware | | ? | +--------------+ +---------------+ +--------------+ -The EM framework manages power cost tables per 'performance domain' in the -system. A performance domain is a group of CPUs whose performance is scaled -together. Performance domains generally have a 1-to-1 mapping with CPUFreq -policies. All CPUs in a performance domain are required to have the same -micro-architecture. CPUs in different performance domains can have different -micro-architectures. +In case of CPU devices the EM framework manages power cost tables per +'performance domain' in the system. A performance domain is a group of CPUs +whose performance is scaled together. Performance domains generally have a +1-to-1 mapping with CPUFreq policies. All CPUs in a performance domain are +required to have the same micro-architecture. CPUs in different performance +domains can have different micro-architectures. 2. Core APIs @@ -70,14 +72,16 @@ CONFIG_ENERGY_MODEL must be enabled to use the EM framework. Drivers are expected to register performance domains into the EM framework by calling the following API:: - int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb); + int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *cpus); -Drivers must specify the CPUs of the performance domains using the cpumask -argument, and provide a callback function returning tuples -for each capacity state. The callback function provided by the driver is free +Drivers must provide a callback function returning tuples +for each performance state. The callback function provided by the driver is free to fetch data from any relevant location (DT, firmware, ...), and by any mean -deemed necessary. See Section 3. for an example of driver implementing this +deemed necessary. Only for CPU devices, drivers must specify the CPUs of the +performance domains using cpumask. For other devices than CPUs the last +argument must be set to NULL. +See Section 3. 
for an example of driver implementing this callback, and kernel/power/energy_model.c for further documentation on this API. @@ -85,13 +89,20 @@ API. 2.3 Accessing performance domains ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +There are two API functions which provide the access to the energy model: +em_cpu_get() which takes CPU id as an argument and em_pd_get() with device +pointer as an argument. It depends on the subsystem which interface it is +going to use, but in case of CPU devices both functions return the same +performance domain. + Subsystems interested in the energy model of a CPU can retrieve it using the em_cpu_get() API. The energy model tables are allocated once upon creation of the performance domains, and kept in memory untouched. The energy consumed by a performance domain can be estimated using the -em_pd_energy() API. The estimation is performed assuming that the schedutil -CPUfreq governor is in use. +em_cpu_energy() API. The estimation is performed assuming that the schedutil +CPUfreq governor is in use in case of CPU device. Currently this calculation is +not provided for other type of devices. More details about the above APIs can be found in include/linux/energy_model.h. @@ -106,42 +117,46 @@ EM framework:: -> drivers/cpufreq/foo_cpufreq.c - 01 static int est_power(unsigned long *mW, unsigned long *KHz, int cpu) - 02 { - 03 long freq, power; - 04 - 05 /* Use the 'foo' protocol to ceil the frequency */ - 06 freq = foo_get_freq_ceil(cpu, *KHz); - 07 if (freq < 0); - 08 return freq; - 09 - 10 /* Estimate the power cost for the CPU at the relevant freq. */ - 11 power = foo_estimate_power(cpu, freq); - 12 if (power < 0); - 13 return power; - 14 - 15 /* Return the values to the EM framework */ - 16 *mW = power; - 17 *KHz = freq; - 18 - 19 return 0; - 20 } - 21 - 22 static int foo_cpufreq_init(struct cpufreq_policy *policy) - 23 { - 24 struct em_data_callback em_cb = EM_DATA_CB(est_power); - 25 int nr_opp, ret; - 26 - 27 /* Do the actual CPUFreq init work ... */ - 28 ret = do_foo_cpufreq_init(policy); - 29 if (ret) - 30 return ret; - 31 - 32 /* Find the number of OPPs for this policy */ - 33 nr_opp = foo_get_nr_opp(policy); - 34 - 35 /* And register the new performance domain */ - 36 em_register_perf_domain(policy->cpus, nr_opp, &em_cb); - 37 - 38 return 0; - 39 } + 01 static int est_power(unsigned long *mW, unsigned long *KHz, + 02 struct device *dev) + 03 { + 04 long freq, power; + 05 + 06 /* Use the 'foo' protocol to ceil the frequency */ + 07 freq = foo_get_freq_ceil(dev, *KHz); + 08 if (freq < 0); + 09 return freq; + 10 + 11 /* Estimate the power cost for the dev at the relevant freq. */ + 12 power = foo_estimate_power(dev, freq); + 13 if (power < 0); + 14 return power; + 15 + 16 /* Return the values to the EM framework */ + 17 *mW = power; + 18 *KHz = freq; + 19 + 20 return 0; + 21 } + 22 + 23 static int foo_cpufreq_init(struct cpufreq_policy *policy) + 24 { + 25 struct em_data_callback em_cb = EM_DATA_CB(est_power); + 26 struct device *cpu_dev; + 27 int nr_opp, ret; + 28 + 29 cpu_dev = get_cpu_device(cpumask_first(policy->cpus)); + 30 + 31 /* Do the actual CPUFreq init work ... 
*/ + 32 ret = do_foo_cpufreq_init(policy); + 33 if (ret) + 34 return ret; + 35 + 36 /* Find the number of OPPs for this policy */ + 37 nr_opp = foo_get_nr_opp(policy); + 38 + 39 /* And register the new performance domain */ + 40 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus); + 41 + 42 return 0; + 43 } From 0e0ffa855d1590e54ec0033404a49e2e57e294fe Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 27 May 2020 10:58:54 +0100 Subject: [PATCH 08/71] OPP: refactor dev_pm_opp_of_register_em() and update related drivers The Energy Model framework now supports devices other than CPUs. Drop the CPU-specific interface that takes only a cpumask and add a struct device argument. Also add a return value, which callers might use. This new interface provides an easy way to create a simple Energy Model, which can then be used by e.g. the thermal subsystem. Acked-by: Daniel Lezcano Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 2 +- drivers/cpufreq/imx6q-cpufreq.c | 2 +- drivers/cpufreq/mediatek-cpufreq.c | 2 +- drivers/cpufreq/omap-cpufreq.c | 2 +- drivers/cpufreq/qcom-cpufreq-hw.c | 2 +- drivers/cpufreq/scpi-cpufreq.c | 2 +- drivers/cpufreq/vexpress-spc-cpufreq.c | 2 +- drivers/opp/of.c | 71 ++++++++++++++++---------- include/linux/pm_opp.h | 15 +++++- 9 files changed, 65 insertions(+), 35 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 79742bbd221f..944d7b45afe9 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -279,7 +279,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index fdb2ffffbd15..ef7b34c1fd2b 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -193,7 +193,7 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy) policy->clk = clks[ARM].clk; cpufreq_generic_init(policy, freq_table, transition_latency); policy->suspend_freq = max_freq; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 0c98dd08273d..7d1212c9b7c8 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -448,7 +448,7 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) policy->driver_data = info; policy->clk = info->cpu_clk; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(info->cpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 8d14b42a8c6f..3694bb030df3 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -131,7 +131,7 @@ static int omap_cpu_init(struct cpufreq_policy *policy) /* FIXME: what's the actual transition time? 
*/ cpufreq_generic_init(policy, freq_table, 300 * 1000); - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(mpu_dev, policy->cpus); return 0; } diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index fc92a8842e25..0a04b6f03b9a 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -238,7 +238,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) goto error; } - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); policy->fast_switch_possible = true; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 20d1f85d5f5a..b0f5388b8854 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -167,7 +167,7 @@ static int scpi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = false; - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); return 0; diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 83c85d3d67e3..4e8b1dee7c9a 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -450,7 +450,7 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy) policy->freq_table = freq_table[cur_cluster]; policy->cpuinfo.transition_latency = 1000000; /* 1 ms */ - dev_pm_opp_of_register_em(policy->cpus); + dev_pm_opp_of_register_em(cpu_dev, policy->cpus); if (is_bL_switching_enabled()) per_cpu(cpu_last_req_freq, policy->cpu) = diff --git a/drivers/opp/of.c b/drivers/opp/of.c index e273f419a4bf..4aa42739599e 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1205,18 +1205,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); /* * Callback function provided to the Energy Model framework upon registration. - * This computes the power estimated by @CPU at @kHz if it is the frequency + * This computes the power estimated by @dev at @kHz if it is the frequency * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled * frequency and @mW to the associated power. The power is estimated as - * P = C * V^2 * f with C being the CPU's capacitance and V and f respectively - * the voltage and frequency of the OPP. + * P = C * V^2 * f with C being the device's capacitance and V and f + * respectively the voltage and frequency of the OPP. * - * Returns -ENODEV if the CPU device cannot be found, -EINVAL if the power - * calculation failed because of missing parameters, 0 otherwise. + * Returns -EINVAL if the power calculation failed because of missing + * parameters, 0 otherwise. 
*/ -static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, - struct device *cpu_dev) +static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz, + struct device *dev) { struct dev_pm_opp *opp; struct device_node *np; @@ -1225,7 +1225,7 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, u64 tmp; int ret; - np = of_node_get(cpu_dev->of_node); + np = of_node_get(dev->of_node); if (!np) return -EINVAL; @@ -1235,7 +1235,7 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, return -EINVAL; Hz = *kHz * 1000; - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz); + opp = dev_pm_opp_find_freq_ceil(dev, &Hz); if (IS_ERR(opp)) return -EINVAL; @@ -1255,30 +1255,38 @@ static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz, /** * dev_pm_opp_of_register_em() - Attempt to register an Energy Model - * @cpus : CPUs for which an Energy Model has to be registered + * @dev : Device for which an Energy Model has to be registered + * @cpus : CPUs for which an Energy Model has to be registered. For + * other type of devices it should be set to NULL. * * This checks whether the "dynamic-power-coefficient" devicetree property has * been specified, and tries to register an Energy Model with it if it has. + * Having this property means the voltages are known for OPPs and the EM + * might be calculated. */ -void dev_pm_opp_of_register_em(struct cpumask *cpus) +int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus) { - struct em_data_callback em_cb = EM_DATA_CB(_get_cpu_power); - int ret, nr_opp, cpu = cpumask_first(cpus); - struct device *cpu_dev; + struct em_data_callback em_cb = EM_DATA_CB(_get_power); struct device_node *np; + int ret, nr_opp; u32 cap; - cpu_dev = get_cpu_device(cpu); - if (!cpu_dev) - return; + if (IS_ERR_OR_NULL(dev)) { + ret = -EINVAL; + goto failed; + } - nr_opp = dev_pm_opp_get_opp_count(cpu_dev); - if (nr_opp <= 0) - return; + nr_opp = dev_pm_opp_get_opp_count(dev); + if (nr_opp <= 0) { + ret = -EINVAL; + goto failed; + } - np = of_node_get(cpu_dev->of_node); - if (!np) - return; + np = of_node_get(dev->of_node); + if (!np) { + ret = -EINVAL; + goto failed; + } /* * Register an EM only if the 'dynamic-power-coefficient' property is @@ -1289,9 +1297,20 @@ void dev_pm_opp_of_register_em(struct cpumask *cpus) */ ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap); of_node_put(np); - if (ret || !cap) - return; + if (ret || !cap) { + dev_dbg(dev, "Couldn't find proper 'dynamic-power-coefficient' in DT\n"); + ret = -EINVAL; + goto failed; + } - em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, cpus); + ret = em_dev_register_perf_domain(dev, nr_opp, &em_cb, cpus); + if (ret) + goto failed; + + return 0; + +failed: + dev_dbg(dev, "Couldn't register Energy Model %d\n", ret); + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index d5c4a329321d..ee34c553f6bf 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OPP_H__ #define __LINUX_OPP_H__ +#include #include #include @@ -373,7 +374,11 @@ struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); -void dev_pm_opp_of_register_em(struct cpumask 
*cpus); +int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus); +static inline void dev_pm_opp_of_unregister_em(struct device *dev) +{ + em_dev_unregister_perf_domain(dev); +} #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -413,7 +418,13 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) return NULL; } -static inline void dev_pm_opp_of_register_em(struct cpumask *cpus) +static inline int dev_pm_opp_of_register_em(struct device *dev, + struct cpumask *cpus) +{ + return -ENOTSUPP; +} + +static inline void dev_pm_opp_of_unregister_em(struct device *dev) { } From dab20177b626198603176604c0f85ea1d67044ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Jun 2020 13:58:28 +0200 Subject: [PATCH 09/71] intel_idle: Eliminate redundant static variable The value of the lapic_timer_always_reliable static variable in the intel_idle driver reflects the boot_cpu_has(X86_FEATURE_ARAT) value and so it also reflects the static_cpu_has(X86_FEATURE_ARAT) value. Hence, the lapic_timer_always_reliable check in intel_idle() is redundant and apart from this lapic_timer_always_reliable is only used in two places in which boot_cpu_has(X86_FEATURE_ARAT) can be used directly. Eliminate the lapic_timer_always_reliable variable in accordance with the above observations. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index f4495841bf68..fa23a7ea01ac 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -66,8 +66,6 @@ static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; static bool disable_promotion_to_c1e; -static bool lapic_timer_always_reliable; - struct idle_cpu { struct cpuidle_state *state_table; @@ -142,7 +140,7 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) leave_mm(cpu); - if (!static_cpu_has(X86_FEATURE_ARAT) && !lapic_timer_always_reliable) { + if (!static_cpu_has(X86_FEATURE_ARAT)) { /* * Switch over to one-shot tick broadcast if the target C-state * is deeper than C1. @@ -1562,7 +1560,7 @@ static int intel_idle_cpu_online(unsigned int cpu) { struct cpuidle_device *dev; - if (!lapic_timer_always_reliable) + if (!boot_cpu_has(X86_FEATURE_ARAT)) tick_broadcast_enable(); /* @@ -1655,16 +1653,13 @@ static int __init intel_idle_init(void) goto init_driver_fail; } - if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_always_reliable = true; - retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", intel_idle_cpu_online, NULL); if (retval < 0) goto hp_setup_fail; pr_debug("Local APIC timer is reliable in %s\n", - lapic_timer_always_reliable ? "all C-states" : "C1"); + boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); return 0; From 2d798d9f5967a3f134b1c55acbbe026c032e3429 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 29 Jun 2020 13:34:50 +0800 Subject: [PATCH 10/71] powercap: intel_rapl: add support for Sapphire Rapids RAPL on SPR behaves similar to Haswell server, except that SPR uses a fixed energy unit (1 Joule) for the PSYS RAPL domain. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl_common.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 61a63a16b5e7..b739ce4f390d 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -93,6 +93,7 @@ struct rapl_defaults { u64 (*compute_time_window)(struct rapl_package *rp, u64 val, bool to_raw); unsigned int dram_domain_energy_unit; + unsigned int psys_domain_energy_unit; }; static struct rapl_defaults *rapl_defaults; @@ -533,12 +534,23 @@ static void rapl_init_domains(struct rapl_package *rp) for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) rd->regs[j] = rp->priv->regs[i][j]; - if (i == RAPL_DOMAIN_DRAM) { + switch (i) { + case RAPL_DOMAIN_DRAM: rd->domain_energy_unit = rapl_defaults->dram_domain_energy_unit; if (rd->domain_energy_unit) pr_info("DRAM domain energy unit %dpj\n", rd->domain_energy_unit); + break; + case RAPL_DOMAIN_PLATFORM: + rd->domain_energy_unit = + rapl_defaults->psys_domain_energy_unit; + if (rd->domain_energy_unit) + pr_info("Platform domain energy unit %dpj\n", + rd->domain_energy_unit); + break; + default: + break; } rd++; } @@ -919,6 +931,14 @@ static const struct rapl_defaults rapl_defaults_hsw_server = { .dram_domain_energy_unit = 15300, }; +static const struct rapl_defaults rapl_defaults_spr_server = { + .check_unit = rapl_check_unit_core, + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, + .dram_domain_energy_unit = 15300, + .psys_domain_energy_unit = 1000000000, +}; + static const struct rapl_defaults rapl_defaults_byt = { .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, .check_unit = rapl_check_unit_atom, @@ -978,6 +998,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), From ed7bde7a6dab521ee9e28fe2264018f83d83c7fa Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 26 Jun 2020 11:34:00 -0700 Subject: [PATCH 11/71] cpufreq: intel_pstate: Allow enable/disable energy efficiency By default, the intel_pstate driver disables energy efficiency by setting MSR_IA32_POWER_CTL bit 19 for the Kaby Lake desktop CPU model in HWP mode. This CPU model is also shared by Coffee Lake desktop CPUs. This allows these systems to reach the maximum possible frequency, but it adds a power penalty which some customers don't want. They want some way to enable/disable it dynamically. So, add an additional attribute "energy_efficiency" under /sys/devices/system/cpu/intel_pstate/ for these CPU models. This allows bit 19 ("Disable Energy Efficiency Optimization") in the MSR IA32_POWER_CTL to be read and written. This attribute is present in both HWP and non-HWP mode as it has an effect in both modes. Refer to the Intel Software Developer's Manual for details. The scope of this bit is package wide, and these systems are single-package systems, so reading/writing the MSR on the current CPU is enough. The energy efficiency (EE) bit setting needs to be preserved during suspend/resume and CPU offline/online operation. 
To do this: - Restoring the EE setting from the cpufreq resume() callback, if there is change from the system default. - By default, don't disable EE from cpufreq init() callback for matching CPU models. Since the scope is package wide and is a single package system, move the disable EE calls from init() callback to intel_pstate_init() function, which is called only once. Suggested-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 11 +++ arch/x86/include/asm/msr-index.h | 6 +- drivers/cpufreq/intel_pstate.c | 97 ++++++++++++++----- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 39d80bc29ccd..25e1097fc332 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -431,6 +431,17 @@ argument is passed to the kernel in the command line. supported in the current configuration, writes to this attribute will fail with an appropriate error. +``energy_efficiency`` + This attribute is only present on platforms, which have CPUs matching + Kaby Lake or Coffee Lake desktop CPU model. By default + energy efficiency optimizations are disabled on these CPU models in HWP + mode by this driver. Enabling energy efficiency may limit maximum + operating frequency in both HWP and non HWP mode. In non HWP mode, + optimizations are done only in the turbo frequency range. In HWP mode, + optimizations are done in the entire frequency range. Setting this + attribute to "1" enables energy efficiency optimizations and setting + to "0" disables energy efficiency optimizations. + Interpretation of Policy Attributes ----------------------------------- diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e8370e64a155..21b409195b46 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -149,6 +149,10 @@ #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -253,8 +257,6 @@ #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e771e8b4f99f..fd1bef0b9bb4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -866,10 +866,39 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) return 0; } +#define POWER_CTL_EE_ENABLE 1 +#define POWER_CTL_EE_DISABLE 2 + +static int power_ctl_ee_state; + +static void set_power_ctl_ee_state(bool input) +{ + u64 power_ctl; + + mutex_lock(&intel_pstate_driver_lock); + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + if (input) { + power_ctl &= ~BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_ENABLE; + } else { + power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); + power_ctl_ee_state = POWER_CTL_EE_DISABLE; + } + wrmsrl(MSR_IA32_POWER_CTL, power_ctl); + mutex_unlock(&intel_pstate_driver_lock); +} + static void intel_pstate_hwp_enable(struct cpudata *cpudata); static int intel_pstate_resume(struct cpufreq_policy *policy) { + + /* Only restore if the system default is changed */ + if (power_ctl_ee_state == 
POWER_CTL_EE_ENABLE) + set_power_ctl_ee_state(true); + else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE) + set_power_ctl_ee_state(false); + if (!hwp_active) return 0; @@ -1218,6 +1247,32 @@ static ssize_t store_hwp_dynamic_boost(struct kobject *a, return count; } +static ssize_t show_energy_efficiency(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 power_ctl; + int enable; + + rdmsrl(MSR_IA32_POWER_CTL, power_ctl); + enable = !!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE)); + return sprintf(buf, "%d\n", !enable); +} + +static ssize_t store_energy_efficiency(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + bool input; + int ret; + + ret = kstrtobool(buf, &input); + if (ret) + return ret; + + set_power_ctl_ee_state(input); + + return count; +} + show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -1228,6 +1283,7 @@ define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); define_one_global_ro(num_pstates); define_one_global_rw(hwp_dynamic_boost); +define_one_global_rw(energy_efficiency); static struct attribute *intel_pstate_attributes[] = { &status.attr, @@ -1241,6 +1297,8 @@ static const struct attribute_group intel_pstate_attr_group = { .attrs = intel_pstate_attributes, }; +static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[]; + static void __init intel_pstate_sysfs_expose_params(void) { struct kobject *intel_pstate_kobject; @@ -1273,6 +1331,11 @@ static void __init intel_pstate_sysfs_expose_params(void) &hwp_dynamic_boost.attr); WARN_ON(rc); } + + if (x86_match_cpu(intel_pstate_cpu_ee_disable_ids)) { + rc = sysfs_create_file(intel_pstate_kobject, &energy_efficiency.attr); + WARN_ON(rc); + } } /************************** sysfs end ************************/ @@ -1288,25 +1351,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); } -#define MSR_IA32_POWER_CTL_BIT_EE 19 - -/* Disable energy efficiency optimization */ -static void intel_pstate_disable_ee(int cpu) -{ - u64 power_ctl; - int ret; - - ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl); - if (ret) - return; - - if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) { - pr_info("Disabling energy efficiency optimization\n"); - power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE); - wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl); - } -} - static int atom_get_min_pstate(void) { u64 value; @@ -1982,10 +2026,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) if (hwp_active) { const struct x86_cpu_id *id; - id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); - if (id) - intel_pstate_disable_ee(cpunum); - intel_pstate_hwp_enable(cpu); id = x86_match_cpu(intel_pstate_hwp_boost_ids); @@ -2806,8 +2846,17 @@ hwp_cpu_matched: if (rc) return rc; - if (hwp_active) + if (hwp_active) { + const struct x86_cpu_id *id; + + id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids); + if (id) { + set_power_ctl_ee_state(false); + pr_info("Disabling energy efficiency optimization\n"); + } + pr_info("HWP enabled\n"); + } return 0; } From f473bf398bf1ed42b1bdbc5206a804d8d3140a2d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 26 Jun 2020 11:34:01 -0700 Subject: [PATCH 12/71] cpufreq: intel_pstate: Allow raw energy performance preference value Currently using attribute "energy_performance_preference", user space can write one of the four per-defined preference string. 
These preference strings get mapped to a hard-coded Energy-Performance Preference (EPP) or Energy-Performance Bias (EPB) knob. These four values are supposed to cover a broad spectrum of use cases, but are not uniformly distributed in the range. There are a number of cases where this is not enough. For example: Suppose a user wants more performance when connected to AC. Instead of using the default "balance performance", the "performance" setting can be used. This changes the EPP value from 0x80 to 0x00. But setting EPP to 0 results in electrical and thermal issues on some platforms. This results in aggressive throttling, which causes a drop in performance. Some value between 0x80 and 0x00 results in better performance, but that value can't be fixed, as the power curve is not linear. In some cases just changing EPP from 0x80 to 0x75 is enough to get a significant performance gain. Similarly, on battery the default "balance_performance" mode can be aggressive in power consumption, but picking the next choice, "balance power", results in too much loss of performance, which leads to a bad user experience in use cases like "Google Hangout". It was observed that some value between these two EPP values is optimal. This change allows fine-grained EPP tuning for platforms like Chromebooks, or for users who want to fine-tune power and performance. Here, based on the product and use cases, different EPP values can be set. This change is similar to the change done for: /sys/devices/system/cpu/cpu*/power/energy_perf_bias where the user has the choice to write a predefined string or a raw value. The change itself is trivial. When the user preference doesn't match one of the predefined strings and the value is an unsigned integer in range, use that value for the EPP. When the EPP feature is not present, writing a raw value is not supported. Suggested-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 6 ++- drivers/cpufreq/intel_pstate.c | 50 +++++++++++++++---- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 25e1097fc332..40d481cca368 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -565,7 +565,11 @@ somewhere between the two extremes: Strings written to the ``energy_performance_preference`` attribute are internally translated to integer values written to the processor's Energy-Performance Preference (EPP) knob (if supported) or its -Energy-Performance Bias (EPB) knob. +Energy-Performance Bias (EPB) knob. It is also possible to write a positive +integer value between 0 and 255, if the EPP feature is present. If the EPP +feature is not present, writing an integer value to this attribute is not +supported. In this case, the user can use the + "/sys/devices/system/cpu/cpu*/power/energy_perf_bias" interface. 
[Note that tasks may by migrated from one CPU to another by the scheduler's load-balancing algorithm and if different energy vs performance hints are diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fd1bef0b9bb4..44c7b4677675 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -602,11 +602,12 @@ static const unsigned int epp_values[] = { HWP_EPP_POWERSAVE }; -static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) +static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp) { s16 epp; int index = -EINVAL; + *raw_epp = 0; epp = intel_pstate_get_epp(cpu_data, 0); if (epp < 0) return epp; @@ -614,12 +615,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; - if (epp <= HWP_EPP_BALANCE_PERFORMANCE) + if (epp == HWP_EPP_BALANCE_PERFORMANCE) return 2; - if (epp <= HWP_EPP_BALANCE_POWERSAVE) + if (epp == HWP_EPP_BALANCE_POWERSAVE) return 3; - else + if (epp == HWP_EPP_POWERSAVE) return 4; + *raw_epp = epp; + return 0; } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: @@ -638,7 +641,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) } static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, - int pref_index) + int pref_index, bool use_raw, + u32 raw_epp) { int epp = -EINVAL; int ret; @@ -657,6 +661,16 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, value &= ~GENMASK_ULL(31, 24); + if (use_raw) { + if (raw_epp > 255) { + ret = -EINVAL; + goto return_pref; + } + value |= (u64)raw_epp << 24; + ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value); + goto return_pref; + } + if (epp == -EINVAL) epp = epp_values[pref_index - 1]; @@ -694,6 +708,8 @@ static ssize_t store_energy_performance_preference( { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; char str_preference[21]; + bool raw = false; + u32 epp; int ret; ret = sscanf(buf, "%20s", str_preference); @@ -701,10 +717,21 @@ static ssize_t store_energy_performance_preference( return -EINVAL; ret = match_string(energy_perf_strings, -1, str_preference); - if (ret < 0) + if (ret < 0) { + if (!boot_cpu_has(X86_FEATURE_HWP_EPP)) + return ret; + + ret = kstrtouint(buf, 10, &epp); + if (ret) + return ret; + + raw = true; + } + + ret = intel_pstate_set_energy_pref_index(cpu_data, ret, raw, epp); + if (ret) return ret; - intel_pstate_set_energy_pref_index(cpu_data, ret); return count; } @@ -712,13 +739,16 @@ static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; - int preference; + int preference, raw_epp; - preference = intel_pstate_get_energy_pref_index(cpu_data); + preference = intel_pstate_get_energy_pref_index(cpu_data, &raw_epp); if (preference < 0) return preference; - return sprintf(buf, "%s\n", energy_perf_strings[preference]); + if (raw_epp) + return sprintf(buf, "%d\n", raw_epp); + else + return sprintf(buf, "%s\n", energy_perf_strings[preference]); } cpufreq_freq_attr_rw(energy_performance_preference); From 8cc46ae565c393f77417cb9530b1265eb50f5d2e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Jun 2020 13:54:58 +0530 Subject: [PATCH 13/71] cpufreq: Fix locking issues with governors The locking around governors handling isn't adequate currently. The list of governors should never be traversed without the locking in place. 
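For illustration, the safe access pattern the fix converges on can be condensed as follows; this is a simplified sketch of the get_governor() helper added in the diff below, reusing the existing cpufreq internals find_governor() and cpufreq_governor_mutex:

    static struct cpufreq_governor *get_governor(const char *str_governor)
    {
            struct cpufreq_governor *t;

            /* Never walk the governor list without the mutex held. */
            mutex_lock(&cpufreq_governor_mutex);
            t = find_governor(str_governor);
            /* Pin the owning module before the lock is dropped. */
            if (t && !try_module_get(t->owner))
                    t = NULL;
            mutex_unlock(&cpufreq_governor_mutex);

            return t;
    }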
Also governor modules must not be removed while the code in them is still in use. Reported-by: Quentin Perret Signed-off-by: Viresh Kumar Cc: All applicable [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0128de3603df..e9e8200a0211 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -621,6 +621,24 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return NULL; } +static struct cpufreq_governor *get_governor(const char *str_governor) +{ + struct cpufreq_governor *t; + + mutex_lock(&cpufreq_governor_mutex); + t = find_governor(str_governor); + if (!t) + goto unlock; + + if (!try_module_get(t->owner)) + t = NULL; + +unlock: + mutex_unlock(&cpufreq_governor_mutex); + + return t; +} + static unsigned int cpufreq_parse_policy(char *str_governor) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) @@ -640,28 +658,14 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) { struct cpufreq_governor *t; - mutex_lock(&cpufreq_governor_mutex); + t = get_governor(str_governor); + if (t) + return t; - t = find_governor(str_governor); - if (!t) { - int ret; + if (request_module("cpufreq_%s", str_governor)) + return NULL; - mutex_unlock(&cpufreq_governor_mutex); - - ret = request_module("cpufreq_%s", str_governor); - if (ret) - return NULL; - - mutex_lock(&cpufreq_governor_mutex); - - t = find_governor(str_governor); - } - if (t && !try_module_get(t->owner)) - t = NULL; - - mutex_unlock(&cpufreq_governor_mutex); - - return t; + return get_governor(str_governor); } /** @@ -815,12 +819,14 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, goto out; } + mutex_lock(&cpufreq_governor_mutex); for_each_governor(t) { if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) - goto out; + break; i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name); } + mutex_unlock(&cpufreq_governor_mutex); out: i += sprintf(&buf[i], "\n"); return i; @@ -1058,15 +1064,17 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) struct cpufreq_governor *def_gov = cpufreq_default_governor(); struct cpufreq_governor *gov = NULL; unsigned int pol = CPUFREQ_POLICY_UNKNOWN; + int ret; if (has_target()) { /* Update policy governor to the one used before hotplug. */ - gov = find_governor(policy->last_governor); + gov = get_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); } else if (def_gov) { gov = def_gov; + __module_get(gov->owner); } else { return -ENODATA; } @@ -1089,7 +1097,11 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) return -ENODATA; } - return cpufreq_set_policy(policy, gov, pol); + ret = cpufreq_set_policy(policy, gov, pol); + if (gov) + module_put(gov->owner); + + return ret; } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) From 10dd8573b09e84b81539d939d55ebdb6a36c5f3a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 29 Jun 2020 13:54:59 +0530 Subject: [PATCH 14/71] cpufreq: Register governors at core_initcall Currently, most CPUFreq governors are registered at the core_initcall time when the given governor is the default one, and the module_init time otherwise. 
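Before this change, registration of a typical governor follows roughly the pattern sketched below ("foo" is a placeholder name; the real instances appear in the hunks that follow); the #ifdef selects core_initcall() only when that governor is the Kconfig default:

    static int __init cpufreq_gov_foo_init(void)
    {
            return cpufreq_register_governor(&cpufreq_gov_foo);
    }

    #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_FOO
    /* Default governor: must be available early during boot. */
    core_initcall(cpufreq_gov_foo_init);
    #else
    /* Not the default: registering at module load time is enough. */
    module_init(cpufreq_gov_foo_init);
    #endif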
In preparation for letting users specify the default governor on the kernel command line, change all of them to be registered at the core_initcall unconditionally, as it is already the case for the schedutil and performance governors. This will allow us to assume that builtin governors have been registered before the built-in CPUFreq drivers probe. And since all governors have similar init/exit patterns now, introduce two new macros, cpufreq_governor_{init,exit}(), to factorize the code. Acked-by: Viresh Kumar Signed-off-by: Quentin Perret Signed-off-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- .../platforms/cell/cpufreq_spudemand.c | 26 ++----------------- drivers/cpufreq/cpufreq_conservative.c | 22 ++++------------ drivers/cpufreq/cpufreq_ondemand.c | 24 +++++------------ drivers/cpufreq/cpufreq_performance.c | 14 ++-------- drivers/cpufreq/cpufreq_powersave.c | 18 +++---------- drivers/cpufreq/cpufreq_userspace.c | 18 +++---------- include/linux/cpufreq.h | 14 ++++++++++ kernel/sched/cpufreq_schedutil.c | 6 +---- 8 files changed, 36 insertions(+), 106 deletions(-) diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 55b31eadb3c8..ca7849e113d7 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -126,30 +126,8 @@ static struct cpufreq_governor spu_governor = { .stop = spu_gov_stop, .owner = THIS_MODULE, }; - -/* - * module init and destoy - */ - -static int __init spu_gov_init(void) -{ - int ret; - - ret = cpufreq_register_governor(&spu_governor); - if (ret) - printk(KERN_ERR "registration of governor failed\n"); - return ret; -} - -static void __exit spu_gov_exit(void) -{ - cpufreq_unregister_governor(&spu_governor); -} - - -module_init(spu_gov_init); -module_exit(spu_gov_exit); +cpufreq_governor_init(spu_governor); +cpufreq_governor_exit(spu_governor); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft "); - diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 737ff3b9c2c0..aa39ff31ec9f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -322,17 +322,7 @@ static struct dbs_governor cs_governor = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) - -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); -} +#define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) MODULE_AUTHOR("Alexander Clouter "); MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " @@ -343,11 +333,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_CONSERVATIVE; + return &CPU_FREQ_GOV_CONSERVATIVE; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_CONSERVATIVE); +cpufreq_governor_exit(CPU_FREQ_GOV_CONSERVATIVE); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 82a4d37ddecb..ac361a8b1d3b 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -408,7 +408,7 @@ static struct dbs_governor od_dbs_gov = { .start = od_start, }; -#define CPU_FREQ_GOV_ONDEMAND 
(&od_dbs_gov.gov) +#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov) static void od_set_powersave_bias(unsigned int powersave_bias) { @@ -429,7 +429,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) continue; policy = cpufreq_cpu_get_raw(cpu); - if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) continue; policy_dbs = policy->governor_data; @@ -461,16 +461,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); -} - MODULE_AUTHOR("Venkatesh Pallipadi "); MODULE_AUTHOR("Alexey Starikovskiy "); MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " @@ -480,11 +470,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_ONDEMAND; + return &CPU_FREQ_GOV_ONDEMAND; } - -core_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND); +cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index def9afe0f5b8..71c1d9aba772 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_performance = { .limits = cpufreq_gov_performance_limits, }; -static int __init cpufreq_gov_performance_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_performance); -} - -static void __exit cpufreq_gov_performance_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_performance); -} - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE struct cpufreq_governor *cpufreq_default_governor(void) { @@ -50,5 +40,5 @@ MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); -core_initcall(cpufreq_gov_performance_init); -module_exit(cpufreq_gov_performance_exit); +cpufreq_governor_init(cpufreq_gov_performance); +cpufreq_governor_exit(cpufreq_gov_performance); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 1ae66019eb83..7749522355b5 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -23,16 +23,6 @@ static struct cpufreq_governor cpufreq_gov_powersave = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_powersave_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_powersave); -} - -static void __exit cpufreq_gov_powersave_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_powersave); -} - MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); @@ -42,9 +32,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_powersave; } - -core_initcall(cpufreq_gov_powersave_init); -#else -module_init(cpufreq_gov_powersave_init); #endif -module_exit(cpufreq_gov_powersave_exit); + +cpufreq_governor_init(cpufreq_gov_powersave); +cpufreq_governor_exit(cpufreq_gov_powersave); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index b43e7cd502c5..50a4d7846580 100644 --- 
a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -126,16 +126,6 @@ static struct cpufreq_governor cpufreq_gov_userspace = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_userspace_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_userspace); -} - -static void __exit cpufreq_gov_userspace_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_userspace); -} - MODULE_AUTHOR("Dominik Brodowski , " "Russell King "); MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); @@ -146,9 +136,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_userspace; } - -core_initcall(cpufreq_gov_userspace_init); -#else -module_init(cpufreq_gov_userspace_init); #endif -module_exit(cpufreq_gov_userspace_exit); + +cpufreq_governor_init(cpufreq_gov_userspace); +cpufreq_governor_exit(cpufreq_gov_userspace); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 3494f6763597..e62b022cb07e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,6 +577,20 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +#define cpufreq_governor_init(__governor) \ +static int __init __governor##_init(void) \ +{ \ + return cpufreq_register_governor(&__governor); \ +} \ +core_initcall(__governor##_init) + +#define cpufreq_governor_exit(__governor) \ +static void __exit __governor##_exit(void) \ +{ \ + return cpufreq_unregister_governor(&__governor); \ +} \ +module_exit(__governor##_exit) + struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 7fbaee24c824..402a09af9f43 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -909,11 +909,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) } #endif -static int __init sugov_register(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} -core_initcall(sugov_register); +cpufreq_governor_init(schedutil_gov); #ifdef CONFIG_ENERGY_MODEL extern bool sched_energy_update; From 8412b4563e5910485c7bcd4fdcd8bcc3e728284c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 29 Jun 2020 13:55:00 +0530 Subject: [PATCH 15/71] cpufreq: Specify default governor on command line Currently, the only way to specify the default CPUfreq governor is via Kconfig options, which suits users who can build the kernel themselves perfectly. However, for those who use a distro-like kernel (such as Android, with the Generic Kernel Image project), the only way to use a non-default governor is to boot to userspace, and to then switch using the sysfs interface. Being able to specify the default governor on the command line, like is the case for cpuidle, would allow those users to specify their governor of choice earlier on, and to simplify the userspace boot procedure slighlty. To support this use-case, add a kernel command line parameter allowing the default governor for CPUfreq to be specified, which takes precedence over the built-in default. This implementation has one notable limitation: the default governor must be registered before the driver. This is solved for builtin governors and drivers using appropriate *_initcall() functions. And in the modular case, this must be reflected as a constraint on the module loading order. 
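For illustration, a condensed sketch of the mechanism this relies on (the complete change is in the diff below; other initialization done in cpufreq_core_init() is omitted here): module_param_string() exposes a buffer as the "cpufreq.default_governor=" boot parameter, and the Kconfig-selected governor is only copied in when that buffer was left empty:

    static char default_governor[CPUFREQ_NAME_LEN];

    static int __init cpufreq_core_init(void)
    {
            /* Fall back to the built-in default governor if the user did
             * not pass cpufreq.default_governor= on the command line. */
            if (!strlen(default_governor))
                    strncpy(default_governor, cpufreq_default_governor()->name,
                            CPUFREQ_NAME_LEN);
            return 0;
    }
    module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444);
    core_initcall(cpufreq_core_init);

Booting with, for example, "cpufreq.default_governor=powersave" then attaches the powersave governor to new policies, provided it has been registered (built in, or its module already loaded) before the cpufreq driver probes.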
Signed-off-by: Quentin Perret [ Viresh: Converted 'default_governor' to a string and parsing it only at initcall level, and several updates to cpufreq_init_policy(). ] Signed-off-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 5 +++ Documentation/admin-guide/pm/cpufreq.rst | 6 ++-- drivers/cpufreq/cpufreq.c | 31 +++++++++++++------ 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb95fad81c79..8deb5a89328a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -703,6 +703,11 @@ cpufreq.off=1 [CPU_FREQ] disable the cpufreq sub-system + cpufreq.default_governor= + [CPU_FREQ] Name of the default cpufreq governor or + policy to use. This governor must be registered in the + kernel before the cpufreq driver probes. + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 0c74a7784964..368e612145d2 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -147,9 +147,9 @@ CPUs in it. The next major initialization step for a new policy object is to attach a scaling governor to it (to begin with, that is the default scaling governor -determined by the kernel configuration, but it may be changed later -via ``sysfs``). First, a pointer to the new policy object is passed to the -governor's ``->init()`` callback which is expected to initialize all of the +determined by the kernel command line or configuration, but it may be changed +later via ``sysfs``). First, a pointer to the new policy object is passed to +the governor's ``->init()`` callback which is expected to initialize all of the data structures necessary to handle the given policy and, possibly, to add a governor ``sysfs`` interface to it. Next, the governor is started by invoking its ``->start()`` callback. diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e9e8200a0211..ad94b1d47ddb 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -50,6 +50,8 @@ static LIST_HEAD(cpufreq_governor_list); #define for_each_governor(__governor) \ list_for_each_entry(__governor, &cpufreq_governor_list, governor_list) +static char default_governor[CPUFREQ_NAME_LEN]; + /** * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock @@ -1061,7 +1063,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *def_gov = cpufreq_default_governor(); struct cpufreq_governor *gov = NULL; unsigned int pol = CPUFREQ_POLICY_UNKNOWN; int ret; @@ -1071,21 +1072,27 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) gov = get_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, policy->cpu); - } else if (def_gov) { - gov = def_gov; - __module_get(gov->owner); + gov->name, policy->cpu); } else { - return -ENODATA; + gov = get_governor(default_governor); } + + if (!gov) { + gov = cpufreq_default_governor(); + if (!gov) + return -ENODATA; + __module_get(gov->owner); + } + } else { + /* Use the default policy if there is no last_policy. 
*/ if (policy->last_policy) { pol = policy->last_policy; - } else if (def_gov) { - pol = cpufreq_parse_policy(def_gov->name); + } else { + pol = cpufreq_parse_policy(default_governor); /* - * In case the default governor is neiter "performance" + * In case the default governor is neither "performance" * nor "powersave", fall back to the initial policy * value set by the driver. */ @@ -2795,13 +2802,19 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); static int __init cpufreq_core_init(void) { + struct cpufreq_governor *gov = cpufreq_default_governor(); + if (cpufreq_disabled()) return -ENODEV; cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); + if (!strlen(default_governor)) + strncpy(default_governor, gov->name, CPUFREQ_NAME_LEN); + return 0; } module_param(off, int, 0444); +module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444); core_initcall(cpufreq_core_init); From 3a7e4fbbfd1a266ceea33e9b48f77f233ac994ac Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Jun 2020 15:34:40 +0530 Subject: [PATCH 16/71] cpufreq: Remove the weakly defined cpufreq_default_governor() The default cpufreq governor is chosen with the help of a "choice" option in the Kconfig which will always end up selecting one of the governors and so the weakly defined definition of cpufreq_default_governor() will never get called. Moreover, this makes us skip the checking of the return value of that routine as it will always be non NULL. If the Kconfig option changes in future, then we will start getting a link error instead (and it won't go unnoticed as in the case of the weak definition). Suggested-by: Quentin Perret Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ad94b1d47ddb..036f4cc42ede 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1056,11 +1056,6 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return 0; } -__weak struct cpufreq_governor *cpufreq_default_governor(void) -{ - return NULL; -} - static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; @@ -1079,8 +1074,6 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) if (!gov) { gov = cpufreq_default_governor(); - if (!gov) - return -ENODATA; __module_get(gov->owner); } From 8e022709c41e55b5b4a9d75a3380b7286c3c588c Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Jun 2020 14:02:22 -0600 Subject: [PATCH 17/71] cpupower: Fix comparing pointer to 0 coccicheck warns Fix cocciccheck wanrns found by: make coccicheck MODE=report M=tools/power/cpupower/ tools/power/cpupower/utils/helpers/bitmask.c:29:12-13: WARNING comparing pointer to 0, suggest !E tools/power/cpupower/utils/helpers/bitmask.c:29:12-13: WARNING comparing pointer to 0 tools/power/cpupower/utils/helpers/bitmask.c:43:12-13: WARNING comparing pointer to 0 Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/helpers/bitmask.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/helpers/bitmask.c b/tools/power/cpupower/utils/helpers/bitmask.c index 6c7932f5bd66..649d87cb8b0f 100644 --- a/tools/power/cpupower/utils/helpers/bitmask.c +++ b/tools/power/cpupower/utils/helpers/bitmask.c @@ -26,11 +26,11 @@ struct bitmask *bitmask_alloc(unsigned int n) struct bitmask *bmp; bmp = malloc(sizeof(*bmp)); 
- if (bmp == 0) + if (!bmp) return 0; bmp->size = n; bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); - if (bmp->maskp == 0) { + if (!bmp->maskp) { free(bmp); return 0; } @@ -40,7 +40,7 @@ struct bitmask *bitmask_alloc(unsigned int n) /* Free `struct bitmask` */ void bitmask_free(struct bitmask *bmp) { - if (bmp == 0) + if (!bmp) return; free(bmp->maskp); bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ From cbf25270933742c4a6d8671bba5c79a4e879cff3 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Jun 2020 14:02:23 -0600 Subject: [PATCH 18/71] cpupower: Fix NULL but dereferenced coccicheck errors Fix NULL but dereferenced coccicheck errors found by: make coccicheck MODE=report M=tools/power/cpupower tools/power/cpupower/lib/cpufreq.c:384:19-23: ERROR: first is NULL but dereferenced. tools/power/cpupower/lib/cpufreq.c:440:19-23: ERROR: first is NULL but dereferenced. tools/power/cpupower/lib/cpufreq.c:308:19-23: ERROR: first is NULL but dereferenced. tools/power/cpupower/lib/cpufreq.c:753:19-23: ERROR: first is NULL but dereferenced. Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 6e04304560ca..c3b56db8b921 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -285,7 +285,7 @@ struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned } else { first = malloc(sizeof(*first)); if (!first) - goto error_out; + return NULL; current = first; } current->first = first; @@ -362,7 +362,7 @@ struct cpufreq_available_frequencies } else { first = malloc(sizeof(*first)); if (!first) - goto error_out; + return NULL; current = first; } current->first = first; @@ -418,7 +418,7 @@ struct cpufreq_available_frequencies } else { first = malloc(sizeof(*first)); if (!first) - goto error_out; + return NULL; current = first; } current->first = first; @@ -493,7 +493,7 @@ static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, } else { first = malloc(sizeof(*first)); if (!first) - goto error_out; + return NULL; current = first; } current->first = first; @@ -726,7 +726,7 @@ struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, } else { first = malloc(sizeof(*first)); if (!first) - goto error_out; + return NULL; current = first; } current->first = first; From 8d87ae48ced2dffd5e7247d19eb4c88be6f1c6f1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 8 Jul 2020 16:32:13 -0700 Subject: [PATCH 19/71] PM: domains: Fix up terminology with parent/child The genpd infrastructure uses the terms master/slave, but such uses have no external exposures (not even in Documentation/driver-api/pm/*) and are not mandated by nor associated with any external specifications. Change the language used through-out to parent/child. There was one possible exception in the debugfs node "pm_genpd/pm_genpd_summary" but its path has no hits outside of the kernel itself when performing a code search[1], and it seems even this single usage has been non-functional since it was introduced due to a typo in the Python ("apend" instead of correct "append"). Fix the typo while we're at it. Link: https://codesearch.debian.net/ # [1] Signed-off-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kieran Bingham Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 194 +++++++++++++-------------- drivers/base/power/domain_governor.c | 12 +- include/linux/pm_domain.h | 12 +- scripts/gdb/linux/genpd.py | 12 +- 4 files changed, 115 insertions(+), 115 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 0a01df608849..2cb5e04cf86c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -263,18 +263,18 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, /* * Traverse all sub-domains within the domain. This can be * done without any additional locking as the link->performance_state - * field is protected by the master genpd->lock, which is already taken. + * field is protected by the parent genpd->lock, which is already taken. * * Also note that link->performance_state (subdomain's performance state - * requirement to master domain) is different from - * link->slave->performance_state (current performance state requirement + * requirement to parent domain) is different from + * link->child->performance_state (current performance state requirement * of the devices/sub-domains of the subdomain) and so can have a * different value. * * Note that we also take vote from powered-off sub-domains into account * as the same is done for devices right now. */ - list_for_each_entry(link, &genpd->master_links, master_node) { + list_for_each_entry(link, &genpd->parent_links, parent_node) { if (link->performance_state > state) state = link->performance_state; } @@ -285,40 +285,40 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, static int _genpd_set_performance_state(struct generic_pm_domain *genpd, unsigned int state, int depth) { - struct generic_pm_domain *master; + struct generic_pm_domain *parent; struct gpd_link *link; - int master_state, ret; + int parent_state, ret; if (state == genpd->performance_state) return 0; - /* Propagate to masters of genpd */ - list_for_each_entry(link, &genpd->slave_links, slave_node) { - master = link->master; + /* Propagate to parents of genpd */ + list_for_each_entry(link, &genpd->child_links, child_node) { + parent = link->parent; - if (!master->set_performance_state) + if (!parent->set_performance_state) continue; - /* Find master's performance state */ + /* Find parent's performance state */ ret = dev_pm_opp_xlate_performance_state(genpd->opp_table, - master->opp_table, + parent->opp_table, state); if (unlikely(ret < 0)) goto err; - master_state = ret; + parent_state = ret; - genpd_lock_nested(master, depth + 1); + genpd_lock_nested(parent, depth + 1); link->prev_performance_state = link->performance_state; - link->performance_state = master_state; - master_state = _genpd_reeval_performance_state(master, - master_state); - ret = _genpd_set_performance_state(master, master_state, depth + 1); + link->performance_state = parent_state; + parent_state = _genpd_reeval_performance_state(parent, + parent_state); + ret = _genpd_set_performance_state(parent, parent_state, depth + 1); if (ret) link->performance_state = link->prev_performance_state; - genpd_unlock(master); + genpd_unlock(parent); if (ret) goto err; @@ -333,26 +333,26 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, err: /* Encountered an error, lets rollback */ - list_for_each_entry_continue_reverse(link, &genpd->slave_links, - slave_node) { - master = link->master; + list_for_each_entry_continue_reverse(link, &genpd->child_links, + child_node) { + parent = link->parent; - if 
(!master->set_performance_state) + if (!parent->set_performance_state) continue; - genpd_lock_nested(master, depth + 1); + genpd_lock_nested(parent, depth + 1); - master_state = link->prev_performance_state; - link->performance_state = master_state; + parent_state = link->prev_performance_state; + link->performance_state = parent_state; - master_state = _genpd_reeval_performance_state(master, - master_state); - if (_genpd_set_performance_state(master, master_state, depth + 1)) { + parent_state = _genpd_reeval_performance_state(parent, + parent_state); + if (_genpd_set_performance_state(parent, parent_state, depth + 1)) { pr_err("%s: Failed to roll back to %d performance state\n", - master->name, master_state); + parent->name, parent_state); } - genpd_unlock(master); + genpd_unlock(parent); } return ret; @@ -552,7 +552,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, /* * If sd_count > 0 at this point, one of the subdomains hasn't - * managed to call genpd_power_on() for the master yet after + * managed to call genpd_power_on() for the parent yet after * incrementing it. In that case genpd_power_on() will wait * for us to drop the lock, so we can call .power_off() and let * the genpd_power_on() restore power for us (this shouldn't @@ -566,22 +566,22 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, genpd->status = GPD_STATE_POWER_OFF; genpd_update_accounting(genpd); - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); - genpd_lock_nested(link->master, depth + 1); - genpd_power_off(link->master, false, depth + 1); - genpd_unlock(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_dec(link->parent); + genpd_lock_nested(link->parent, depth + 1); + genpd_power_off(link->parent, false, depth + 1); + genpd_unlock(link->parent); } return 0; } /** - * genpd_power_on - Restore power to a given PM domain and its masters. + * genpd_power_on - Restore power to a given PM domain and its parents. * @genpd: PM domain to power up. * @depth: nesting count for lockdep. * - * Restore power to @genpd and all of its masters so that it is possible to + * Restore power to @genpd and all of its parents so that it is possible to * resume a device belonging to it. */ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) @@ -594,20 +594,20 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) /* * The list is guaranteed not to change while the loop below is being - * executed, unless one of the masters' .power_on() callbacks fiddles + * executed, unless one of the parents' .power_on() callbacks fiddles * with it. 
*/ - list_for_each_entry(link, &genpd->slave_links, slave_node) { - struct generic_pm_domain *master = link->master; + list_for_each_entry(link, &genpd->child_links, child_node) { + struct generic_pm_domain *parent = link->parent; - genpd_sd_counter_inc(master); + genpd_sd_counter_inc(parent); - genpd_lock_nested(master, depth + 1); - ret = genpd_power_on(master, depth + 1); - genpd_unlock(master); + genpd_lock_nested(parent, depth + 1); + ret = genpd_power_on(parent, depth + 1); + genpd_unlock(parent); if (ret) { - genpd_sd_counter_dec(master); + genpd_sd_counter_dec(parent); goto err; } } @@ -623,12 +623,12 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) err: list_for_each_entry_continue_reverse(link, - &genpd->slave_links, - slave_node) { - genpd_sd_counter_dec(link->master); - genpd_lock_nested(link->master, depth + 1); - genpd_power_off(link->master, false, depth + 1); - genpd_unlock(link->master); + &genpd->child_links, + child_node) { + genpd_sd_counter_dec(link->parent); + genpd_lock_nested(link->parent, depth + 1); + genpd_power_off(link->parent, false, depth + 1); + genpd_unlock(link->parent); } return ret; @@ -932,13 +932,13 @@ late_initcall(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP /** - * genpd_sync_power_off - Synchronously power off a PM domain and its masters. + * genpd_sync_power_off - Synchronously power off a PM domain and its parents. * @genpd: PM domain to power off, if possible. * @use_lock: use the lock. * @depth: nesting count for lockdep. * * Check if the given PM domain can be powered off (during system suspend or - * hibernation) and do that if so. Also, in that case propagate to its masters. + * hibernation) and do that if so. Also, in that case propagate to its parents. * * This function is only called in "noirq" and "syscore" stages of system power * transitions. The "noirq" callbacks may be executed asynchronously, thus in @@ -963,21 +963,21 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, genpd->status = GPD_STATE_POWER_OFF; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_dec(link->parent); if (use_lock) - genpd_lock_nested(link->master, depth + 1); + genpd_lock_nested(link->parent, depth + 1); - genpd_sync_power_off(link->master, use_lock, depth + 1); + genpd_sync_power_off(link->parent, use_lock, depth + 1); if (use_lock) - genpd_unlock(link->master); + genpd_unlock(link->parent); } } /** - * genpd_sync_power_on - Synchronously power on a PM domain and its masters. + * genpd_sync_power_on - Synchronously power on a PM domain and its parents. * @genpd: PM domain to power on. * @use_lock: use the lock. * @depth: nesting count for lockdep. 
@@ -994,16 +994,16 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, if (genpd_status_on(genpd)) return; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_inc(link->master); + list_for_each_entry(link, &genpd->child_links, child_node) { + genpd_sd_counter_inc(link->parent); if (use_lock) - genpd_lock_nested(link->master, depth + 1); + genpd_lock_nested(link->parent, depth + 1); - genpd_sync_power_on(link->master, use_lock, depth + 1); + genpd_sync_power_on(link->parent, use_lock, depth + 1); if (use_lock) - genpd_unlock(link->master); + genpd_unlock(link->parent); } _genpd_power_on(genpd, false); @@ -1443,12 +1443,12 @@ static void genpd_update_cpumask(struct generic_pm_domain *genpd, if (!genpd_is_cpu_domain(genpd)) return; - list_for_each_entry(link, &genpd->slave_links, slave_node) { - struct generic_pm_domain *master = link->master; + list_for_each_entry(link, &genpd->child_links, child_node) { + struct generic_pm_domain *parent = link->parent; - genpd_lock_nested(master, depth + 1); - genpd_update_cpumask(master, cpu, set, depth + 1); - genpd_unlock(master); + genpd_lock_nested(parent, depth + 1); + genpd_update_cpumask(parent, cpu, set, depth + 1); + genpd_unlock(parent); } if (set) @@ -1636,17 +1636,17 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(itr, &genpd->master_links, master_node) { - if (itr->slave == subdomain && itr->master == genpd) { + list_for_each_entry(itr, &genpd->parent_links, parent_node) { + if (itr->child == subdomain && itr->parent == genpd) { ret = -EINVAL; goto out; } } - link->master = genpd; - list_add_tail(&link->master_node, &genpd->master_links); - link->slave = subdomain; - list_add_tail(&link->slave_node, &subdomain->slave_links); + link->parent = genpd; + list_add_tail(&link->parent_node, &genpd->parent_links); + link->child = subdomain; + list_add_tail(&link->child_node, &subdomain->child_links); if (genpd_status_on(subdomain)) genpd_sd_counter_inc(genpd); @@ -1660,7 +1660,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, /** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. - * @genpd: Master PM domain to add the subdomain to. + * @genpd: Leader PM domain to add the subdomain to. * @subdomain: Subdomain to be added. */ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, @@ -1678,7 +1678,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); /** * pm_genpd_remove_subdomain - Remove a subdomain from an I/O PM domain. - * @genpd: Master PM domain to remove the subdomain from. + * @genpd: Leader PM domain to remove the subdomain from. * @subdomain: Subdomain to be removed. 
*/ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, @@ -1693,19 +1693,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, genpd_lock(subdomain); genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); - if (!list_empty(&subdomain->master_links) || subdomain->device_count) { + if (!list_empty(&subdomain->parent_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, subdomain->name); ret = -EBUSY; goto out; } - list_for_each_entry_safe(link, l, &genpd->master_links, master_node) { - if (link->slave != subdomain) + list_for_each_entry_safe(link, l, &genpd->parent_links, parent_node) { + if (link->child != subdomain) continue; - list_del(&link->master_node); - list_del(&link->slave_node); + list_del(&link->parent_node); + list_del(&link->child_node); kfree(link); if (genpd_status_on(subdomain)) genpd_sd_counter_dec(genpd); @@ -1770,8 +1770,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd)) return -EINVAL; - INIT_LIST_HEAD(&genpd->master_links); - INIT_LIST_HEAD(&genpd->slave_links); + INIT_LIST_HEAD(&genpd->parent_links); + INIT_LIST_HEAD(&genpd->child_links); INIT_LIST_HEAD(&genpd->dev_list); genpd_lock_init(genpd); genpd->gov = gov; @@ -1848,15 +1848,15 @@ static int genpd_remove(struct generic_pm_domain *genpd) return -EBUSY; } - if (!list_empty(&genpd->master_links) || genpd->device_count) { + if (!list_empty(&genpd->parent_links) || genpd->device_count) { genpd_unlock(genpd); pr_err("%s: unable to remove %s\n", __func__, genpd->name); return -EBUSY; } - list_for_each_entry_safe(link, l, &genpd->slave_links, slave_node) { - list_del(&link->master_node); - list_del(&link->slave_node); + list_for_each_entry_safe(link, l, &genpd->child_links, child_node) { + list_del(&link->parent_node); + list_del(&link->child_node); kfree(link); } @@ -2827,12 +2827,12 @@ static int genpd_summary_one(struct seq_file *s, /* * Modifications on the list require holding locks on both - * master and slave, so we are safe. + * parent and child, so we are safe. * Also genpd->name is immutable. */ - list_for_each_entry(link, &genpd->master_links, master_node) { - seq_printf(s, "%s", link->slave->name); - if (!list_is_last(&link->master_node, &genpd->master_links)) + list_for_each_entry(link, &genpd->parent_links, parent_node) { + seq_printf(s, "%s", link->child->name); + if (!list_is_last(&link->parent_node, &genpd->parent_links)) seq_puts(s, ", "); } @@ -2860,7 +2860,7 @@ static int summary_show(struct seq_file *s, void *data) struct generic_pm_domain *genpd; int ret = 0; - seq_puts(s, "domain status slaves\n"); + seq_puts(s, "domain status children\n"); seq_puts(s, " /device runtime status\n"); seq_puts(s, "----------------------------------------------------------------------\n"); @@ -2915,8 +2915,8 @@ static int sub_domains_show(struct seq_file *s, void *data) if (ret) return -ERESTARTSYS; - list_for_each_entry(link, &genpd->master_links, master_node) - seq_printf(s, "%s\n", link->slave->name); + list_for_each_entry(link, &genpd->parent_links, parent_node) + seq_printf(s, "%s\n", link->child->name); genpd_unlock(genpd); return ret; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index daa8c7689f7e..490ed7deb99a 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -135,8 +135,8 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, * * All subdomains have been powered off already at this point. 
*/ - list_for_each_entry(link, &genpd->master_links, master_node) { - struct generic_pm_domain *sd = link->slave; + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *sd = link->child; s64 sd_max_off_ns = sd->max_off_time_ns; if (sd_max_off_ns < 0) @@ -217,13 +217,13 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) } /* - * We have to invalidate the cached results for the masters, so + * We have to invalidate the cached results for the parents, so * use the observation that default_power_down_ok() is not - * going to be called for any master until this instance + * going to be called for any parent until this instance * returns. */ - list_for_each_entry(link, &genpd->slave_links, slave_node) - link->master->max_off_time_changed = true; + list_for_each_entry(link, &genpd->child_links, child_node) + link->parent->max_off_time_changed = true; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = false; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9ec78ee53652..574a1fadb1e5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -95,8 +95,8 @@ struct generic_pm_domain { struct device dev; struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ - struct list_head master_links; /* Links with PM domain as a master */ - struct list_head slave_links; /* Links with PM domain as a slave */ + struct list_head parent_links; /* Links with PM domain as a parent */ + struct list_head child_links;/* Links with PM domain as a child */ struct list_head dev_list; /* List of devices */ struct dev_power_governor *gov; struct work_struct power_off_work; @@ -151,10 +151,10 @@ static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) } struct gpd_link { - struct generic_pm_domain *master; - struct list_head master_node; - struct generic_pm_domain *slave; - struct list_head slave_node; + struct generic_pm_domain *parent; + struct list_head parent_node; + struct generic_pm_domain *child; + struct list_head child_node; /* Sub-domain's per-master domain performance state */ unsigned int performance_state; diff --git a/scripts/gdb/linux/genpd.py b/scripts/gdb/linux/genpd.py index 6ca93bd2949e..39cd1abd8559 100644 --- a/scripts/gdb/linux/genpd.py +++ b/scripts/gdb/linux/genpd.py @@ -49,17 +49,17 @@ Output is similar to /sys/kernel/debug/pm_genpd/pm_genpd_summary''' else: status_string = 'off-{}'.format(genpd['state_idx']) - slave_names = [] + child_names = [] for link in list_for_each_entry( - genpd['master_links'], + genpd['parent_links'], device_link_type.get_type().pointer(), - 'master_node'): - slave_names.apend(link['slave']['name']) + 'parent_node'): + child_names.append(link['child']['name']) gdb.write('%-30s %-15s %s\n' % ( genpd['name'].string(), status_string, - ', '.join(slave_names))) + ', '.join(child_names))) # Print devices in domain for pm_data in list_for_each_entry(genpd['dev_list'], @@ -70,7 +70,7 @@ Output is similar to /sys/kernel/debug/pm_genpd/pm_genpd_summary''' gdb.write(' %-50s %s\n' % (kobj_path, rtpm_status_str(dev))) def invoke(self, arg, from_tty): - gdb.write('domain status slaves\n'); + gdb.write('domain status children\n'); gdb.write(' /device runtime status\n'); gdb.write('----------------------------------------------------------------------\n'); for genpd in list_for_each_entry( From 0735069c5e49d3f4f5f92669fcc3882500c118e5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 21 Jun 2020 
16:04:12 +0800 Subject: [PATCH 20/71] powercap: idle_inject: Replace play_idle() with play_idle_precise() in comments After commit 333cff6c963fbc ("powercap/drivers/idle_inject: Specify idle state max latency"), we convert to use play_idle_precise() with max allowed latency to specify the idle state. Some function comments still use play_idle(), let's update it to play_idle_precise(). Signed-off-by: Yangtao Li Signed-off-by: Frank Lee Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index c90f0990968b..597733ed86e9 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -19,8 +19,8 @@ * The idle + run duration is specified via separate helpers and that allows * idle injection to be started. * - * The idle injection kthreads will call play_idle() with the idle duration - * specified as per the above. + * The idle injection kthreads will call play_idle_precise() with the idle + * duration and max allowed latency specified as per the above. * * After all of them have been woken up, a timer is set to start the next idle * injection cycle. @@ -100,7 +100,7 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev) * * This function is called when the idle injection timer expires. It wakes up * idle injection tasks associated with the timer and they, in turn, invoke - * play_idle() to inject a specified amount of CPU idle time. + * play_idle_precise() to inject a specified amount of CPU idle time. * * Return: HRTIMER_RESTART. */ @@ -124,8 +124,8 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) * idle_inject_fn - idle injection work function * @cpu: the CPU owning the task * - * This function calls play_idle() to inject a specified amount of CPU idle - * time. + * This function calls play_idle_precise() to inject a specified amount of CPU + * idle time. */ static void idle_inject_fn(unsigned int cpu) { From 3ff79754d7a2b27f6d2d56b3951fc9aa8080b063 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 9 Jul 2020 13:05:22 -0700 Subject: [PATCH 21/71] cpufreq: intel_pstate: Fix static checker warning for epp variable Fix warning for: drivers/cpufreq/intel_pstate.c:731 store_energy_performance_preference() error: uninitialized symbol 'epp'. This warning is for a case, when energy_performance_preference attribute matches pre defined strings. In this case the value of raw epp will not be used to set EPP bits in MSR_HWP_REQUEST. So initializing with any value is fine. Reported-by: Dan Carpenter Signed-off-by: Srinivas Pandruvada Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 44c7b4677675..94cd07678ee3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -709,7 +709,7 @@ static ssize_t store_energy_performance_preference( struct cpudata *cpu_data = all_cpu_data[policy->cpu]; char str_preference[21]; bool raw = false; - u32 epp; + u32 epp = 0; int ret; ret = sscanf(buf, "%20s", str_preference); From 8479eb82084112b1cf5862099536b384bba5b92c Mon Sep 17 00:00:00 2001 From: "Alexander A. 
Klimov" Date: Mon, 13 Jul 2020 12:55:02 +0200 Subject: [PATCH 22/71] cpufreq: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 2 +- drivers/cpufreq/davinci-cpufreq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 429e5a36c08a..29e8ff422b91 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -612,7 +612,7 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { /* Intel Xeon Processor 7100 Series Specification Update - * http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * https://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause * Both Processor Cores to Lock Up. */ diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 297d23cad8b5..91f477a6cbc4 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -2,7 +2,7 @@ /* * CPU frequency scaling for DaVinci * - * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments Incorporated - https://www.ti.com/ * * Based on linux/arch/arm/plat-omap/cpu-omap.c. Original Copyright follows: * From afb0367a80553e795e7ad055299096544454f3f6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Jul 2020 14:56:25 +0200 Subject: [PATCH 23/71] PM: domains: Restore comment indentation for generic_pm_domain.child_links The rename of generic_pm_domain.slave_links to generic_pm_domain.child_links accidentally dropped the TAB to align the member's comment. Re-add the lost TAB to restore indentation. Fixes: 8d87ae48ced2dffd ("PM: domains: Fix up terminology with parent/child") Signed-off-by: Geert Uytterhoeven [ rjw: Minor subject edit ] Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 574a1fadb1e5..ee11502a575b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -96,7 +96,7 @@ struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ struct list_head parent_links; /* Links with PM domain as a parent */ - struct list_head child_links;/* Links with PM domain as a child */ + struct list_head child_links; /* Links with PM domain as a child */ struct list_head dev_list; /* List of devices */ struct dev_power_governor *gov; struct work_struct power_off_work; From a45aca510b73b745f27f39c6bb590b1743ea1792 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Tue, 7 Jul 2020 10:28:44 -0700 Subject: [PATCH 24/71] PM: sleep: core: Emit changed uevent on wakeup_sysfs_add/remove Udev rules that depend on the power/wakeup attribute don't get triggered correctly if device_set_wakeup_capable is called after the device is created. This can happen for several reasons (driver sets wakeup after device is created, wakeup is changed on parent device, etc) and it seems reasonable to emit a changed event when adding or removing attributes on the device. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Greg Kroah-Hartman Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 24d25cf8ab14..c7b24812523c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include +#include #include #include #include @@ -739,12 +740,18 @@ int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) int wakeup_sysfs_add(struct device *dev) { - return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); + int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); + + if (!ret) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); + + return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) From 59679d9933ab897d197297eaf37e5b3788c052a5 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 6 Jul 2020 17:52:24 +0800 Subject: [PATCH 25/71] freezer: Add unsafe version of freezable_schedule_timeout_interruptible() for NFS commit 0688e64bc600 ("NFS: Allow signal interruption of NFS4ERR_DELAYed operations") introduces nfs4_delay_interruptible which also needs an _unsafe version to avoid the following call trace for the same reason explained in commit 416ad3c9c006 ("freezer: add unsafe versions of freezable helpers for NFS") CPU: 4 PID: 3968 Comm: rm Tainted: G W 5.8.0-rc4 #1 Hardware name: Marvell OcteonTX CN96XX board (DT) Call trace: dump_backtrace+0x0/0x1dc show_stack+0x20/0x30 dump_stack+0xdc/0x150 debug_check_no_locks_held+0x98/0xa0 nfs4_delay_interruptible+0xd8/0x120 nfs4_handle_exception+0x130/0x170 nfs4_proc_rmdir+0x8c/0x220 nfs_rmdir+0xa4/0x360 vfs_rmdir.part.0+0x6c/0x1b0 do_rmdir+0x18c/0x210 __arm64_sys_unlinkat+0x64/0x7c el0_svc_common.constprop.0+0x7c/0x110 do_el0_svc+0x24/0xa0 el0_sync_handler+0x13c/0x1b8 el0_sync+0x158/0x180 Signed-off-by: He Zhe Signed-off-by: Rafael J. 
Wysocki --- fs/nfs/nfs4proc.c | 2 +- include/linux/freezer.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e32717fd1169..15ecfa474e37 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -414,7 +414,7 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); - freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout)); + freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout)); if (!signal_pending(current)) return 0; return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 21f5aa0b217f..27828145ca09 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -207,6 +207,17 @@ static inline long freezable_schedule_timeout_interruptible(long timeout) return __retval; } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) +{ + long __retval; + + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count_unsafe(); + return __retval; +} + /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { @@ -285,6 +296,9 @@ static inline void set_freezable(void) {} #define freezable_schedule_timeout_interruptible(timeout) \ schedule_timeout_interruptible(timeout) +#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) From 6ada7ba2fa7710eead191b69fd948569b05d0f61 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 22 Jun 2020 17:09:13 +0800 Subject: [PATCH 26/71] PM: hibernate: fix white space in a few places In hibernate.c, some places lack of spaces while some places have redundant spaces. So fix them. Signed-off-by: Xiang Chen Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 02ec716a4927..5714f51ba9f8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1062,7 +1062,7 @@ power_attr(disk); static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + return sprintf(buf, "%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } @@ -1162,7 +1162,7 @@ static ssize_t reserved_size_store(struct kobject *kobj, power_attr(reserved_size); -static struct attribute * g[] = { +static struct attribute *g[] = { &disk_attr.attr, &resume_offset_attr.attr, &resume_attr.attr, @@ -1190,7 +1190,7 @@ static int __init resume_setup(char *str) if (noresume) return 1; - strncpy( resume_file, str, 255 ); + strncpy(resume_file, str, 255); return 1; } From 02d7f4005e942f75c63302d54a659e673edd00df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 10 Jul 2020 22:12:10 +0300 Subject: [PATCH 27/71] PM: sleep: spread "const char *" correctness Fixed string literals can be referred to as "const char *". Signed-off-by: Alexey Dobriyan [ rjw: Minor subject edit ] Signed-off-by: Rafael J. 
Wysocki --- kernel/power/power.h | 2 +- kernel/power/snapshot.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/power.h b/kernel/power/power.h index ba2094db6294..32fc89ac96c3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -32,7 +32,7 @@ static inline int init_header_complete(struct swsusp_info *info) return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE); } -static inline char *check_image_kernel(struct swsusp_info *info) +static inline const char *check_image_kernel(struct swsusp_info *info) { return arch_hibernation_header_restore(info) ? "architecture specific data" : NULL; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 881128b9351e..cef154261fe2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2023,7 +2023,7 @@ static int init_header_complete(struct swsusp_info *info) return 0; } -static char *check_image_kernel(struct swsusp_info *info) +static const char *check_image_kernel(struct swsusp_info *info) { if (info->version_code != LINUX_VERSION_CODE) return "kernel version"; @@ -2176,7 +2176,7 @@ static void mark_unsafe_pages(struct memory_bitmap *bm) static int check_header(struct swsusp_info *info) { - char *reason; + const char *reason; reason = check_image_kernel(info); if (!reason && info->num_physpages != get_num_physpages()) From 19231a8739d13acd27cde253d757c9dd3dcb0e1d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:22 +0100 Subject: [PATCH 28/71] cpufreq: freq_table: Demote obvious misuse of kerneldoc to standard comment blocks No attempt has been made to document any of the demoted functions here. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/freq_table.c:229: warning: Function parameter or member 'policy' not described in 'show_available_freqs' drivers/cpufreq/freq_table.c:229: warning: Function parameter or member 'buf' not described in 'show_available_freqs' drivers/cpufreq/freq_table.c:229: warning: Function parameter or member 'show_boost' not described in 'show_available_freqs' drivers/cpufreq/freq_table.c:269: warning: Function parameter or member 'policy' not described in 'scaling_available_frequencies_show' drivers/cpufreq/freq_table.c:269: warning: Function parameter or member 'buf' not described in 'scaling_available_frequencies_show' drivers/cpufreq/freq_table.c:281: warning: Function parameter or member 'policy' not described in 'scaling_boost_frequencies_show' drivers/cpufreq/freq_table.c:281: warning: Function parameter or member 'buf' not described in 'scaling_boost_frequencies_show' Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/freq_table.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index e117b0059123..f839dc9852c0 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -221,7 +221,7 @@ int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_index); -/** +/* * show_available_freqs - show available frequencies for the specified CPU */ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, @@ -260,7 +260,7 @@ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, struct freq_attr cpufreq_freq_attr_##_name##_freqs = \ __ATTR_RO(_name##_frequencies) -/** +/* * show_scaling_available_frequencies - show available normal frequencies for * the specified CPU */ @@ -272,7 +272,7 @@ static ssize_t scaling_available_frequencies_show(struct cpufreq_policy *policy, cpufreq_attr_available_freq(scaling_available); EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); -/** +/* * show_available_boost_freqs - show available boost frequencies for * the specified CPU */ From a9909c217fb1430433ce791d52171a44ac5b937e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:23 +0100 Subject: [PATCH 29/71] cpufreq: cpufreq: Demote lots of function headers unworthy of kerneldoc status Also provide missing function parameter description for 'cpu' and 'policy'. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/cpufreq.c:60: warning: cannot understand function prototype: 'struct cpufreq_driver *cpufreq_driver; ' drivers/cpufreq/cpufreq.c:90: warning: Function parameter or member 'cpufreq_policy_notifier_list' not described in 'BLOCKING_NOTIFIER_HEAD' drivers/cpufreq/cpufreq.c:312: warning: Function parameter or member 'val' not described in 'adjust_jiffies' drivers/cpufreq/cpufreq.c:312: warning: Function parameter or member 'ci' not described in 'adjust_jiffies' drivers/cpufreq/cpufreq.c:538: warning: Function parameter or member 'policy' not described in 'cpufreq_driver_resolve_freq' drivers/cpufreq/cpufreq.c:686: warning: Function parameter or member 'file_name' not described in 'show_one' drivers/cpufreq/cpufreq.c:686: warning: Function parameter or member 'object' not described in 'show_one' drivers/cpufreq/cpufreq.c:731: warning: Function parameter or member 'file_name' not described in 'store_one' drivers/cpufreq/cpufreq.c:731: warning: Function parameter or member 'object' not described in 'store_one' drivers/cpufreq/cpufreq.c:741: warning: Function parameter or member 'policy' not described in 'show_cpuinfo_cur_freq' drivers/cpufreq/cpufreq.c:741: warning: Function parameter or member 'buf' not described in 'show_cpuinfo_cur_freq' drivers/cpufreq/cpufreq.c:754: warning: Function parameter or member 'policy' not described in 'show_scaling_governor' drivers/cpufreq/cpufreq.c:754: warning: Function parameter or member 'buf' not described in 'show_scaling_governor' drivers/cpufreq/cpufreq.c:770: warning: Function parameter or member 'policy' not described in 'store_scaling_governor' drivers/cpufreq/cpufreq.c:770: warning: Function parameter or member 'buf' not described in 'store_scaling_governor' drivers/cpufreq/cpufreq.c:770: warning: Function parameter or member 'count' not described in 'store_scaling_governor' drivers/cpufreq/cpufreq.c:806: warning: Function parameter or member 'policy' not described in 'show_scaling_driver' 
drivers/cpufreq/cpufreq.c:806: warning: Function parameter or member 'buf' not described in 'show_scaling_driver' drivers/cpufreq/cpufreq.c:815: warning: Function parameter or member 'policy' not described in 'show_scaling_available_governors' drivers/cpufreq/cpufreq.c:815: warning: Function parameter or member 'buf' not described in 'show_scaling_available_governors' drivers/cpufreq/cpufreq.c:859: warning: Function parameter or member 'policy' not described in 'show_related_cpus' drivers/cpufreq/cpufreq.c:859: warning: Function parameter or member 'buf' not described in 'show_related_cpus' drivers/cpufreq/cpufreq.c:867: warning: Function parameter or member 'policy' not described in 'show_affected_cpus' drivers/cpufreq/cpufreq.c:867: warning: Function parameter or member 'buf' not described in 'show_affected_cpus' drivers/cpufreq/cpufreq.c:901: warning: Function parameter or member 'policy' not described in 'show_bios_limit' drivers/cpufreq/cpufreq.c:901: warning: Function parameter or member 'buf' not described in 'show_bios_limit' drivers/cpufreq/cpufreq.c:1625: warning: Function parameter or member 'dev' not described in 'cpufreq_remove_dev' drivers/cpufreq/cpufreq.c:1625: warning: Function parameter or member 'sif' not described in 'cpufreq_remove_dev' drivers/cpufreq/cpufreq.c:2380: warning: Function parameter or member 'cpu' not described in 'cpufreq_get_policy' drivers/cpufreq/cpufreq.c:2771: warning: Function parameter or member 'driver' not described in 'cpufreq_unregister_driver' Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 036f4cc42ede..17c1c3becd92 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -52,7 +52,7 @@ static LIST_HEAD(cpufreq_governor_list); static char default_governor[CPUFREQ_NAME_LEN]; -/** +/* * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock * also protects the cpufreq_cpu_data array. @@ -80,7 +80,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); -/** +/* * Two notifier lists: the "policy" list is involved in the * validation process for a new CPU frequency policy; the * "transition" list for kernel code that needs to handle @@ -300,7 +300,7 @@ struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ -/** +/* * adjust_jiffies - adjust the system "loops_per_jiffy" * * This function alters the system "loops_per_jiffy" for the clock @@ -526,6 +526,7 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); /** * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported * one. + * @policy: associated policy to interrogate * @target_freq: target frequency to resolve. * * The target to driver frequency mapping is cached in the policy. 
@@ -670,7 +671,7 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) return get_governor(str_governor); } -/** +/* * cpufreq_per_cpu_attr_read() / show_##file_name() - * print out cpufreq information * @@ -712,7 +713,7 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -/** +/* * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ #define store_one(file_name, object) \ @@ -733,7 +734,7 @@ static ssize_t store_##file_name \ store_one(scaling_min_freq, min); store_one(scaling_max_freq, max); -/** +/* * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware */ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, @@ -747,7 +748,7 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, return sprintf(buf, "\n"); } -/** +/* * show_scaling_governor - show the current policy for the specified CPU */ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) @@ -762,7 +763,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) return -EINVAL; } -/** +/* * store_scaling_governor - store policy for the specified CPU */ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, @@ -799,7 +800,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return ret ? ret : count; } -/** +/* * show_scaling_driver - show the cpufreq driver currently loaded */ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) @@ -807,7 +808,7 @@ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name); } -/** +/* * show_scaling_available_governors - show the available CPUfreq governors */ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, @@ -851,7 +852,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) } EXPORT_SYMBOL_GPL(cpufreq_show_cpus); -/** +/* * show_related_cpus - show the CPUs affected by each transition even if * hw coordination is in use */ @@ -860,7 +861,7 @@ static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) return cpufreq_show_cpus(policy->related_cpus, buf); } -/** +/* * show_affected_cpus - show the CPUs affected by each transition */ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) @@ -894,7 +895,7 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) return policy->governor->show_setspeed(policy, buf); } -/** +/* * show_bios_limit - show the current cpufreq HW/BIOS limitation */ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) @@ -1616,7 +1617,7 @@ unlock: return 0; } -/** +/* * cpufreq_remove_dev - remove a CPU device * * Removes the cpufreq interface for a CPU device. @@ -2373,6 +2374,7 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); * cpufreq_get_policy - get the current cpufreq_policy * @policy: struct cpufreq_policy into which the current cpufreq_policy * is written + * @cpu: CPU to find the policy for * * Reads the current cpufreq policy. */ @@ -2759,7 +2761,7 @@ out: } EXPORT_SYMBOL_GPL(cpufreq_register_driver); -/** +/* * cpufreq_unregister_driver - unregister the current CPUFreq driver * * Unregister the current CPUFreq driver. 
Only call this if you have From dd2e65f2ae92268c0945842d6101fcece8f19de5 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:24 +0100 Subject: [PATCH 30/71] cpufreq: cpufreq_governor: Demote store_sampling_rate() header to standard comment block There is no need for this to be denoted as kerneldoc. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/cpufreq_governor.c:46: warning: Function parameter or member 'attr_set' not described in 'store_sampling_rate' drivers/cpufreq/cpufreq_governor.c:46: warning: Function parameter or member 'buf' not described in 'store_sampling_rate' drivers/cpufreq/cpufreq_governor.c:46: warning: Function parameter or member 'count' not described in 'store_sampling_rate' Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f99ae45efaea..63f7c219062b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -26,7 +26,7 @@ static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ -/** +/* * store_sampling_rate - update sampling rate effective immediately if needed. * * If new rate is smaller than the old, simply updating From 4a27aa9cb5b24691d3e3b2ebc3d726ee585fc960 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:26 +0100 Subject: [PATCH 31/71] cpufreq: pasemi: Include header file for {check,restore}_astate prototypes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If function callers and providers do not share the same prototypes the compiler complains of missing prototypes. Fix this by including the correct platforms header file. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/pasemi-cpufreq.c:109:5: warning: no previous prototype for ā€˜check_astateā€™ [-Wmissing-prototypes] 109 | int check_astate(void) | ^~~~~~~~~~~~ drivers/cpufreq/pasemi-cpufreq.c:114:6: warning: no previous prototype for ā€˜restore_astateā€™ [-Wmissing-prototypes] 114 | void restore_astate(int cpu) | ^~~~~~~~~~~~~~ Suggested-by: Olof Johansson Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pasemi-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index c66f566a854c..815645170c4d 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -22,6 +22,8 @@ #include #include +#include + #define SDCASR_REG 0x0100 #define SDCASR_REG_STRIDE 0x1000 #define SDCPWR_CFGA0_REG 0x0100 From 44bd9a30ef737d6bdfcf7c4e31896a09a4b15955 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:28 +0100 Subject: [PATCH 32/71] cpufreq: powernv-cpufreq: Fix a bunch of kerneldoc related issues Repair problems with formatting and missing attributes/parameters, and demote header comments which do not meet the required standards applicable to kerneldoc. 
Fixes the following W=1 kernel build warning(s): drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'last_lpstate_idx' not described in 'global_pstate_info' drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'last_gpstate_idx' not described in 'global_pstate_info' drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'policy' not described in 'global_pstate_info' drivers/cpufreq/powernv-cpufreq.c:182: warning: Function parameter or member 'i' not described in 'idx_to_pstate' drivers/cpufreq/powernv-cpufreq.c:201: warning: Function parameter or member 'pstate' not described in 'pstate_to_idx' drivers/cpufreq/powernv-cpufreq.c:670: warning: Function parameter or member 't' not described in 'gpstate_timer_handler' drivers/cpufreq/powernv-cpufreq.c:670: warning: Excess function parameter 'data' description in 'gpstate_timer_handler' Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 8646eb197cd9..b39954c14e89 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,13 +64,14 @@ * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate_idx, Last set value of local pstate and global - * last_gpstate_idx pstate in terms of cpufreq table index + * @last_lpstate_idx: Last set value of local pstate and global + * @last_gpstate_idx: pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between * routines called by the timer handler and * governer's target_index calls + * @policy: Associated CPUFreq policy */ struct global_pstate_info { int highest_lpstate_idx; @@ -170,7 +171,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift) /* Use following functions for conversions between pstate_id and index */ -/** +/* * idx_to_pstate : Returns the pstate id corresponding to the * frequency in the cpufreq frequency table * powernv_freqs indexed by @i. @@ -188,7 +189,7 @@ static inline u8 idx_to_pstate(unsigned int i) return powernv_freqs[i].driver_data; } -/** +/* * pstate_to_idx : Returns the index in the cpufreq frequencytable * powernv_freqs for the frequency whose corresponding * pstate id is @pstate. @@ -660,7 +661,7 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) /** * gpstate_timer_handler * - * @data: pointer to cpufreq_policy on which timer was queued + * @t: Timer context used to fetch global pstate info struct * * This handler brings down the global pstate closer to the local pstate * according quadratic equation. Queues a new timer if it is still not equal From e1711f296a2de54a6dd5bde4d92bb903dfaed1fb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:29 +0100 Subject: [PATCH 33/71] cpufreq: acpi-cpufreq: Mark 'dummy' variable as __always_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we fail to use a variable, even a 'dummy' one, then the compiler complains that it is set but not used. We know this is fine, so we set it as __always_unused to let the compiler know. 
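(The pattern being applied here can be sketched outside the kernel as follows; this is hypothetical user-space code using the plain GCC attribute that the kernel's __always_unused macro wraps.)

    #include <stdio.h>

    /* Stand-in for the rdmsr() macro: assigns both halves directly. */
    #define read_pair(lo, hi) do { (lo) = 0x1234; (hi) = 0x5678; } while (0)

    int main(void)
    {
            /*
             * Only 'lo' is consumed; without the attribute, GCC's
             * -Wunused-but-set-variable complains that 'hi' is set but not used.
             */
            unsigned int lo, hi __attribute__((unused));

            read_pair(lo, hi);
            printf("lo = %#x\n", lo);
            return 0;
    }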
Fixes the following W=1 kernel build warning(s): drivers/cpufreq/acpi-cpufreq.c: In function 'cpu_freq_read_intel': drivers/cpufreq/acpi-cpufreq.c:247:11: warning: variable 'dummy' set but not used [-Wunused-but-set-variable] drivers/cpufreq/acpi-cpufreq.c: In function 'cpu_freq_read_amd': drivers/cpufreq/acpi-cpufreq.c:265:11: warning: variable 'dummy' set but not used [-Wunused-but-set-variable] Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 29e8ff422b91..20422f07580f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -244,7 +244,7 @@ static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_IA32_PERF_CTL, val, dummy); return val; @@ -261,7 +261,7 @@ static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) { - u32 val, dummy; + u32 val, dummy __always_unused; rdmsr(MSR_AMD_PERF_CTL, val, dummy); return val; From a7b909376d3994ffb983a4037dd12689159fd4fb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:30 +0100 Subject: [PATCH 34/71] cpufreq: acpi-cpufreq: Mark sometimes used ID structs as __maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used when MODULE is not defined. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/acpi-cpufreq.c:1004:36: warning: 'processor_device_ids' defined but not used [-Wunused-const-variable=] 997 | static const struct x86_cpu_id acpi_cpufreq_ids[] = { | ^~~~~~~~~~~~~~~~ drivers/cpufreq/acpi-cpufreq.c:997:32: warning: 'acpi_cpufreq_ids' defined but not used [-Wunused-const-variable=] 619 | static const struct acpi_device_id processor_device_ids[] = { | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 20422f07580f..e4ff681faaaa 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -993,14 +993,14 @@ MODULE_PARM_DESC(acpi_pstate_strict, late_initcall(acpi_cpufreq_init); module_exit(acpi_cpufreq_exit); -static const struct x86_cpu_id acpi_cpufreq_ids[] = { +static const struct x86_cpu_id __maybe_unused acpi_cpufreq_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL), X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, From 638b509795a18c88ce391fb94d9191f1abcfe88d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:31 +0100 Subject: [PATCH 35/71] cpufreq: powernow-k8: Mark 'hi' and 'lo' dummy variables as __always_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we fail to use a variable, even a dummy one, then the compiler complains that it is set but not used.
We know this is fine, so we set them as __always_unused here to let the compiler know. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/powernow-k8.c: In function 'pending_bit_stuck': drivers/cpufreq/powernow-k8.c:89:10: warning: variable 'hi' set but not used [-Wunused-but-set-variable] 89 | u32 lo, hi; | ^~ drivers/cpufreq/powernow-k8.c: In function 'core_voltage_pre_transition': drivers/cpufreq/powernow-k8.c:285:14: warning: variable 'lo' set but not used [-Wunused-but-set-variable] 285 | u32 maxvid, lo, rvomult = 1; | ^~ Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 3984959eed1d..0acc9e241cd7 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -86,7 +86,7 @@ static u32 convert_fid_to_vco_fid(u32 fid) */ static int pending_bit_stuck(void) { - u32 lo, hi; + u32 lo, hi __always_unused; rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; @@ -282,7 +282,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, { u32 rvosteps = data->rvo; u32 savefid = data->currfid; - u32 maxvid, lo, rvomult = 1; + u32 maxvid, lo __always_unused, rvomult = 1; pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), From 3098174990f51671c0a1e660527ac1443e23ed0f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:32 +0100 Subject: [PATCH 36/71] cpufreq: pcc-cpufreq: Mark sometimes used ID structs as __maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used when MODULE is not defined. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/pcc-cpufreq.c:619:36: warning: 'processor_device_ids' defined but not used [-Wunused-const-variable=] 619 | static const struct acpi_device_id processor_device_ids[] = { | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pcc-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 5789fe7a94bd..9f3fc7a073d0 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -616,7 +616,7 @@ static void __exit pcc_cpufreq_exit(void) free_percpu(pcc_cpu_info); } -static const struct acpi_device_id processor_device_ids[] = { +static const struct acpi_device_id __maybe_unused processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, }, {ACPI_PROCESSOR_DEVICE_HID, }, {}, From 8f23d1f12cf1515693893aae3a79e6079bcdcf85 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:33 +0100 Subject: [PATCH 37/71] cpufreq: intel_pstate: Supply struct attribute description for get_aperf_mperf_shift() Fixes the following W=1 kernel build warning(s): drivers/cpufreq/intel_pstate.c:293: warning: Function parameter or member 'get_aperf_mperf_shift' not described in 'pstate_funcs' Suggested-by: "Rafael J. Wysocki" Signed-off-by: Lee Jones Acked-by: Viresh Kumar [ rjw: Remove line break ] Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 94cd07678ee3..57130a16d242 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -275,6 +275,7 @@ static struct cpudata **all_cpu_data; * @get_min: Callback to get minimum P state * @get_turbo: Callback to get turbo P state * @get_scaling: Callback to get frequency scaling factor + * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms * From 52fe0b16f60ed3bc38f5f4bafdeb4f618e97f62b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 15 Jul 2020 09:26:34 +0100 Subject: [PATCH 38/71] cpufreq: amd_freq_sensitivity: Mark sometimes used ID structs as __maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used when MODULE is not defined. Fixes the following W=1 kernel build warning(s): drivers/cpufreq/amd_freq_sensitivity.c:147:32: warning: 'amd_freq_sensitivity_ids' defined but not used [-Wunused-const-variable=] 147 | static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { | ^~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Lee Jones Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index f7c4206d4c90..d0b10baf039a 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -144,7 +144,7 @@ static void __exit amd_freq_sensitivity_exit(void) } module_exit(amd_freq_sensitivity_exit); -static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { +static const struct x86_cpu_id __maybe_unused amd_freq_sensitivity_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_PROC_FEEDBACK, NULL), {} }; From 133c6c84f280362911c020117532fbe538df6ae5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Jul 2020 22:23:55 +0800 Subject: [PATCH 39/71] cpufreq: powernv: Make some symbols static The sparse tool complains as follows: drivers/cpufreq/powernv-cpufreq.c:88:1: warning: symbol 'pstate_revmap' was not declared. Should it be static? drivers/cpufreq/powernv-cpufreq.c:383:18: warning: symbol 'cpufreq_freq_attr_cpuinfo_nominal_freq' was not declared. Should it be static? drivers/cpufreq/powernv-cpufreq.c:669:6: warning: symbol 'gpstate_timer_handler' was not declared. Should it be static? drivers/cpufreq/powernv-cpufreq.c:902:6: warning: symbol 'powernv_cpufreq_work_fn' was not declared. Should it be static? Those symbols are not used outside of this file, so mark them static. Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Reviewed-by: Lee Jones Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index b39954c14e89..a9af15e994cc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -86,7 +86,7 @@ struct global_pstate_info { static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); +static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); /** * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap * indexed by a function of pstate id. @@ -381,7 +381,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, powernv_freqs[powernv_pstate_info.nominal].frequency); } -struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = +static struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = __ATTR_RO(cpuinfo_nominal_freq); #define SCALING_BOOST_FREQS_ATTR_INDEX 2 @@ -667,7 +667,7 @@ static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) * according quadratic equation. Queues a new timer if it is still not equal * to local pstate */ -void gpstate_timer_handler(struct timer_list *t) +static void gpstate_timer_handler(struct timer_list *t) { struct global_pstate_info *gpstates = from_timer(gpstates, t, timer); struct cpufreq_policy *policy = gpstates->policy; @@ -900,7 +900,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; -void powernv_cpufreq_work_fn(struct work_struct *work) +static void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); struct cpufreq_policy *policy; From 23a522e388f5a0cbeaf37e10e3dba832a83a1671 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Jul 2020 15:54:57 +0200 Subject: [PATCH 40/71] cpufreq: intel_pstate: Clean up aperf_mperf_shift description The kerneldoc description of the aperf_mperf_shift field in struct global_params is unclear and there is a typo in it, so simplify it and clean it up. Reported-by: Lee Jones Signed-off-by: Rafael J. Wysocki Reviewed-by: Lee Jones --- drivers/cpufreq/intel_pstate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 57130a16d242..a394ff05aba6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -201,9 +201,7 @@ struct global_params { * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @last_sample_time: Last Sample time - * @aperf_mperf_shift: Number of clock cycles after aperf, merf is incremented - * This shift is a multiplier to mperf delta to - * calculate CPU busy. + * @aperf_mperf_shift: APERF vs MPERF counting frequency difference * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value From 7aa1031223bc01483e7aca6b7838598c4edc19f4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 14 Jul 2020 20:17:24 +0200 Subject: [PATCH 41/71] cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported Although there are processors supporting hardware-managed P-states (HWP) without the energy-performance preference (EPP) feature, they are not expected to be run with HWP enabled (the BIOS should disable HWP on those systems). 
Missing EPP support generally indicates an incomplete HWP implementation and so it is better to avoid using HWP on those systems in production. However, intel_pstate currently enables HWP on such systems, which is questionable, so prevent it from doing that by making it check EPP support before enabling HWP and avoid enabling it if EPP is not supported by the processor at hand. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a394ff05aba6..482a5f241d1a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2823,7 +2823,12 @@ static int __init intel_pstate_init(void) id = x86_match_cpu(hwp_support_ids); if (id) { copy_cpu_funcs(&core_funcs); - if (!no_hwp) { + /* + * Avoid enabling HWP for processors without EPP support, + * because that means incomplete HWP implementation which is a + * corner case and supporting it is generally problematic. + */ + if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; From fa0866a1d1be8ffa6979bf2127272b7e3d511fcd Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 13:05:11 +0200 Subject: [PATCH 42/71] cpupower: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-monitor.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 70a56476f4b0..8ee737eefa5c 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -170,7 +170,7 @@ displayed. .SH REFERENCES "BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 14h Processors" -http://support.amd.com/us/Processor_TechDocs/43170.pdf +https://support.amd.com/us/Processor_TechDocs/43170.pdf "Intel® Turbo Boost Technology in Intel® Core™ Microarchitecture (Nehalem) Based Processors" @@ -178,7 +178,7 @@ http://download.intel.com/design/processor/applnots/320354.pdf "Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide" -http://www.intel.com/products/processor/manuals +https://www.intel.com/products/processor/manuals .SH FILES .ta From 036491542aa47633a7c9a1b01a0c19f2c5e67f04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Sat, 20 Jun 2020 13:03:22 -0400 Subject: [PATCH 43/71] opp: core: Add missing export for dev_pm_opp_adjust_voltage Export dev_pm_opp_adjust_voltage() as it may be used by modules later on.
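As context for why a module would want this export, here is a minimal, illustrative sketch (not part of the patch): a hypothetical driver that, after characterizing its device, tightens the voltage of one OPP whose table is already populated. Only dev_pm_opp_adjust_voltage() and the headers are existing kernel interfaces; the function name and the numbers are invented for the example.

	#include <linux/device.h>
	#include <linux/pm_opp.h>

	/* Hypothetical helper: retune the 1.2 GHz OPP to a measured voltage. */
	static int example_trim_opp(struct device *dev)
	{
		unsigned long freq = 1200000000;	/* OPP frequency, in Hz */
		unsigned long u_volt = 1100000;		/* new target voltage, in uV */

		/* Adjust the target, minimum and maximum voltages of that OPP. */
		return dev_pm_opp_adjust_voltage(dev, freq, u_volt,
						 u_volt - 20000,	/* u_volt_min */
						 u_volt + 20000);	/* u_volt_max */
	}

Built as a module, code like this would previously fail to link against dev_pm_opp_adjust_voltage(); the EXPORT_SYMBOL_GPL() added below resolves that.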
Signed-off-by: Valdis Kletnieks [ Viresh: Rewrote commit log ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index dfbd3d10410c..864cf4c65fff 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2271,6 +2271,7 @@ adjust_put_table: dev_pm_opp_put_opp_table(opp_table); return r; } +EXPORT_SYMBOL_GPL(dev_pm_opp_adjust_voltage); /** * dev_pm_opp_enable() - Enable a specific OPP From 0e510bf1b881e872a59033634e6226cd4c501e77 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 20:05:16 +0200 Subject: [PATCH 44/71] opp: ti-opp-supply: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Viresh Kumar --- drivers/opp/ti-opp-supply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index e3357e91decb..bd4771f388ab 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2016-2017 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon * Dave Gerlach * From 06a8a059e88bd73ee81a4ad19e97c04766f84def Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Mon, 20 Jul 2020 16:55:26 +0800 Subject: [PATCH 45/71] opp: Allow disabled OPPs in dev_pm_opp_get_freq() Allow dev_pm_opp_get_freq() to work for disabled OPPs. Signed-off-by: Andrew-sh.Cheng [ Viresh: Massaged commit log ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 864cf4c65fff..0c8c74a3c868 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage); */ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) { - if (IS_ERR_OR_NULL(opp) || !opp->available) { + if (IS_ERR_OR_NULL(opp)) { pr_err("%s: Invalid parameters\n", __func__); return 0; } From 68f9b22842897087895230964776083e168415de Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Mon, 20 Jul 2020 18:02:49 -0700 Subject: [PATCH 46/71] pm-graph v5.7 - important s2idle fixes Important fixes: - in s2idle, use timekeeping_freeze trace mark instead of machine_suspend to denote entry into s2idle mode. - in s2idle, use machine_suspend trace mark to create a new virtual device called "s2idle_enter_x". It denotes an s2idle_enter call loop of iterations where s2idle was never actually achieved. It isn't counted as "freeze time" in the header. - in s2idle, only show multiple freeze times if s2idle went in and out of resume_noirq. Otherwise multiple freezes are shown with "waking" time subtracted (waking time is time spent outside s2idle dealing with wakeups). - in s2idle summaries, include "FREEZEWAKE" as an issue when at least 1ms is spent waking from s2idle. A clean run should only wake for the rtc timer. - add support for device callbacks with matching names in the same phase. 
In rare cases some devices register multiple callbacks from separate drivers using the same name. Without this fix only one is shown. - add kparamsfmt string back to fix bootgraph General updates: - when suspend_machine is missing, error says "failed in suspend_machine" - extract target count/time and add to summary title if -multi used - include any instances of "timeout" in dmesg as issues to be logged. - fix ftrace parse to handle any number of flags (instead of just 4). - remove sync/async_device string from device detail, remains in hover. - when using callgraph (-f) add driver name to callgraph titles. Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 2 +- tools/power/pm-graph/sleepgraph.py | 249 +++++++++++++++++------------ 2 files changed, 149 insertions(+), 102 deletions(-) diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index afe6beb40ad9..89d0a7dab4bc 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -6,7 +6,7 @@ |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.6 + Version: 5.7 Author: Todd Brandt Home Page: https://01.org/pm-graph diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 602e64b68ba7..46ff97e909c6 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -81,7 +81,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.6' + version = '5.7' ansi = False rs = 0 display = '' @@ -198,7 +198,7 @@ class SystemValues: 'suspend_console': {}, 'acpi_pm_prepare': {}, 'syscore_suspend': {}, - 'arch_thaw_secondary_cpus_end': {}, + 'arch_enable_nonboot_cpus_end': {}, 'syscore_resume': {}, 'acpi_pm_finish': {}, 'resume_console': {}, @@ -924,10 +924,7 @@ class SystemValues: tp = TestProps() tf = self.openlog(self.ftracefile, 'r') for line in tf: - # determine the trace data type (required for further parsing) - m = re.match(tp.tracertypefmt, line) - if(m): - tp.setTracerType(m.group('t')) + if tp.stampInfo(line, self): continue # parse only valid lines, if this is not one move on m = re.match(tp.ftrace_line_fmt, line) @@ -1244,8 +1241,8 @@ class DevProps: if self.xtraclass: return ' '+self.xtraclass if self.isasync: - return ' async_device' - return ' sync_device' + return ' (async)' + return ' (sync)' # Class: DeviceNode # Description: @@ -1301,6 +1298,7 @@ class Data: 'FAIL' : r'(?i).*\bFAILED\b.*', 'INVALID' : r'(?i).*\bINVALID\b.*', 'CRASH' : r'(?i).*\bCRASHED\b.*', + 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'IRQ' : r'.*\bgenirq: .*', 'TASKFAIL': r'.*Freezing of tasks *.*', 'ACPI' : r'.*\bACPI *(?P[A-Za-z]*) *Error[: ].*', @@ -1358,11 +1356,11 @@ class Data: if self.dmesg[p]['order'] == order: return p return '' - def lastPhase(self): + def lastPhase(self, depth=1): plist = self.sortedPhases() - if len(plist) < 1: + if len(plist) < depth: return '' - return plist[-1] + return plist[-1*depth] def turbostatInfo(self): tp = TestProps() out = {'syslpi':'N/A','pkgpc10':'N/A'} @@ -1382,9 +1380,12 @@ class Data: if len(self.dmesgtext) < 1 and sysvals.dmesgfile: lf = sysvals.openlog(sysvals.dmesgfile, 'r') i = 0 + tp = TestProps() list = [] for line in lf: i += 1 + if tp.stampInfo(line, sysvals): + continue m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if not m: continue @@ -1400,15 +1401,15 @@ class Data: list.append((msg, err, dir, t, i, i)) self.kerror = True break - msglist = [] + tp.msglist = [] for msg, type, dir, 
t, idx1, idx2 in list: - msglist.append(msg) + tp.msglist.append(msg) self.errorinfo[dir].append((type, t, idx1, idx2)) if self.kerror: sysvals.dmesglog = True if len(self.dmesgtext) < 1 and sysvals.dmesgfile: lf.close() - return msglist + return tp def setStart(self, time, msg=''): self.start = time if msg: @@ -1623,6 +1624,8 @@ class Data: if('src' in d): for e in d['src']: e.time = self.trimTimeVal(e.time, t0, dT, left) + e.end = self.trimTimeVal(e.end, t0, dT, left) + e.length = e.end - e.time for dir in ['suspend', 'resume']: list = [] for e in self.errorinfo[dir]: @@ -1640,7 +1643,12 @@ class Data: if tL > 0: left = True if tR > tZero else False self.trimTime(tS, tL, left) - self.tLow.append('%.0f'%(tL*1000)) + if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001: + tTry = round(self.dmesg[lp]['trying'] * 1000) + text = '%.0f (-%.0f waking)' % (tL * 1000, tTry) + else: + text = '%.0f' % (tL * 1000) + self.tLow.append(text) lp = phase def getMemTime(self): if not self.hwstart or not self.hwend: @@ -1776,7 +1784,7 @@ class Data: length = -1.0 if(start >= 0 and end >= 0): length = end - start - if pid == -2: + if pid == -2 or name not in sysvals.tracefuncs.keys(): i = 2 origname = name while(name in list): @@ -1789,6 +1797,15 @@ class Data: if color: list[name]['color'] = color return name + def findDevice(self, phase, name): + list = self.dmesg[phase]['list'] + mydev = '' + for devname in sorted(list): + if name == devname or re.match('^%s\[(?P[0-9]*)\]$' % name, devname): + mydev = devname + if mydev: + return list[mydev] + return False def deviceChildren(self, devname, phase): devlist = [] list = self.dmesg[phase]['list'] @@ -2779,6 +2796,7 @@ class TestProps: testerrfmt = '^# enter_sleep_error (?P.*)' sysinfofmt = '^# sysinfo .*' cmdlinefmt = '^# command \| (?P.*)' + kparamsfmt = '^# kparams \| (?P.*)' devpropfmt = '# Device Properties: .*' pinfofmt = '# platform-(?P[a-z,A-Z,0-9]*): (?P.*)' tracertypefmt = '# tracer: (?P.*)' @@ -2790,8 +2808,9 @@ class TestProps: '[ +!#\*@$]*(?P[0-9\.]*) .*\| (?P.*)' ftrace_line_fmt_nop = \ ' *(?P.*)-(?P[0-9]*) *\[(?P[0-9]*)\] *'+\ - '(?P.{4}) *(?P