cpufreq: powernv: cpufreq driver for powernv platform
Backend driver to dynamically set voltage and frequency on IBM POWER non-virtualized platforms. Power management SPRs are used to set the required PState. This driver works in conjunction with cpufreq governors like 'ondemand' to provide a demand based frequency and voltage setting on IBM POWER non-virtualized platforms. PState table is obtained from OPAL v3 firmware through device tree. powernv_cpufreq back-end driver would parse the relevant device-tree nodes and initialise the cpufreq subsystem on powernv platform. The code was originally written by svaidy@linux.vnet.ibm.com. Over time it was modified to accommodate bug-fixes as well as updates to the cpu-freq core. Relevant portions of the change logs corresponding to those modifications are noted below: * The policy->cpus needs to be populated in a hotplug-invariant manner instead of using cpu_sibling_mask() which varies with cpu-hotplug. This is because the cpufreq core code copies this content into policy->related_cpus mask which should not vary on cpu-hotplug. [Authored by srivatsa.bhat@linux.vnet.ibm.com] * Create a helper routine that can return the cpu-frequency for the corresponding pstate_id. Also, cache the values of the pstate_max, pstate_min and pstate_nominal and nr_pstates in a static structure so that they can be reused in the future to perform any validations. [Authored by ego@linux.vnet.ibm.com] * Create a driver attribute named cpuinfo_nominal_freq which creates a sysfs read-only file named cpuinfo_nominal_freq. Export the frequency corresponding to the nominal_pstate through this interface. Nominal frequency is the highest non-turbo frequency for the platform. This is generally used for setting governor policies from user space for optimal energy efficiency. [Authored by ego@linux.vnet.ibm.com] * Implement a powernv_cpufreq_get(unsigned int cpu) method which will return the current operating frequency. 
Export this via the sysfs interface cpuinfo_cur_freq by setting powernv_cpufreq_driver.get to powernv_cpufreq_get(). [Authored by ego@linux.vnet.ibm.com] [Change log updated by ego@linux.vnet.ibm.com] Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
parent
0ca97886fe
commit
b3d627a5f2
@ -271,6 +271,10 @@
|
||||
#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
|
||||
#define SPRN_IC 0x350 /* Virtual Instruction Count */
|
||||
#define SPRN_VTB 0x351 /* Virtual Time Base */
|
||||
#define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */
|
||||
#define SPRN_PMSR 0x355 /* Power Management Status Reg */
|
||||
#define SPRN_PMCR 0x374 /* Power Management Control Register */
|
||||
|
||||
/* HFSCR and FSCR bit numbers are the same */
|
||||
#define FSCR_TAR_LG 8 /* Enable Target Address Register */
|
||||
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
|
||||
|
@ -54,3 +54,11 @@ config PPC_PASEMI_CPUFREQ
|
||||
help
|
||||
This adds the support for frequency switching on PA Semi
|
||||
PWRficient processors.
|
||||
|
||||
config POWERNV_CPUFREQ
|
||||
tristate "CPU frequency scaling for IBM POWERNV platform"
|
||||
depends on PPC_POWERNV
|
||||
default y
|
||||
help
|
||||
This adds support for CPU frequency switching on IBM POWERNV
|
||||
platform
|
||||
|
@ -86,6 +86,7 @@ obj-$(CONFIG_PPC_CORENET_CPUFREQ) += ppc-corenet-cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o
|
||||
obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += pasemi-cpufreq.o
|
||||
obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o
|
||||
|
||||
##################################################################################
|
||||
# Other platform drivers
|
||||
|
342
drivers/cpufreq/powernv-cpufreq.c
Normal file
342
drivers/cpufreq/powernv-cpufreq.c
Normal file
@ -0,0 +1,342 @@
|
||||
/*
|
||||
* POWERNV cpufreq driver for the IBM POWER processors
|
||||
*
|
||||
* (C) Copyright IBM 2014
|
||||
*
|
||||
* Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "powernv-cpufreq: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/of.h>
|
||||
|
||||
#include <asm/cputhreads.h>
|
||||
#include <asm/reg.h>
|
||||
|
||||
#define POWERNV_MAX_PSTATES 256
|
||||
|
||||
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
|
||||
static int powernv_pstate_ids[POWERNV_MAX_PSTATES+1];
|
||||
|
||||
/*
 * Cached pstate limits, filled in by init_powernv_pstates().
 *
 * Note: The set of pstates consists of contiguous integers running from
 * powernv_pstate_info.min (the smallest) up to powernv_pstate_info.max
 * (the largest).
 *
 * The nominal pstate, powernv_pstate_info.nominal, is the highest
 * non-turbo pstate in this platform.
 */
struct powernv_pstate_info {
	int min;		/* smallest pstate id */
	int max;		/* largest pstate id */
	int nominal;		/* highest non-turbo pstate id */
	int nr_pstates;		/* number of valid entries in the tables */
};

static struct powernv_pstate_info powernv_pstate_info;
|
||||
|
||||
/*
|
||||
* Initialize the freq table based on data obtained
|
||||
* from the firmware passed via device-tree
|
||||
*/
|
||||
static int init_powernv_pstates(void)
|
||||
{
|
||||
struct device_node *power_mgt;
|
||||
int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0;
|
||||
const __be32 *pstate_ids, *pstate_freqs;
|
||||
u32 len_ids, len_freqs;
|
||||
|
||||
power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
|
||||
if (!power_mgt) {
|
||||
pr_warn("power-mgt node not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
|
||||
pr_warn("ibm,pstate-min node not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
|
||||
pr_warn("ibm,pstate-max node not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
|
||||
&pstate_nominal)) {
|
||||
pr_warn("ibm,pstate-nominal not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
|
||||
pstate_nominal, pstate_max);
|
||||
|
||||
pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
|
||||
if (!pstate_ids) {
|
||||
pr_warn("ibm,pstate-ids not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
|
||||
&len_freqs);
|
||||
if (!pstate_freqs) {
|
||||
pr_warn("ibm,pstate-frequencies-mhz not found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
WARN_ON(len_ids != len_freqs);
|
||||
nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
|
||||
if (!nr_pstates) {
|
||||
pr_warn("No PStates found\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
pr_debug("NR PStates %d\n", nr_pstates);
|
||||
for (i = 0; i < nr_pstates; i++) {
|
||||
u32 id = be32_to_cpu(pstate_ids[i]);
|
||||
u32 freq = be32_to_cpu(pstate_freqs[i]);
|
||||
|
||||
pr_debug("PState id %d freq %d MHz\n", id, freq);
|
||||
powernv_freqs[i].frequency = freq * 1000; /* kHz */
|
||||
powernv_pstate_ids[i] = id;
|
||||
}
|
||||
/* End of list marker entry */
|
||||
powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
|
||||
|
||||
powernv_pstate_info.min = pstate_min;
|
||||
powernv_pstate_info.max = pstate_max;
|
||||
powernv_pstate_info.nominal = pstate_nominal;
|
||||
powernv_pstate_info.nr_pstates = nr_pstates;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns the CPU frequency corresponding to the pstate_id. */
|
||||
static unsigned int pstate_id_to_freq(int pstate_id)
|
||||
{
|
||||
int i;
|
||||
|
||||
i = powernv_pstate_info.max - pstate_id;
|
||||
BUG_ON(i >= powernv_pstate_info.nr_pstates || i < 0);
|
||||
|
||||
return powernv_freqs[i].frequency;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpuinfo_nominal_freq_show - Show the nominal CPU frequency as indicated by
|
||||
* the firmware
|
||||
*/
|
||||
static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf, "%u\n",
|
||||
pstate_id_to_freq(powernv_pstate_info.nominal));
|
||||
}
|
||||
|
||||
struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
|
||||
__ATTR_RO(cpuinfo_nominal_freq);
|
||||
|
||||
static struct freq_attr *powernv_cpu_freq_attr[] = {
|
||||
&cpufreq_freq_attr_scaling_available_freqs,
|
||||
&cpufreq_freq_attr_cpuinfo_nominal_freq,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Helper routines */
|
||||
|
||||
/* Access helpers to power mgt SPR */
|
||||
|
||||
static inline unsigned long get_pmspr(unsigned long sprn)
|
||||
{
|
||||
switch (sprn) {
|
||||
case SPRN_PMCR:
|
||||
return mfspr(SPRN_PMCR);
|
||||
|
||||
case SPRN_PMICR:
|
||||
return mfspr(SPRN_PMICR);
|
||||
|
||||
case SPRN_PMSR:
|
||||
return mfspr(SPRN_PMSR);
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
static inline void set_pmspr(unsigned long sprn, unsigned long val)
|
||||
{
|
||||
switch (sprn) {
|
||||
case SPRN_PMCR:
|
||||
mtspr(SPRN_PMCR, val);
|
||||
return;
|
||||
|
||||
case SPRN_PMICR:
|
||||
mtspr(SPRN_PMICR, val);
|
||||
return;
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
 * Argument block passed to the smp_call_function callbacks in this file
 * to query or update the pstate of a remote CPU.
 */
struct powernv_smp_call_data {
	/* Frequency filled in by powernv_read_cpu_freq(). */
	unsigned int freq;
	/* Pstate id: read back by powernv_read_cpu_freq(), applied by set_pstate(). */
	int pstate_id;
};
|
||||
|
||||
/*
|
||||
* powernv_read_cpu_freq: Reads the current frequency on this CPU.
|
||||
*
|
||||
* Called via smp_call_function.
|
||||
*
|
||||
* Note: The caller of the smp_call_function should pass an argument of
|
||||
* the type 'struct powernv_smp_call_data *' along with this function.
|
||||
*
|
||||
* The current frequency on this CPU will be returned via
|
||||
* ((struct powernv_smp_call_data *)arg)->freq;
|
||||
*/
|
||||
static void powernv_read_cpu_freq(void *arg)
|
||||
{
|
||||
unsigned long pmspr_val;
|
||||
s8 local_pstate_id;
|
||||
struct powernv_smp_call_data *freq_data = arg;
|
||||
|
||||
pmspr_val = get_pmspr(SPRN_PMSR);
|
||||
|
||||
/*
|
||||
* The local pstate id corresponds bits 48..55 in the PMSR.
|
||||
* Note: Watch out for the sign!
|
||||
*/
|
||||
local_pstate_id = (pmspr_val >> 48) & 0xFF;
|
||||
freq_data->pstate_id = local_pstate_id;
|
||||
freq_data->freq = pstate_id_to_freq(freq_data->pstate_id);
|
||||
|
||||
pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n",
|
||||
raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
|
||||
freq_data->freq);
|
||||
}
|
||||
|
||||
/*
|
||||
* powernv_cpufreq_get: Returns the CPU frequency as reported by the
|
||||
* firmware for CPU 'cpu'. This value is reported through the sysfs
|
||||
* file cpuinfo_cur_freq.
|
||||
*/
|
||||
unsigned int powernv_cpufreq_get(unsigned int cpu)
|
||||
{
|
||||
struct powernv_smp_call_data freq_data;
|
||||
|
||||
smp_call_function_any(cpu_sibling_mask(cpu), powernv_read_cpu_freq,
|
||||
&freq_data, 1);
|
||||
|
||||
return freq_data.freq;
|
||||
}
|
||||
|
||||
/*
|
||||
* set_pstate: Sets the pstate on this CPU.
|
||||
*
|
||||
* This is called via an smp_call_function.
|
||||
*
|
||||
* The caller must ensure that freq_data is of the type
|
||||
* (struct powernv_smp_call_data *) and the pstate_id which needs to be set
|
||||
* on this CPU should be present in freq_data->pstate_id.
|
||||
*/
|
||||
static void set_pstate(void *freq_data)
|
||||
{
|
||||
unsigned long val;
|
||||
unsigned long pstate_ul =
|
||||
((struct powernv_smp_call_data *) freq_data)->pstate_id;
|
||||
|
||||
val = get_pmspr(SPRN_PMCR);
|
||||
val = val & 0x0000FFFFFFFFFFFFULL;
|
||||
|
||||
pstate_ul = pstate_ul & 0xFF;
|
||||
|
||||
/* Set both global(bits 56..63) and local(bits 48..55) PStates */
|
||||
val = val | (pstate_ul << 56) | (pstate_ul << 48);
|
||||
|
||||
pr_debug("Setting cpu %d pmcr to %016lX\n",
|
||||
raw_smp_processor_id(), val);
|
||||
set_pmspr(SPRN_PMCR, val);
|
||||
}
|
||||
|
||||
/*
|
||||
* powernv_cpufreq_target_index: Sets the frequency corresponding to
|
||||
* the cpufreq table entry indexed by new_index on the cpus in the
|
||||
* mask policy->cpus
|
||||
*/
|
||||
static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
|
||||
unsigned int new_index)
|
||||
{
|
||||
struct powernv_smp_call_data freq_data;
|
||||
|
||||
freq_data.pstate_id = powernv_pstate_ids[new_index];
|
||||
|
||||
/*
|
||||
* Use smp_call_function to send IPI and execute the
|
||||
* mtspr on target CPU. We could do that without IPI
|
||||
* if current CPU is within policy->cpus (core)
|
||||
*/
|
||||
smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
|
||||
{
|
||||
int base, i;
|
||||
|
||||
base = cpu_first_thread_sibling(policy->cpu);
|
||||
|
||||
for (i = 0; i < threads_per_core; i++)
|
||||
cpumask_set_cpu(base + i, policy->cpus);
|
||||
|
||||
return cpufreq_table_validate_and_show(policy, powernv_freqs);
|
||||
}
|
||||
|
||||
static struct cpufreq_driver powernv_cpufreq_driver = {
|
||||
.name = "powernv-cpufreq",
|
||||
.flags = CPUFREQ_CONST_LOOPS,
|
||||
.init = powernv_cpufreq_cpu_init,
|
||||
.verify = cpufreq_generic_frequency_table_verify,
|
||||
.target_index = powernv_cpufreq_target_index,
|
||||
.get = powernv_cpufreq_get,
|
||||
.attr = powernv_cpu_freq_attr,
|
||||
};
|
||||
|
||||
/*
 * powernv_cpufreq_init - module entry point.
 *
 * Builds the pstate tables from the device tree and, if that succeeds,
 * registers the driver with the cpufreq core.
 *
 * Returns the error from init_powernv_pstates() if table setup failed,
 * otherwise the result of cpufreq_register_driver().
 */
static int __init powernv_cpufreq_init(void)
{
	/* Discover pstates from device tree and init */
	int rc = init_powernv_pstates();

	if (!rc)
		return cpufreq_register_driver(&powernv_cpufreq_driver);

	pr_info("powernv-cpufreq disabled. System does not support PState control\n");
	return rc;
}
|
||||
module_init(powernv_cpufreq_init);
|
||||
|
||||
/* Module exit point: unregisters the driver from the cpufreq core. */
static void __exit powernv_cpufreq_exit(void)
{
	cpufreq_unregister_driver(&powernv_cpufreq_driver);
}
|
||||
module_exit(powernv_cpufreq_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");
|
Loading…
Reference in New Issue
Block a user