Some Zen2- and Zen3-based processors use shared memory exposed by the ACPI SBIOS instead of the full MSR interface. Since these processors have no CPPC MSR support, add the ACPI CPPC functions as the backend for them. A module parameter (shared_mem) is used to enable the shared memory solution manually on the affected processors. We will enable it by default once the performance issues with this solution are addressed.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
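
For reference, on such a system the shared memory path has to be selected explicitly, either at module load time (e.g. `modprobe amd-pstate shared_mem=1`, if the driver is built as a module) or via the kernel command line (`amd_pstate.shared_mem=1`), matching the hint printed by amd_pstate_init() below.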

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * allows a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware
 * implementations: 1) Full MSR Solution and 2) Shared Memory Solution. The
 * X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#define AMD_PSTATE_TRANSITION_LATENCY 0x20000
#define AMD_PSTATE_TRANSITION_DELAY 500

/*
 * TODO: We need more time, together with the community, to fine tune
 * processors with the shared memory solution.
 *
 * There are performance drops in some CPU benchmarks reported by SUSE, and we
 * are co-working with them to fine tune the shared memory solution. So it is
 * disabled by default, falling back to acpi-cpufreq on these processors, and a
 * module parameter is provided to enable it manually for debugging.
 */
static bool shared_mem = false;
module_param(shared_mem, bool, 0444);
MODULE_PARM_DESC(shared_mem,
		 "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)");

static struct cpufreq_driver amd_pstate_driver;

/**
 * struct amd_cpudata - private CPU data for AMD P-State
 * @cpu: CPU number
 * @cppc_req_cached: cached performance request hints
 * @highest_perf: the maximum performance an individual processor may reach,
 *		  assuming ideal conditions
 * @nominal_perf: the maximum sustained performance level of the processor,
 *		  assuming ideal operating conditions
 * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
 *			   savings are achieved
 * @lowest_perf: the absolute lowest performance level of the processor
 * @max_freq: the frequency that maps to highest_perf
 * @min_freq: the frequency that maps to lowest_perf
 * @nominal_freq: the frequency that maps to nominal_perf
 * @lowest_nonlinear_freq: the frequency that maps to lowest_nonlinear_perf
 *
 * The amd_cpudata is the key private data for each CPU thread in AMD P-State,
 * and it represents all the attributes and goals that AMD P-State requests at
 * runtime.
 */
struct amd_cpudata {
	int cpu;

	u64 cppc_req_cached;

	u32 highest_perf;
	u32 nominal_perf;
	u32 lowest_nonlinear_perf;
	u32 lowest_perf;

	u32 max_freq;
	u32 min_freq;
	u32 nominal_freq;
	u32 lowest_nonlinear_freq;
};

static inline int pstate_enable(bool enable)
{
	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
}

static int cppc_enable(bool enable)
{
	int cpu, ret = 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;
	}

	return ret;
}
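
/*
 * The static calls below default to the full MSR solution (the pstate_*
 * helpers); on shared memory systems, amd_pstate_init() retargets them to
 * the cppc_* variants via static_call_update(), so the hot paths avoid an
 * extra conditional or indirect branch.
 */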
DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);

static inline int amd_pstate_enable(bool enable)
{
	return static_call(amd_pstate_enable)(enable);
}

static int pstate_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	/*
	 * TODO: Introduce AMD specific power feature.
	 *
	 * CPPC entry doesn't indicate the highest performance in some ASICs.
	 */
	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));

	return 0;
}

static int cppc_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());

	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}
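
/*
 * The MSR path issues the request word cached by amd_pstate_update(); the
 * min/des/max arguments are consumed directly only by the shared memory
 * path, which packs them into a struct cppc_perf_ctrls for the ACPI CPPC
 * accessors instead.
 */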
static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			       u32 des_perf, u32 max_perf, bool fast_switch)
{
	if (fast_switch)
		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
	else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
}

static void cppc_update_perf(struct amd_cpudata *cpudata,
			     u32 min_perf, u32 des_perf,
			     u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);

static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
					  u32 max_perf, bool fast_switch)
{
	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
					    max_perf, fast_switch);
}
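
/*
 * Repack the min/des/max hints into the cached MSR_AMD_CPPC_REQ word and
 * only push them to the hardware when the value actually changes, which
 * avoids redundant MSR writes or shared memory transactions.
 */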
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch)
{
	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
	u64 value = prev;

	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(des_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	if (value == prev)
		return;

	WRITE_ONCE(cpudata->cppc_req_cached, value);

	amd_pstate_update_perf(cpudata, min_perf, des_perf,
			       max_perf, fast_switch);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);

	return 0;
}
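
/*
 * The ->target() callback maps the requested frequency linearly onto the
 * performance scale: des_perf / cap_perf == target_freq / max_freq,
 * rounded to the closest performance level.
 */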
static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	cpufreq_freq_transition_begin(policy, &freqs);
	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, false);
	cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}
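
/*
 * Fast path used by the schedutil governor: the scheduler passes
 * utilization values relative to 'capacity', so they are rescaled against
 * highest_perf, and min_perf is floored at lowest_nonlinear_perf to stay
 * in the power-efficient range before the fast-switch update.
 */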
static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata = policy->driver_data;

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->highest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
}

static int amd_get_min_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to khz */
	return cppc_perf.lowest_freq * 1000;
}
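
/*
 * Derive max_freq by scaling nominal_freq with the boost ratio
 * highest_perf / nominal_perf, carried in fixed point with
 * SCHED_CAPACITY_SHIFT fractional bits to avoid floating point math in
 * the kernel.
 */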
static int amd_get_max_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 max_perf, max_freq, nominal_freq, nominal_perf;
	u64 boost_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_perf = READ_ONCE(cpudata->highest_perf);

	boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
			      nominal_perf);

	max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to khz */
	return max_freq * 1000;
}

static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	/* Switch to khz */
	return cppc_perf.nominal_freq * 1000;
}

static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
	    nominal_freq, nominal_perf;
	u64 lowest_nonlinear_ratio;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	nominal_freq = cppc_perf.nominal_freq;
	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;

	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);

	lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;

	/* Switch to khz */
	return lowest_nonlinear_freq * 1000;
}
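
/*
 * amd_pstate_cpu_init() only advertises fast frequency switching on the
 * full MSR solution; the shared memory path relies on the ACPI CPPC
 * accessors, which are not usable from the scheduler's fast-switch
 * context.
 */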
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata;

	min_freq = amd_get_min_freq(cpudata);
	max_freq = amd_get_max_freq(cpudata);
	nominal_freq = amd_get_nominal_freq(cpudata);
	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);

	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
			min_freq, max_freq);
		ret = -EINVAL;
		goto free_cpudata;
	}

	policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
	policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (boot_cpu_has(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	/* Initial processor data capability frequencies */
	cpudata->max_freq = max_freq;
	cpudata->min_freq = min_freq;
	cpudata->nominal_freq = nominal_freq;
	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;

	policy->driver_data = cpudata;

	return 0;

free_cpudata:
	kfree(cpudata);
	return ret;
}

static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata;

	cpudata = policy->driver_data;

	kfree(cpudata);

	return 0;
}
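
/*
 * CPUFREQ_CONST_LOOPS marks loops_per_jiffy as frequency invariant here,
 * and CPUFREQ_NEED_UPDATE_LIMITS asks the cpufreq core to invoke
 * ->target() even when the requested frequency is unchanged, so updated
 * policy limits still reach the hardware.
 */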
static struct cpufreq_driver amd_pstate_driver = {
	.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify = amd_pstate_verify,
	.target = amd_pstate_target,
	.init = amd_pstate_cpu_init,
	.exit = amd_pstate_cpu_exit,
	.name = "amd-pstate",
};

static int __init amd_pstate_init(void)
{
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	if (!acpi_cpc_valid()) {
		pr_debug("the _CPC object is not present in SBIOS\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* capability check */
	if (boot_cpu_has(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
	} else if (shared_mem) {
		static_call_update(amd_pstate_enable, cppc_enable);
		static_call_update(amd_pstate_init_perf, cppc_init_perf);
		static_call_update(amd_pstate_update_perf, cppc_update_perf);
	} else {
		pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n");
		return -ENODEV;
	}

	/* enable amd pstate feature */
	ret = amd_pstate_enable(true);
	if (ret) {
		pr_err("failed to enable amd-pstate with return %d\n", ret);
		return ret;
	}

	ret = cpufreq_register_driver(&amd_pstate_driver);
	if (ret)
		pr_err("failed to register amd_pstate_driver with return %d\n",
		       ret);

	return ret;
}

static void __exit amd_pstate_exit(void)
{
	cpufreq_unregister_driver(&amd_pstate_driver);

	amd_pstate_enable(false);
}

module_init(amd_pstate_init);
module_exit(amd_pstate_exit);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
MODULE_LICENSE("GPL");