platform/x86: Add support for Uncore frequency control
Some server users set limits on the uncore frequency using MSR 620H, while running latency sensitive workloads. Here uncore frequency controls RING/LLC(last-level cache) clocks. But MSR control is not always possible from the user space, so this driver provides a sysfs interface to set max and min frequency limits. This MSR 620H is a die scoped in multi-die system or package scoped in non multi-die systems. When this driver is loaded, a new directory is created under /sys/devices/system/cpu. For example on a two package Skylake server: $cd /sys/devices/system/cpu/intel_uncore_frequency $ls package_00_die_00 package_01_die_00 $ls package_00_die_00 max_freq_khz min_freq_khz initial_max_freq_khz initial_min_freq_khz $grep . * max_freq_khz:2400000 min_freq_khz:1200000 initial_max_freq_khz:2400000 initial_min_freq_khz:1200000 Here, initial_max_freq_khz and initial_min_freq_khz are read only attributes to show power up or initial values of max and min frequencies respectively. Other attributes are read-write, so that users can modify. Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
This commit is contained in:
parent
26e66a0cf2
commit
49a474c7ba
@ -1337,6 +1337,17 @@ config PCENGINES_APU2
|
||||
To compile this driver as a module, choose M here: the module
|
||||
will be called pcengines-apuv2.
|
||||
|
||||
config INTEL_UNCORE_FREQ_CONTROL
|
||||
tristate "Intel Uncore frequency control driver"
|
||||
depends on X86_64
|
||||
help
|
||||
This driver allows control of uncore frequency limits on
|
||||
supported server platforms.
|
||||
Uncore frequency controls RING/LLC (last-level cache) clocks.
|
||||
|
||||
To compile this driver as a module, choose M here: the module
|
||||
will be called intel-uncore-frequency.
|
||||
|
||||
source "drivers/platform/x86/intel_speed_select_if/Kconfig"
|
||||
|
||||
config SYSTEM76_ACPI
|
||||
|
@ -105,3 +105,4 @@ obj-$(CONFIG_INTEL_ATOMISP2_PM) += intel_atomisp2_pm.o
|
||||
obj-$(CONFIG_PCENGINES_APU2) += pcengines-apuv2.o
|
||||
obj-$(CONFIG_INTEL_SPEED_SELECT_INTERFACE) += intel_speed_select_if/
|
||||
obj-$(CONFIG_SYSTEM76_ACPI) += system76_acpi.o
|
||||
obj-$(CONFIG_INTEL_UNCORE_FREQ_CONTROL) += intel-uncore-frequency.o
|
||||
|
437
drivers/platform/x86/intel-uncore-frequency.c
Normal file
437
drivers/platform/x86/intel-uncore-frequency.c
Normal file
@ -0,0 +1,437 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Intel Uncore Frequency Setting
|
||||
* Copyright (c) 2019, Intel Corporation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Provide interface to set MSR 620 at a granularity of per die. On CPU online,
|
||||
* one control CPU is identified per die to read/write limit. This control CPU
|
||||
* is changed, if the CPU state is changed to offline. When the last CPU is
|
||||
* offline in a die then remove the sysfs object for that die.
|
||||
* The majority of actual code is related to sysfs create and read/write
|
||||
* attributes.
|
||||
*
|
||||
* Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||
*/
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/intel-family.h>
|
||||
|
||||
#define MSR_UNCORE_RATIO_LIMIT 0x620
|
||||
#define UNCORE_FREQ_KHZ_MULTIPLIER 100000
|
||||
|
||||
/**
|
||||
* struct uncore_data - Encapsulate all uncore data
|
||||
* @stored_uncore_data: Last user changed MSR 620 value, which will be restored
|
||||
* on system resume.
|
||||
* @initial_min_freq_khz: Sampled minimum uncore frequency at driver init
|
||||
* @initial_max_freq_khz: Sampled maximum uncore frequency at driver init
|
||||
* @control_cpu: Designated CPU for a die to read/write
|
||||
* @valid: Mark the data valid/invalid
|
||||
*
|
||||
* This structure is used to encapsulate all data related to uncore sysfs
|
||||
* settings for a die/package.
|
||||
*/
|
||||
struct uncore_data {
|
||||
struct kobject kobj;
|
||||
u64 stored_uncore_data;
|
||||
u32 initial_min_freq_khz;
|
||||
u32 initial_max_freq_khz;
|
||||
int control_cpu;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
#define to_uncore_data(a) container_of(a, struct uncore_data, kobj)
|
||||
|
||||
/* Max instances for uncore data, one for each die */
|
||||
static int uncore_max_entries __read_mostly;
|
||||
/* Storage for uncore data for all instances */
|
||||
static struct uncore_data *uncore_instances;
|
||||
/* Root of the all uncore sysfs kobjs */
|
||||
struct kobject uncore_root_kobj;
|
||||
/* Stores the CPU mask of the target CPUs to use during uncore read/write */
|
||||
static cpumask_t uncore_cpu_mask;
|
||||
/* CPU online callback register instance */
|
||||
static enum cpuhp_state uncore_hp_state __read_mostly;
|
||||
/* Mutex to control all mutual exclusions */
|
||||
static DEFINE_MUTEX(uncore_lock);
|
||||
|
||||
struct uncore_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct kobject *kobj,
|
||||
struct attribute *attr, char *buf);
|
||||
ssize_t (*store)(struct kobject *kobj,
|
||||
struct attribute *attr, const char *c, ssize_t count);
|
||||
};
|
||||
|
||||
#define define_one_uncore_ro(_name) \
|
||||
static struct uncore_attr _name = \
|
||||
__ATTR(_name, 0444, show_##_name, NULL)
|
||||
|
||||
#define define_one_uncore_rw(_name) \
|
||||
static struct uncore_attr _name = \
|
||||
__ATTR(_name, 0644, show_##_name, store_##_name)
|
||||
|
||||
#define show_uncore_data(member_name) \
|
||||
static ssize_t show_##member_name(struct kobject *kobj, \
|
||||
struct attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
struct uncore_data *data = to_uncore_data(kobj); \
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", \
|
||||
data->member_name); \
|
||||
} \
|
||||
define_one_uncore_ro(member_name)
|
||||
|
||||
show_uncore_data(initial_min_freq_khz);
|
||||
show_uncore_data(initial_max_freq_khz);
|
||||
|
||||
/* Common function to read MSR 0x620 and read min/max */
|
||||
static int uncore_read_ratio(struct uncore_data *data, unsigned int *min,
|
||||
unsigned int *max)
|
||||
{
|
||||
u64 cap;
|
||||
int ret;
|
||||
|
||||
ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
*max = (cap & 0x7F) * UNCORE_FREQ_KHZ_MULTIPLIER;
|
||||
*min = ((cap & GENMASK(14, 8)) >> 8) * UNCORE_FREQ_KHZ_MULTIPLIER;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Common function to set min/max ratios to be used by sysfs callbacks */
|
||||
static int uncore_write_ratio(struct uncore_data *data, unsigned int input,
|
||||
int set_max)
|
||||
{
|
||||
int ret;
|
||||
u64 cap;
|
||||
|
||||
mutex_lock(&uncore_lock);
|
||||
|
||||
input /= UNCORE_FREQ_KHZ_MULTIPLIER;
|
||||
if (!input || input > 0x7F) {
|
||||
ret = -EINVAL;
|
||||
goto finish_write;
|
||||
}
|
||||
|
||||
ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
|
||||
if (ret)
|
||||
goto finish_write;
|
||||
|
||||
if (set_max) {
|
||||
cap &= ~0x7F;
|
||||
cap |= input;
|
||||
} else {
|
||||
cap &= ~GENMASK(14, 8);
|
||||
cap |= (input << 8);
|
||||
}
|
||||
|
||||
ret = wrmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, cap);
|
||||
if (ret)
|
||||
goto finish_write;
|
||||
|
||||
data->stored_uncore_data = cap;
|
||||
|
||||
finish_write:
|
||||
mutex_unlock(&uncore_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t store_min_max_freq_khz(struct kobject *kobj,
|
||||
struct attribute *attr,
|
||||
const char *buf, ssize_t count,
|
||||
int min_max)
|
||||
{
|
||||
struct uncore_data *data = to_uncore_data(kobj);
|
||||
unsigned int input;
|
||||
|
||||
if (kstrtouint(buf, 10, &input))
|
||||
return -EINVAL;
|
||||
|
||||
uncore_write_ratio(data, input, min_max);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t show_min_max_freq_khz(struct kobject *kobj,
|
||||
struct attribute *attr,
|
||||
char *buf, int min_max)
|
||||
{
|
||||
struct uncore_data *data = to_uncore_data(kobj);
|
||||
unsigned int min, max;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&uncore_lock);
|
||||
ret = uncore_read_ratio(data, &min, &max);
|
||||
mutex_unlock(&uncore_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (min_max)
|
||||
return sprintf(buf, "%u\n", max);
|
||||
|
||||
return sprintf(buf, "%u\n", min);
|
||||
}
|
||||
|
||||
#define store_uncore_min_max(name, min_max) \
|
||||
static ssize_t store_##name(struct kobject *kobj, \
|
||||
struct attribute *attr, \
|
||||
const char *buf, ssize_t count) \
|
||||
{ \
|
||||
\
|
||||
return store_min_max_freq_khz(kobj, attr, buf, count, \
|
||||
min_max); \
|
||||
}
|
||||
|
||||
#define show_uncore_min_max(name, min_max) \
|
||||
static ssize_t show_##name(struct kobject *kobj, \
|
||||
struct attribute *attr, char *buf) \
|
||||
{ \
|
||||
\
|
||||
return show_min_max_freq_khz(kobj, attr, buf, min_max); \
|
||||
}
|
||||
|
||||
store_uncore_min_max(min_freq_khz, 0);
|
||||
store_uncore_min_max(max_freq_khz, 1);
|
||||
|
||||
show_uncore_min_max(min_freq_khz, 0);
|
||||
show_uncore_min_max(max_freq_khz, 1);
|
||||
|
||||
define_one_uncore_rw(min_freq_khz);
|
||||
define_one_uncore_rw(max_freq_khz);
|
||||
|
||||
static struct attribute *uncore_attrs[] = {
|
||||
&initial_min_freq_khz.attr,
|
||||
&initial_max_freq_khz.attr,
|
||||
&max_freq_khz.attr,
|
||||
&min_freq_khz.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct kobj_type uncore_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_attrs = uncore_attrs,
|
||||
};
|
||||
|
||||
static struct kobj_type uncore_root_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
};
|
||||
|
||||
/* Caller provides protection */
|
||||
static struct uncore_data *uncore_get_instance(unsigned int cpu)
|
||||
{
|
||||
int id = topology_logical_die_id(cpu);
|
||||
|
||||
if (id >= 0 && id < uncore_max_entries)
|
||||
return &uncore_instances[id];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void uncore_add_die_entry(int cpu)
|
||||
{
|
||||
struct uncore_data *data;
|
||||
|
||||
mutex_lock(&uncore_lock);
|
||||
data = uncore_get_instance(cpu);
|
||||
if (!data) {
|
||||
mutex_unlock(&uncore_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (data->valid) {
|
||||
/* control cpu changed */
|
||||
data->control_cpu = cpu;
|
||||
} else {
|
||||
char str[64];
|
||||
int ret;
|
||||
|
||||
memset(data, 0, sizeof(*data));
|
||||
sprintf(str, "package_%02d_die_%02d",
|
||||
topology_physical_package_id(cpu),
|
||||
topology_die_id(cpu));
|
||||
|
||||
uncore_read_ratio(data, &data->initial_min_freq_khz,
|
||||
&data->initial_max_freq_khz);
|
||||
|
||||
ret = kobject_init_and_add(&data->kobj, &uncore_ktype,
|
||||
&uncore_root_kobj, str);
|
||||
if (!ret) {
|
||||
data->control_cpu = cpu;
|
||||
data->valid = true;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&uncore_lock);
|
||||
}
|
||||
|
||||
/* Last CPU in this die is offline, so remove sysfs entries */
|
||||
static void uncore_remove_die_entry(int cpu)
|
||||
{
|
||||
struct uncore_data *data;
|
||||
|
||||
mutex_lock(&uncore_lock);
|
||||
data = uncore_get_instance(cpu);
|
||||
if (data) {
|
||||
kobject_put(&data->kobj);
|
||||
data->control_cpu = -1;
|
||||
data->valid = false;
|
||||
}
|
||||
mutex_unlock(&uncore_lock);
|
||||
}
|
||||
|
||||
static int uncore_event_cpu_online(unsigned int cpu)
|
||||
{
|
||||
int target;
|
||||
|
||||
/* Check if there is an online cpu in the package for uncore MSR */
|
||||
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
|
||||
if (target < nr_cpu_ids)
|
||||
return 0;
|
||||
|
||||
/* Use this CPU on this die as a control CPU */
|
||||
cpumask_set_cpu(cpu, &uncore_cpu_mask);
|
||||
uncore_add_die_entry(cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uncore_event_cpu_offline(unsigned int cpu)
|
||||
{
|
||||
int target;
|
||||
|
||||
/* Check if existing cpu is used for uncore MSRs */
|
||||
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
|
||||
return 0;
|
||||
|
||||
/* Find a new cpu to set uncore MSR */
|
||||
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
|
||||
|
||||
if (target < nr_cpu_ids) {
|
||||
cpumask_set_cpu(target, &uncore_cpu_mask);
|
||||
uncore_add_die_entry(target);
|
||||
} else {
|
||||
uncore_remove_die_entry(cpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uncore_pm_notify(struct notifier_block *nb, unsigned long mode,
|
||||
void *_unused)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
switch (mode) {
|
||||
case PM_POST_HIBERNATION:
|
||||
case PM_POST_RESTORE:
|
||||
case PM_POST_SUSPEND:
|
||||
for_each_cpu(cpu, &uncore_cpu_mask) {
|
||||
struct uncore_data *data;
|
||||
int ret;
|
||||
|
||||
data = uncore_get_instance(cpu);
|
||||
if (!data || !data->valid || !data->stored_uncore_data)
|
||||
continue;
|
||||
|
||||
ret = wrmsrl_on_cpu(cpu, MSR_UNCORE_RATIO_LIMIT,
|
||||
data->stored_uncore_data);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct notifier_block uncore_pm_nb = {
|
||||
.notifier_call = uncore_pm_notify,
|
||||
};
|
||||
|
||||
#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
|
||||
|
||||
static const struct x86_cpu_id intel_uncore_cpu_ids[] = {
|
||||
ICPU(INTEL_FAM6_BROADWELL_G),
|
||||
ICPU(INTEL_FAM6_BROADWELL_X),
|
||||
ICPU(INTEL_FAM6_BROADWELL_D),
|
||||
ICPU(INTEL_FAM6_SKYLAKE_X),
|
||||
ICPU(INTEL_FAM6_ICELAKE_X),
|
||||
ICPU(INTEL_FAM6_ICELAKE_D),
|
||||
{}
|
||||
};
|
||||
|
||||
static int __init intel_uncore_init(void)
|
||||
{
|
||||
const struct x86_cpu_id *id;
|
||||
int ret;
|
||||
|
||||
id = x86_match_cpu(intel_uncore_cpu_ids);
|
||||
if (!id)
|
||||
return -ENODEV;
|
||||
|
||||
uncore_max_entries = topology_max_packages() *
|
||||
topology_max_die_per_package();
|
||||
uncore_instances = kcalloc(uncore_max_entries,
|
||||
sizeof(*uncore_instances), GFP_KERNEL);
|
||||
if (!uncore_instances)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kobject_init_and_add(&uncore_root_kobj, &uncore_root_ktype,
|
||||
&cpu_subsys.dev_root->kobj,
|
||||
"intel_uncore_frequency");
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
|
||||
"platform/x86/uncore-freq:online",
|
||||
uncore_event_cpu_online,
|
||||
uncore_event_cpu_offline);
|
||||
if (ret < 0)
|
||||
goto err_rem_kobj;
|
||||
|
||||
uncore_hp_state = ret;
|
||||
|
||||
ret = register_pm_notifier(&uncore_pm_nb);
|
||||
if (ret)
|
||||
goto err_rem_state;
|
||||
|
||||
return 0;
|
||||
|
||||
err_rem_state:
|
||||
cpuhp_remove_state(uncore_hp_state);
|
||||
err_rem_kobj:
|
||||
kobject_put(&uncore_root_kobj);
|
||||
err_free:
|
||||
kfree(uncore_instances);
|
||||
|
||||
return ret;
|
||||
}
|
||||
module_init(intel_uncore_init)
|
||||
|
||||
static void __exit intel_uncore_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
unregister_pm_notifier(&uncore_pm_nb);
|
||||
cpuhp_remove_state(uncore_hp_state);
|
||||
for (i = 0; i < uncore_max_entries; ++i) {
|
||||
if (uncore_instances[i].valid)
|
||||
kobject_put(&uncore_instances[i].kobj);
|
||||
}
|
||||
kobject_put(&uncore_root_kobj);
|
||||
kfree(uncore_instances);
|
||||
}
|
||||
module_exit(intel_uncore_exit)
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("Intel Uncore Frequency Limits Driver");
|
Loading…
Reference in New Issue
Block a user