platform/x86: Add support for Uncore frequency control

Some server users set limits on the uncore frequency using MSR 620H, while
running latency sensitive workloads. Here uncore frequency controls
RING/LLC(last-level cache) clocks.

But MSR control is not always possible from the user space, so this driver
provides a sysfs interface to set max and min frequency limits. This MSR
620H is a die scoped in multi-die system or package scoped in non multi-die
systems.

When this driver is loaded, a new directory is created under
 /sys/devices/system/cpu.

For example on a two package Skylake server:
$cd /sys/devices/system/cpu/intel_uncore_frequency

$ls
package_00_die_00 package_01_die_00

$ls package_00_die_00
max_freq_khz  min_freq_khz  initial_max_freq_khz
initial_min_freq_khz

$grep . *
    max_freq_khz:2400000
    min_freq_khz:1200000
    initial_max_freq_khz:2400000
    initial_min_freq_khz:1200000

Here, initial_max_freq_khz and initial_min_freq_khz are read only
attributes to show power up or initial values of max and min frequencies
respectively. Other attributes are read-write, so that users can modify.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
This commit is contained in:
Srinivas Pandruvada 2020-01-13 10:00:14 -08:00 committed by Andy Shevchenko
parent 26e66a0cf2
commit 49a474c7ba
3 changed files with 449 additions and 0 deletions

View File

@ -1337,6 +1337,17 @@ config PCENGINES_APU2
To compile this driver as a module, choose M here: the module
will be called pcengines-apuv2.
config INTEL_UNCORE_FREQ_CONTROL
tristate "Intel Uncore frequency control driver"
depends on X86_64
help
This driver allows control of uncore frequency limits on
supported server platforms.
Uncore frequency controls RING/LLC (last-level cache) clocks.
To compile this driver as a module, choose M here: the module
will be called intel-uncore-frequency.
source "drivers/platform/x86/intel_speed_select_if/Kconfig"
config SYSTEM76_ACPI

View File

@ -105,3 +105,4 @@ obj-$(CONFIG_INTEL_ATOMISP2_PM) += intel_atomisp2_pm.o
obj-$(CONFIG_PCENGINES_APU2) += pcengines-apuv2.o
obj-$(CONFIG_INTEL_SPEED_SELECT_INTERFACE) += intel_speed_select_if/
obj-$(CONFIG_SYSTEM76_ACPI) += system76_acpi.o
obj-$(CONFIG_INTEL_UNCORE_FREQ_CONTROL) += intel-uncore-frequency.o

View File

@ -0,0 +1,437 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Intel Uncore Frequency Setting
* Copyright (c) 2019, Intel Corporation.
* All rights reserved.
*
* Provide interface to set MSR 620 at a granularity of per die. On CPU online,
* one control CPU is identified per die to read/write limit. This control CPU
* is changed, if the CPU state is changed to offline. When the last CPU is
* offline in a die then remove the sysfs object for that die.
* The majority of actual code is related to sysfs create and read/write
* attributes.
*
* Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
*/
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/suspend.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#define MSR_UNCORE_RATIO_LIMIT 0x620
#define UNCORE_FREQ_KHZ_MULTIPLIER 100000
/**
* struct uncore_data - Encapsulate all uncore data
* @stored_uncore_data: Last user changed MSR 620 value, which will be restored
* on system resume.
* @initial_min_freq_khz: Sampled minimum uncore frequency at driver init
* @initial_max_freq_khz: Sampled maximum uncore frequency at driver init
* @control_cpu: Designated CPU for a die to read/write
* @valid: Mark the data valid/invalid
*
* This structure is used to encapsulate all data related to uncore sysfs
* settings for a die/package.
*/
struct uncore_data {
struct kobject kobj;
u64 stored_uncore_data;
u32 initial_min_freq_khz;
u32 initial_max_freq_khz;
int control_cpu;
bool valid;
};
#define to_uncore_data(a) container_of(a, struct uncore_data, kobj)
/* Max instances for uncore data, one for each die */
static int uncore_max_entries __read_mostly;
/* Storage for uncore data for all instances */
static struct uncore_data *uncore_instances;
/* Root of the all uncore sysfs kobjs */
struct kobject uncore_root_kobj;
/* Stores the CPU mask of the target CPUs to use during uncore read/write */
static cpumask_t uncore_cpu_mask;
/* CPU online callback register instance */
static enum cpuhp_state uncore_hp_state __read_mostly;
/* Mutex to control all mutual exclusions */
static DEFINE_MUTEX(uncore_lock);
struct uncore_attr {
struct attribute attr;
ssize_t (*show)(struct kobject *kobj,
struct attribute *attr, char *buf);
ssize_t (*store)(struct kobject *kobj,
struct attribute *attr, const char *c, ssize_t count);
};
#define define_one_uncore_ro(_name) \
static struct uncore_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)
#define define_one_uncore_rw(_name) \
static struct uncore_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)
#define show_uncore_data(member_name) \
static ssize_t show_##member_name(struct kobject *kobj, \
struct attribute *attr, \
char *buf) \
{ \
struct uncore_data *data = to_uncore_data(kobj); \
return scnprintf(buf, PAGE_SIZE, "%u\n", \
data->member_name); \
} \
define_one_uncore_ro(member_name)
show_uncore_data(initial_min_freq_khz);
show_uncore_data(initial_max_freq_khz);
/* Common function to read MSR 0x620 and read min/max */
static int uncore_read_ratio(struct uncore_data *data, unsigned int *min,
unsigned int *max)
{
u64 cap;
int ret;
ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
if (ret)
return ret;
*max = (cap & 0x7F) * UNCORE_FREQ_KHZ_MULTIPLIER;
*min = ((cap & GENMASK(14, 8)) >> 8) * UNCORE_FREQ_KHZ_MULTIPLIER;
return 0;
}
/* Common function to set min/max ratios to be used by sysfs callbacks */
static int uncore_write_ratio(struct uncore_data *data, unsigned int input,
int set_max)
{
int ret;
u64 cap;
mutex_lock(&uncore_lock);
input /= UNCORE_FREQ_KHZ_MULTIPLIER;
if (!input || input > 0x7F) {
ret = -EINVAL;
goto finish_write;
}
ret = rdmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, &cap);
if (ret)
goto finish_write;
if (set_max) {
cap &= ~0x7F;
cap |= input;
} else {
cap &= ~GENMASK(14, 8);
cap |= (input << 8);
}
ret = wrmsrl_on_cpu(data->control_cpu, MSR_UNCORE_RATIO_LIMIT, cap);
if (ret)
goto finish_write;
data->stored_uncore_data = cap;
finish_write:
mutex_unlock(&uncore_lock);
return ret;
}
static ssize_t store_min_max_freq_khz(struct kobject *kobj,
struct attribute *attr,
const char *buf, ssize_t count,
int min_max)
{
struct uncore_data *data = to_uncore_data(kobj);
unsigned int input;
if (kstrtouint(buf, 10, &input))
return -EINVAL;
uncore_write_ratio(data, input, min_max);
return count;
}
static ssize_t show_min_max_freq_khz(struct kobject *kobj,
struct attribute *attr,
char *buf, int min_max)
{
struct uncore_data *data = to_uncore_data(kobj);
unsigned int min, max;
int ret;
mutex_lock(&uncore_lock);
ret = uncore_read_ratio(data, &min, &max);
mutex_unlock(&uncore_lock);
if (ret)
return ret;
if (min_max)
return sprintf(buf, "%u\n", max);
return sprintf(buf, "%u\n", min);
}
#define store_uncore_min_max(name, min_max) \
static ssize_t store_##name(struct kobject *kobj, \
struct attribute *attr, \
const char *buf, ssize_t count) \
{ \
\
return store_min_max_freq_khz(kobj, attr, buf, count, \
min_max); \
}
#define show_uncore_min_max(name, min_max) \
static ssize_t show_##name(struct kobject *kobj, \
struct attribute *attr, char *buf) \
{ \
\
return show_min_max_freq_khz(kobj, attr, buf, min_max); \
}
store_uncore_min_max(min_freq_khz, 0);
store_uncore_min_max(max_freq_khz, 1);
show_uncore_min_max(min_freq_khz, 0);
show_uncore_min_max(max_freq_khz, 1);
define_one_uncore_rw(min_freq_khz);
define_one_uncore_rw(max_freq_khz);
static struct attribute *uncore_attrs[] = {
&initial_min_freq_khz.attr,
&initial_max_freq_khz.attr,
&max_freq_khz.attr,
&min_freq_khz.attr,
NULL
};
static struct kobj_type uncore_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_attrs = uncore_attrs,
};
static struct kobj_type uncore_root_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
};
/* Caller provides protection */
static struct uncore_data *uncore_get_instance(unsigned int cpu)
{
int id = topology_logical_die_id(cpu);
if (id >= 0 && id < uncore_max_entries)
return &uncore_instances[id];
return NULL;
}
static void uncore_add_die_entry(int cpu)
{
struct uncore_data *data;
mutex_lock(&uncore_lock);
data = uncore_get_instance(cpu);
if (!data) {
mutex_unlock(&uncore_lock);
return;
}
if (data->valid) {
/* control cpu changed */
data->control_cpu = cpu;
} else {
char str[64];
int ret;
memset(data, 0, sizeof(*data));
sprintf(str, "package_%02d_die_%02d",
topology_physical_package_id(cpu),
topology_die_id(cpu));
uncore_read_ratio(data, &data->initial_min_freq_khz,
&data->initial_max_freq_khz);
ret = kobject_init_and_add(&data->kobj, &uncore_ktype,
&uncore_root_kobj, str);
if (!ret) {
data->control_cpu = cpu;
data->valid = true;
}
}
mutex_unlock(&uncore_lock);
}
/* Last CPU in this die is offline, so remove sysfs entries */
static void uncore_remove_die_entry(int cpu)
{
struct uncore_data *data;
mutex_lock(&uncore_lock);
data = uncore_get_instance(cpu);
if (data) {
kobject_put(&data->kobj);
data->control_cpu = -1;
data->valid = false;
}
mutex_unlock(&uncore_lock);
}
static int uncore_event_cpu_online(unsigned int cpu)
{
int target;
/* Check if there is an online cpu in the package for uncore MSR */
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
/* Use this CPU on this die as a control CPU */
cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_add_die_entry(cpu);
return 0;
}
static int uncore_event_cpu_offline(unsigned int cpu)
{
int target;
/* Check if existing cpu is used for uncore MSRs */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
return 0;
/* Find a new cpu to set uncore MSR */
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &uncore_cpu_mask);
uncore_add_die_entry(target);
} else {
uncore_remove_die_entry(cpu);
}
return 0;
}
static int uncore_pm_notify(struct notifier_block *nb, unsigned long mode,
void *_unused)
{
int cpu;
switch (mode) {
case PM_POST_HIBERNATION:
case PM_POST_RESTORE:
case PM_POST_SUSPEND:
for_each_cpu(cpu, &uncore_cpu_mask) {
struct uncore_data *data;
int ret;
data = uncore_get_instance(cpu);
if (!data || !data->valid || !data->stored_uncore_data)
continue;
ret = wrmsrl_on_cpu(cpu, MSR_UNCORE_RATIO_LIMIT,
data->stored_uncore_data);
if (ret)
return ret;
}
break;
default:
break;
}
return 0;
}
static struct notifier_block uncore_pm_nb = {
.notifier_call = uncore_pm_notify,
};
#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
static const struct x86_cpu_id intel_uncore_cpu_ids[] = {
ICPU(INTEL_FAM6_BROADWELL_G),
ICPU(INTEL_FAM6_BROADWELL_X),
ICPU(INTEL_FAM6_BROADWELL_D),
ICPU(INTEL_FAM6_SKYLAKE_X),
ICPU(INTEL_FAM6_ICELAKE_X),
ICPU(INTEL_FAM6_ICELAKE_D),
{}
};
static int __init intel_uncore_init(void)
{
const struct x86_cpu_id *id;
int ret;
id = x86_match_cpu(intel_uncore_cpu_ids);
if (!id)
return -ENODEV;
uncore_max_entries = topology_max_packages() *
topology_max_die_per_package();
uncore_instances = kcalloc(uncore_max_entries,
sizeof(*uncore_instances), GFP_KERNEL);
if (!uncore_instances)
return -ENOMEM;
ret = kobject_init_and_add(&uncore_root_kobj, &uncore_root_ktype,
&cpu_subsys.dev_root->kobj,
"intel_uncore_frequency");
if (ret)
goto err_free;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"platform/x86/uncore-freq:online",
uncore_event_cpu_online,
uncore_event_cpu_offline);
if (ret < 0)
goto err_rem_kobj;
uncore_hp_state = ret;
ret = register_pm_notifier(&uncore_pm_nb);
if (ret)
goto err_rem_state;
return 0;
err_rem_state:
cpuhp_remove_state(uncore_hp_state);
err_rem_kobj:
kobject_put(&uncore_root_kobj);
err_free:
kfree(uncore_instances);
return ret;
}
module_init(intel_uncore_init)
static void __exit intel_uncore_exit(void)
{
int i;
unregister_pm_notifier(&uncore_pm_nb);
cpuhp_remove_state(uncore_hp_state);
for (i = 0; i < uncore_max_entries; ++i) {
if (uncore_instances[i].valid)
kobject_put(&uncore_instances[i].kobj);
}
kobject_put(&uncore_root_kobj);
kfree(uncore_instances);
}
module_exit(intel_uncore_exit)
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Intel Uncore Frequency Limits Driver");