- Simplify and make the code more self-encapsulate by dealing with the

dtpm structure only (Daniel Lezcano)
 
 - Fix the power intialization (Daniel Lezcano)
 
 - Add the CPU load ponderation when estimating the instaneous power
   consumption (Daniel Lezcano)
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEGn3N4YVz0WNVyHskqDIjiipP6E8FAmFxfGcACgkQqDIjiipP
 6E+TGgf/dWKtga4DrwY6GhffbQTCOuoTMThYFE+bUQBXoB5SRQ6xAdtpbYyQbGS/
 jfErRk8EMQDHTnlvL/LQYPdgy9ckX79ShVEJN/9U53E7ENBxZvdvG8BaK30zl99D
 FARtF+Q3ekhmu3R6LHEOMFzotk4tLhLakV5huf0bKaXjPvxV/xg2fRjwN8uE15Ze
 uV3E0jny0zGy13TKcFa8cUs0yhv42q9u46SSjGNviG92kHGm3uWedvWIjXq+JXsN
 4XKPkGQDQ1XMG4yWy92dDCVKBjy+D0pKp1aAT6WJkMIS+0bj8NqZVAy+0TvjYUPI
 Bc9bKQXMwN7/LQlTCYLuFraaUHD/xw==
 =BjBl
 -----END PGP SIGNATURE-----

Merge tag 'dtpm-v5.16' of https://git.linaro.org/people/daniel.lezcano/linux

Pull Dynamic Thermal Power Management (DTPM) framework changes for v5.16
from Daniel Lezcano:

- Simplify and make the code more self-encapsulate by dealing with the
  dtpm structure only (Daniel Lezcano)

- Fix power intialization (Daniel Lezcano)

- Add the CPU load consideration when estimating the instaneous power
  consumption (Daniel Lezcano)

* tag 'dtpm-v5.16' of https://git.linaro.org/people/daniel.lezcano/linux:
  powercap/drivers/dtpm: Fix power limit initialization
  powercap/drivers/dtpm: Scale the power with the load
  powercap/drivers/dtpm: Use container_of instead of a private data field
  powercap/drivers/dtpm: Simplify the dtpm table
  powercap/drivers/dtpm: Encapsulate even more the code
This commit is contained in:
Rafael J. Wysocki 2021-10-21 19:44:27 +02:00
commit 3b4bd49513
4 changed files with 174 additions and 162 deletions

View File

@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
parent->power_limit -= dtpm->power_limit;
parent = parent->parent;
}
__dtpm_rebalance_weight(root);
}
static void __dtpm_add_power(struct dtpm *dtpm)
@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
parent->power_limit += dtpm->power_limit;
parent = parent->parent;
}
}
__dtpm_rebalance_weight(root);
static int __dtpm_update_power(struct dtpm *dtpm)
{
int ret;
__dtpm_sub_power(dtpm);
ret = dtpm->ops->update_power_uw(dtpm);
if (ret)
pr_err("Failed to update power for '%s': %d\n",
dtpm->zone.name, ret);
if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
dtpm->power_limit = dtpm->power_max;
__dtpm_add_power(dtpm);
if (root)
__dtpm_rebalance_weight(root);
return ret;
}
/**
* dtpm_update_power - Update the power on the dtpm
* @dtpm: a pointer to a dtpm structure to update
* @power_min: a u64 representing the new power_min value
* @power_max: a u64 representing the new power_max value
*
* Function to update the power values of the dtpm node specified in
* parameter. These new values will be propagated to the tree.
*
* Return: zero on success, -EINVAL if the values are inconsistent
*/
int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
int dtpm_update_power(struct dtpm *dtpm)
{
int ret = 0;
int ret;
mutex_lock(&dtpm_lock);
if (power_min == dtpm->power_min && power_max == dtpm->power_max)
goto unlock;
if (power_max < power_min) {
ret = -EINVAL;
goto unlock;
}
__dtpm_sub_power(dtpm);
dtpm->power_min = power_min;
dtpm->power_max = power_max;
if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
dtpm->power_limit = power_max;
__dtpm_add_power(dtpm);
unlock:
ret = __dtpm_update_power(dtpm);
mutex_unlock(&dtpm_lock);
return ret;
@ -359,24 +357,18 @@ static struct powercap_zone_ops zone_ops = {
};
/**
* dtpm_alloc - Allocate and initialize a dtpm struct
* @name: a string specifying the name of the node
*
* Return: a struct dtpm pointer, NULL in case of error
* dtpm_init - Allocate and initialize a dtpm struct
* @dtpm: The dtpm struct pointer to be initialized
* @ops: The dtpm device specific ops, NULL for a virtual node
*/
struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
{
struct dtpm *dtpm;
dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
if (dtpm) {
INIT_LIST_HEAD(&dtpm->children);
INIT_LIST_HEAD(&dtpm->sibling);
dtpm->weight = 1024;
dtpm->ops = ops;
}
return dtpm;
}
/**
@ -436,6 +428,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
if (dtpm->ops && !(dtpm->ops->set_power_uw &&
dtpm->ops->get_power_uw &&
dtpm->ops->update_power_uw &&
dtpm->ops->release))
return -EINVAL;
@ -455,7 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
root = dtpm;
}
__dtpm_add_power(dtpm);
if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) {
__dtpm_add_power(dtpm);
dtpm->power_limit = dtpm->power_max;
}
pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
dtpm->zone.name, dtpm->power_min, dtpm->power_max);
@ -465,9 +461,9 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
return 0;
}
static int __init dtpm_init(void)
static int __init init_dtpm(void)
{
struct dtpm_descr **dtpm_descr;
struct dtpm_descr *dtpm_descr;
pct = powercap_register_control_type(NULL, "dtpm", NULL);
if (IS_ERR(pct)) {
@ -476,8 +472,8 @@ static int __init dtpm_init(void)
}
for_each_dtpm_table(dtpm_descr)
(*dtpm_descr)->init(*dtpm_descr);
dtpm_descr->init();
return 0;
}
late_initcall(dtpm_init);
late_initcall(init_dtpm);

View File

@ -14,6 +14,8 @@
* The CPU hotplug is supported and the power numbers will be updated
* if a CPU is hot plugged / unplugged.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/cpuhotplug.h>
@ -23,66 +25,29 @@
#include <linux/slab.h>
#include <linux/units.h>
static struct dtpm *__parent;
static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
struct dtpm_cpu {
struct dtpm dtpm;
struct freq_qos_request qos_req;
int cpu;
};
/*
* When a new CPU is inserted at hotplug or boot time, add the power
* contribution and update the dtpm tree.
*/
static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
{
u64 power_min, power_max;
power_min = em->table[0].power;
power_min *= MICROWATT_PER_MILLIWATT;
power_min += dtpm->power_min;
power_max = em->table[em->nr_perf_states - 1].power;
power_max *= MICROWATT_PER_MILLIWATT;
power_max += dtpm->power_max;
return dtpm_update_power(dtpm, power_min, power_max);
}
/*
* When a CPU is unplugged, remove its power contribution from the
* dtpm tree.
*/
static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
{
u64 power_min, power_max;
power_min = em->table[0].power;
power_min *= MICROWATT_PER_MILLIWATT;
power_min = dtpm->power_min - power_min;
power_max = em->table[em->nr_perf_states - 1].power;
power_max *= MICROWATT_PER_MILLIWATT;
power_max = dtpm->power_max - power_max;
return dtpm_update_power(dtpm, power_min, power_max);
return container_of(dtpm, struct dtpm_cpu, dtpm);
}
static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
{
struct dtpm_cpu *dtpm_cpu = dtpm->private;
struct em_perf_domain *pd;
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
struct cpumask cpus;
unsigned long freq;
u64 power;
int i, nr_cpus;
pd = em_cpu_get(dtpm_cpu->cpu);
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) {
@ -103,34 +68,88 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
return power_limit;
}
static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
{
unsigned long max = 0, sum_util = 0;
int cpu;
for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
/*
* The capacity is the same for all CPUs belonging to
* the same perf domain, so a single call to
* arch_scale_cpu_capacity() is enough. However, we
* need the CPU parameter to be initialized by the
* loop, so the call ends up in this block.
*
* We can initialize 'max' with a cpumask_first() call
* before the loop but the bits computation is not
* worth given the arch_scale_cpu_capacity() just
* returns a value where the resulting assembly code
* will be optimized by the compiler.
*/
max = arch_scale_cpu_capacity(cpu);
sum_util += sched_cpu_util(cpu, max);
}
/*
* In the improbable case where all the CPUs of the perf
* domain are offline, 'max' will be zero and will lead to an
* illegal operation with a zero division.
*/
return max ? (power * ((sum_util << 10) / max)) >> 10 : 0;
}
static u64 get_pd_power_uw(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = dtpm->private;
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd;
struct cpumask cpus;
struct cpumask *pd_mask;
unsigned long freq;
int i, nr_cpus;
int i;
pd = em_cpu_get(dtpm_cpu->cpu);
pd_mask = em_span_cpus(pd);
freq = cpufreq_quick_get(dtpm_cpu->cpu);
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) {
if (pd->table[i].frequency < freq)
continue;
return pd->table[i].power *
MICROWATT_PER_MILLIWATT * nr_cpus;
return scale_pd_power_uw(pd_mask, pd->table[i].power *
MICROWATT_PER_MILLIWATT);
}
return 0;
}
static int update_pd_power_uw(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
struct cpumask cpus;
int nr_cpus;
cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
nr_cpus = cpumask_weight(&cpus);
dtpm->power_min = em->table[0].power;
dtpm->power_min *= MICROWATT_PER_MILLIWATT;
dtpm->power_min *= nr_cpus;
dtpm->power_max = em->table[em->nr_perf_states - 1].power;
dtpm->power_max *= MICROWATT_PER_MILLIWATT;
dtpm->power_max *= nr_cpus;
return 0;
}
static void pd_release(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = dtpm->private;
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
if (freq_qos_request_active(&dtpm_cpu->qos_req))
freq_qos_remove_request(&dtpm_cpu->qos_req);
@ -139,44 +158,28 @@ static void pd_release(struct dtpm *dtpm)
}
static struct dtpm_ops dtpm_ops = {
.set_power_uw = set_pd_power_limit,
.get_power_uw = get_pd_power_uw,
.release = pd_release,
.set_power_uw = set_pd_power_limit,
.get_power_uw = get_pd_power_uw,
.update_power_uw = update_pd_power_uw,
.release = pd_release,
};
static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
{
struct cpufreq_policy *policy;
struct em_perf_domain *pd;
struct dtpm *dtpm;
policy = cpufreq_cpu_get(cpu);
if (!policy)
return 0;
struct dtpm_cpu *dtpm_cpu;
pd = em_cpu_get(cpu);
if (!pd)
return -EINVAL;
dtpm = per_cpu(dtpm_per_cpu, cpu);
dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
power_sub(dtpm, pd);
if (cpumask_weight(policy->cpus) != 1)
return 0;
for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = NULL;
dtpm_unregister(dtpm);
return 0;
return dtpm_update_power(&dtpm_cpu->dtpm);
}
static int cpuhp_dtpm_cpu_online(unsigned int cpu)
{
struct dtpm *dtpm;
struct dtpm_cpu *dtpm_cpu;
struct cpufreq_policy *policy;
struct em_perf_domain *pd;
@ -184,7 +187,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
int ret = -ENOMEM;
policy = cpufreq_cpu_get(cpu);
if (!policy)
return 0;
@ -192,66 +194,82 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
if (!pd)
return -EINVAL;
dtpm = per_cpu(dtpm_per_cpu, cpu);
if (dtpm)
return power_add(dtpm, pd);
dtpm = dtpm_alloc(&dtpm_ops);
if (!dtpm)
return -EINVAL;
dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
if (dtpm_cpu)
return dtpm_update_power(&dtpm_cpu->dtpm);
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
if (!dtpm_cpu)
goto out_kfree_dtpm;
return -ENOMEM;
dtpm->private = dtpm_cpu;
dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
dtpm_cpu->cpu = cpu;
for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = dtpm;
per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
sprintf(name, "cpu%d", dtpm_cpu->cpu);
snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
ret = dtpm_register(name, dtpm, __parent);
ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
if (ret)
goto out_kfree_dtpm_cpu;
ret = power_add(dtpm, pd);
if (ret)
goto out_dtpm_unregister;
ret = freq_qos_add_request(&policy->constraints,
&dtpm_cpu->qos_req, FREQ_QOS_MAX,
pd->table[pd->nr_perf_states - 1].frequency);
if (ret)
goto out_power_sub;
goto out_dtpm_unregister;
return 0;
out_power_sub:
power_sub(dtpm, pd);
out_dtpm_unregister:
dtpm_unregister(dtpm);
dtpm_unregister(&dtpm_cpu->dtpm);
dtpm_cpu = NULL;
dtpm = NULL;
out_kfree_dtpm_cpu:
for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = NULL;
kfree(dtpm_cpu);
out_kfree_dtpm:
kfree(dtpm);
return ret;
}
int dtpm_register_cpu(struct dtpm *parent)
static int __init dtpm_cpu_init(void)
{
__parent = parent;
int ret;
return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
"dtpm_cpu:online",
cpuhp_dtpm_cpu_online,
cpuhp_dtpm_cpu_offline);
/*
* The callbacks at CPU hotplug time are calling
* dtpm_update_power() which in turns calls update_pd_power().
*
* The function update_pd_power() uses the online mask to
* figure out the power consumption limits.
*
* At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU
* online mask when the cpuhp_dtpm_cpu_online function is
* called, but the CPU is still in the online mask for the
* tear down callback. So the power can not be updated when
* the CPU is unplugged.
*
* At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as
* above. The CPU online mask is not up to date when the CPU
* is plugged in.
*
* For this reason, we need to call the online and offline
* callbacks at different moments when the CPU online mask is
* consistent with the power numbers we want to update.
*/
ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
NULL, cpuhp_dtpm_cpu_offline);
if (ret < 0)
return ret;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
cpuhp_dtpm_cpu_online, NULL);
if (ret < 0)
return ret;
return 0;
}
DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);

View File

@ -97,6 +97,7 @@ enum cpuhp_state {
CPUHP_LUSTRE_CFS_DEAD,
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD,
CPUHP_AP_DTPM_CPU_DEAD,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,
@ -242,7 +243,6 @@ enum cpuhp_state {
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
CPUHP_AP_ACTIVE,
CPUHP_ONLINE,
};

View File

@ -23,34 +23,32 @@ struct dtpm {
u64 power_max;
u64 power_min;
int weight;
void *private;
};
struct dtpm_ops {
u64 (*set_power_uw)(struct dtpm *, u64);
u64 (*get_power_uw)(struct dtpm *);
int (*update_power_uw)(struct dtpm *);
void (*release)(struct dtpm *);
};
struct dtpm_descr;
typedef int (*dtpm_init_t)(struct dtpm_descr *);
typedef int (*dtpm_init_t)(void);
struct dtpm_descr {
struct dtpm *parent;
const char *name;
dtpm_init_t init;
};
/* Init section thermal table */
extern struct dtpm_descr *__dtpm_table[];
extern struct dtpm_descr *__dtpm_table_end[];
extern struct dtpm_descr __dtpm_table[];
extern struct dtpm_descr __dtpm_table_end[];
#define DTPM_TABLE_ENTRY(name) \
static typeof(name) *__dtpm_table_entry_##name \
__used __section("__dtpm_table") = &name
#define DTPM_TABLE_ENTRY(name, __init) \
static struct dtpm_descr __dtpm_table_entry_##name \
__used __section("__dtpm_table") = { \
.init = __init, \
}
#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name)
#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init)
#define for_each_dtpm_table(__dtpm) \
for (__dtpm = __dtpm_table; \
@ -62,11 +60,11 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
return container_of(zone, struct dtpm, zone);
}
int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
int dtpm_update_power(struct dtpm *dtpm);
int dtpm_release_zone(struct powercap_zone *pcz);
struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops);
void dtpm_unregister(struct dtpm *dtpm);