Merge branches 'pm-cpufreq', 'pm-sleep' and 'pm-em'

* pm-cpufreq:
  cpufreq: intel_pstate: hybrid: Rework HWP calibration
  ACPI: CPPC: Introduce cppc_get_nominal_perf()

* pm-sleep:
  PM: sleep: core: Avoid setting power.must_resume to false
  PM: sleep: wakeirq: drop useless parameter from dev_pm_attach_wake_irq()

* pm-em:
  Documentation: power: include kernel-doc in Energy Model doc
  PM: EM: fix kernel-doc comments
This commit is contained in:
Rafael J. Wysocki 2021-09-10 20:26:08 +02:00
commit be2d24336f
7 changed files with 128 additions and 145 deletions

View File

@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use
the same scale. If there are different scales, these subsystems might decide
to: return warning/error, stop working or panic.
See Section 3. for an example of driver implementing this
callback, and kernel/power/energy_model.c for further documentation on this
API.
callback, or Section 2.4 for further documentation on this API.
2.3 Accessing performance domains
@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil
CPUfreq governor is in use in case of CPU device. Currently this calculation is
not provided for other type of devices.
More details about the above APIs can be found in include/linux/energy_model.h.
More details about the above APIs can be found in ``<linux/energy_model.h>``
or in Section 2.4.
2.4 Description details of this API
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. kernel-doc:: include/linux/energy_model.h
:internal:
.. kernel-doc:: kernel/power/energy_model.c
:export:
3. Example driver

View File

@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
return ret_val;
}
/**
* cppc_get_desired_perf - Get the value of desired performance register.
* @cpunum: CPU from which to get desired performance.
* @desired_perf: address of a variable to store the returned desired performance
*
* Return: 0 for success, -EIO otherwise.
*/
int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
{
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
struct cpc_register_resource *desired_reg;
struct cppc_pcc_data *pcc_ss_data = NULL;
struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
if (CPC_IN_PCC(desired_reg)) {
if (CPC_IN_PCC(reg)) {
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
struct cppc_pcc_data *pcc_ss_data = NULL;
int ret = 0;
if (pcc_ss_id < 0)
@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
down_write(&pcc_ss_data->pcc_lock);
if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
cpc_read(cpunum, desired_reg, desired_perf);
cpc_read(cpunum, reg, perf);
else
ret = -EIO;
@ -1044,12 +1035,36 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
return ret;
}
cpc_read(cpunum, desired_reg, desired_perf);
cpc_read(cpunum, reg, perf);
return 0;
}
/**
* cppc_get_desired_perf - Get the desired performance register value.
* @cpunum: CPU from which to get desired performance.
* @desired_perf: Address of the variable that receives the value read.
*
* Forwards to cppc_get_perf() with the DESIRED_PERF register index.
*
* Return: 0 for success, -EIO otherwise.
*/
int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
}
EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
/**
* cppc_get_nominal_perf - Get the nominal performance register value.
* @cpunum: CPU from which to get nominal performance.
* @nominal_perf: Address of the variable that receives the value read.
*
* Forwards to cppc_get_perf() with the NOMINAL_PERF register index.
*
* Return: 0 for success, -EIO otherwise.
*/
int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
}
/**
* cppc_get_perf_caps - Get a CPU's performance capabilities.
* @cpunum: CPU from which to get capabilities info.

View File

@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
}
dev->power.may_skip_resume = true;
dev->power.must_resume = false;
dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
dpm_watchdog_set(&wd, dev);
device_lock(dev);

View File

@ -12,14 +12,11 @@
/**
* dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ
* @dev: Device entry
* @irq: Device wake-up capable interrupt
* @wirq: Wake irq specific data
*
* Internal function to attach either a device IO interrupt or a
* dedicated wake-up interrupt as a wake IRQ.
* Internal function to attach a dedicated wake-up interrupt as a wake IRQ.
*/
static int dev_pm_attach_wake_irq(struct device *dev, int irq,
struct wake_irq *wirq)
static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq)
{
unsigned long flags;
@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
wirq->dev = dev;
wirq->irq = irq;
err = dev_pm_attach_wake_irq(dev, irq, wirq);
err = dev_pm_attach_wake_irq(dev, wirq);
if (err)
kfree(wirq);
@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
if (err)
goto err_free_name;
err = dev_pm_attach_wake_irq(dev, irq, wirq);
err = dev_pm_attach_wake_irq(dev, wirq);
if (err)
goto err_free_irq;

View File

@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data;
* @get_min: Callback to get minimum P state
* @get_turbo: Callback to get turbo P state
* @get_scaling: Callback to get frequency scaling factor
* @get_cpu_scaling: Get frequency scaling factor for a given cpu
* @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
* @get_val: Callback to convert P state to actual MSR write value
* @get_vid: Callback to get VID data for Atom platforms
@ -281,6 +282,7 @@ struct pstate_funcs {
int (*get_min)(void);
int (*get_turbo)(void);
int (*get_scaling)(void);
int (*get_cpu_scaling)(int cpu);
int (*get_aperf_mperf_shift)(void);
u64 (*get_val)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *);
@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
return cppc_perf.nominal_perf;
}
/*
 * Read the CPPC nominal performance of @cpu through cppc_get_nominal_perf().
 *
 * Returns the value read (converted to u32), or 0 when the read reports an
 * error, so callers can treat 0 as "no usable nominal performance".
 */
static u32 intel_pstate_cppc_nominal(int cpu)
{
	u64 perf;
	int err = cppc_get_nominal_perf(cpu, &perf);

	/* perf is only meaningful when the read succeeded. */
	return err ? 0 : perf;
}
#else /* CONFIG_ACPI_CPPC_LIB */
static inline void intel_pstate_set_itmt_prio(int cpu)
{
@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
acpi_processor_unregister_performance(policy->cpu);
}
static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
{
return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
}
static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
struct cppc_perf_caps *caps)
{
if (cppc_get_perf_caps(cpu->cpu, caps))
return false;
return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
}
#else /* CONFIG_ACPI */
static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu)
}
#endif /* CONFIG_ACPI_CPPC_LIB */
static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
{
pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
}
/**
* intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
* intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
* @cpu: Target CPU.
*
* On hybrid processors, HWP may expose more performance levels than there are
@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
* scaling factor between HWP performance levels and CPU frequency will be less
* than the scaling factor between P-state values and CPU frequency.
*
* In that case, the scaling factor between HWP performance levels and CPU
* frequency needs to be determined which can be done with the help of the
* observation that certain HWP performance levels should correspond to certain
* P-states, like for example the HWP highest performance should correspond
* to the maximum turbo P-state of the CPU.
* In that case, adjust the CPU parameters used in computations accordingly.
*/
static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
{
int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
int perf_ctl_turbo = pstate_funcs.get_turbo();
int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
int perf_ctl_max = pstate_funcs.get_max();
int max_freq = perf_ctl_max * perf_ctl_scaling;
int scaling = INT_MAX;
int freq;
int scaling = cpu->pstate.scaling;
pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
#ifdef CONFIG_ACPI
if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
struct cppc_perf_caps caps;
if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
/*
* If the CPPC nominal performance is valid, it
* can be assumed to correspond to cpu_khz.
*/
if (caps.nominal_perf == perf_ctl_max_phys) {
intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
return;
}
scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
/*
* If the CPPC guaranteed performance is valid,
* it can be assumed to correspond to max_freq.
*/
if (caps.guaranteed_perf == perf_ctl_max) {
intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
return;
}
scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
}
}
}
#endif
/*
* If using the CPPC data to compute the HWP-to-frequency scaling factor
* doesn't work, use the HWP_CAP guaranteed perf for this purpose with
* the assumption that it corresponds to max_freq.
*/
if (scaling > perf_ctl_scaling) {
pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
if (cpu->pstate.max_pstate == perf_ctl_max) {
intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
return;
}
scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
if (scaling > perf_ctl_scaling) {
/*
* This should not happen, because it would mean that
* the number of HWP perf levels was less than the
* number of P-states, so use the PERF_CTL scaling in
* that case.
*/
pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
scaling);
intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
return;
}
}
pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
/*
* If the product of the HWP performance scaling factor obtained above
* and the HWP_CAP highest performance is greater than the maximum turbo
* frequency corresponding to the pstate_funcs.get_turbo() return value,
* the scaling factor is too high, so recompute it so that the HWP_CAP
* highest performance corresponds to the maximum turbo frequency.
* If the product of the HWP performance scaling factor and the HWP_CAP
* highest performance is greater than the maximum turbo frequency
* corresponding to the pstate_funcs.get_turbo() return value, the
* scaling factor is too high, so recompute it to make the HWP_CAP
* highest performance correspond to the maximum turbo frequency.
*/
if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
cpu->pstate.turbo_freq = turbo_freq;
scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
cpu->pstate.scaling = scaling;
pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
cpu->cpu, scaling);
}
cpu->pstate.scaling = scaling;
pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
perf_ctl_scaling);
freq = perf_ctl_max_phys * perf_ctl_scaling;
cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
cpu->pstate.max_pstate_physical =
DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
scaling);
cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
/*
@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void)
return ret;
}
#ifdef CONFIG_ACPI_CPPC_LIB
static u32 hybrid_ref_perf;
static int hybrid_get_cpu_scaling(int cpu)
{
return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
intel_pstate_cppc_nominal(cpu));
}
static void intel_pstate_cppc_set_cpu_scaling(void)
{
u32 min_nominal_perf = U32_MAX;
int cpu;
for_each_present_cpu(cpu) {
u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
if (nominal_perf && nominal_perf < min_nominal_perf)
min_nominal_perf = nominal_perf;
}
if (min_nominal_perf < U32_MAX) {
hybrid_ref_perf = min_nominal_perf;
pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
}
}
#else
/*
 * Variant used when CONFIG_ACPI_CPPC_LIB is not defined: does nothing, so no
 * get_cpu_scaling callback is installed by this call.
 */
static inline void intel_pstate_cppc_set_cpu_scaling(void)
{
}
#endif /* CONFIG_ACPI_CPPC_LIB */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
int perf_ctl_max_phys = pstate_funcs.get_max_physical();
int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
pstate_funcs.get_scaling();
int perf_ctl_scaling = pstate_funcs.get_scaling();
cpu->pstate.min_pstate = pstate_funcs.get_min();
cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (hwp_active && !hwp_mode_bdw) {
__intel_pstate_get_hwp_cap(cpu);
if (hybrid_cpu)
intel_pstate_hybrid_hwp_calibrate(cpu);
else
if (pstate_funcs.get_cpu_scaling) {
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
if (cpu->pstate.scaling != perf_ctl_scaling)
intel_pstate_hybrid_hwp_adjust(cpu);
} else {
cpu->pstate.scaling = perf_ctl_scaling;
}
} else {
cpu->pstate.scaling = perf_ctl_scaling;
cpu->pstate.max_pstate = pstate_funcs.get_max();
@ -3276,6 +3230,9 @@ static int __init intel_pstate_init(void)
if (!default_driver)
default_driver = &intel_pstate;
if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
intel_pstate_cppc_set_cpu_scaling();
goto hwp_cpu_matched;
}
} else {

View File

@ -135,6 +135,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
return -ENOTSUPP;
}
/*
 * Inline fallback for cppc_get_nominal_perf() (the extern declaration is
 * guarded by CONFIG_ACPI_CPPC_LIB): always fails with -ENOTSUPP and never
 * writes to *nominal_perf.
 */
static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
return -ENOTSUPP;
}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;

View File

@ -11,7 +11,7 @@
#include <linux/types.h>
/**
* em_perf_state - Performance state of a performance domain
* struct em_perf_state - Performance state of a performance domain
* @frequency: The frequency in KHz, for consistency with CPUFreq
* @power: The power consumed at this level (by 1 CPU or by a registered
* device). It can be a total power: static and dynamic.
@ -25,7 +25,7 @@ struct em_perf_state {
};
/**
* em_perf_domain - Performance domain
* struct em_perf_domain - Performance domain
* @table: List of performance states, in ascending order
* @nr_perf_states: Number of performance states
* @milliwatts: Flag indicating the power values are in milli-Watts
@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev);
/**
* em_cpu_energy() - Estimates the energy consumed by the CPUs of a
performance domain
* performance domain
* @pd : performance domain for which energy has to be estimated
* @max_util : highest utilization among CPUs of the domain
* @sum_util : sum of the utilization of all CPUs in the domain
* @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which
might reflect reduced frequency (due to thermal)
* might reflect reduced frequency (due to thermal)
*
* This function must be used only for CPU devices. There is no validation,
* i.e. if the EM is a CPU type and has cpumask allocated. It is called from