Merge branches 'pm-cpufreq', 'pm-cpuidle' and 'acpi-cppc'

* pm-cpufreq:
  cpufreq: dt: Drop stale comment
  cpufreq: intel_pstate: Documentation for structures
  cpufreq: intel_pstate: fix inconsistency in setting policy limits
  intel_pstate: Avoid extra invocation of intel_pstate_sample()
  intel_pstate: Do not set utilization update hook too early

* pm-cpuidle:
  intel_idle: Add KBL support
  intel_idle: Add SKX support
  intel_idle: Clean up all registered devices on exit.
  intel_idle: Propagate hot plug errors.
  intel_idle: Don't overreact to a cpuidle registration failure.
  intel_idle: Setup the timer broadcast only on successful driver load.
  intel_idle: Avoid a double free of the per-CPU data.
  intel_idle: Fix dangling registration on error path.
  intel_idle: Fix deallocation order on the driver exit path.
  intel_idle: Remove redundant initialization calls.
  intel_idle: Fix a helper function's return value.
  intel_idle: remove useless return from void function.

* acpi-cppc:
  mailbox: pcc: Don't access an unmapped memory address space
This commit is contained in:
Rafael J. Wysocki 2016-04-08 21:46:05 +02:00
commit fa81e66ec8
4 changed files with 258 additions and 52 deletions

View File

@ -4,9 +4,6 @@
* Copyright (C) 2014 Linaro.
* Viresh Kumar <viresh.kumar@linaro.org>
*
* The OPP code in function set_target() is reused from
* drivers/cpufreq/omap-cpufreq.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.

View File

@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x)
return ret;
}
/**
* struct sample - Store performance sample
* @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual
* performance during last sample period
* @busy_scaled: Scaled busy value which is used to calculate next
* P state. This can be different than core_pct_busy
* to account for cpu idle period
* @aperf: Difference of actual performance frequency clock count
* read from APERF MSR between last and current sample
* @mperf: Difference of maximum performance frequency clock count
* read from MPERF MSR between last and current sample
* @tsc: Difference of time stamp counter between last and
* current sample
* @freq: Effective frequency calculated from APERF/MPERF
* @time: Current time from scheduler
*
* This structure is used in the cpudata structure to store performance sample
* data for choosing next P State.
*/
struct sample {
int32_t core_pct_busy;
int32_t busy_scaled;
@ -74,6 +93,20 @@ struct sample {
u64 time;
};
/**
* struct pstate_data - Store P state data
* @current_pstate: Current requested P state
* @min_pstate: Min P state possible for this platform
* @max_pstate: Max P state possible for this platform
* @max_pstate_physical: This is physical Max P state for a processor
* This can be higher than the max_pstate which can
* be limited by platform thermal design power limits
* @scaling: Scaling factor to convert frequency to cpufreq
* frequency units
* @turbo_pstate: Max Turbo P state possible for this platform
*
* Stores the per cpu model P state limits and current P state.
*/
struct pstate_data {
int current_pstate;
int min_pstate;
@ -83,6 +116,19 @@ struct pstate_data {
int turbo_pstate;
};
/**
* struct vid_data - Stores voltage information data
* @min: VID data for this platform corresponding to
* the lowest P state
* @max: VID data corresponding to the highest P State.
* @turbo: VID data for turbo P state
* @ratio: Ratio of (vid max - vid min) /
* (max P state - Min P State)
*
* Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
* This data is used in Atom platforms, where in addition to target P state,
* the voltage data needs to be specified to select next P State.
*/
struct vid_data {
int min;
int max;
@ -90,6 +136,18 @@ struct vid_data {
int32_t ratio;
};
/**
* struct _pid - Stores PID data
* @setpoint: Target set point for busyness or performance
* @integral: Storage for accumulated error values
* @p_gain: PID proportional gain
* @i_gain: PID integral gain
* @d_gain: PID derivative gain
* @deadband: PID deadband
* @last_err: Last error storage for integral part of PID calculation
*
* Stores PID coefficients and last error for PID controller.
*/
struct _pid {
int setpoint;
int32_t integral;
@ -100,6 +158,23 @@ struct _pid {
int32_t last_err;
};
/**
* struct cpudata - Per CPU instance data storage
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utilization update callback information
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
* @last_sample_time: Last Sample time
* @prev_aperf: Last APERF value read from APERF MSR
* @prev_mperf: Last MPERF value read from MPERF MSR
* @prev_tsc: Last timestamp counter (TSC) value
* @prev_cummulative_iowait: IO Wait time difference between last and
* current sample
* @sample: Storage for storing last Sample data
*
* This structure stores per CPU instance data for all CPUs.
*/
struct cpudata {
int cpu;
@ -118,6 +193,19 @@ struct cpudata {
};
static struct cpudata **all_cpu_data;
/**
* struct pstate_adjust_policy - Stores static PID configuration data
* @sample_rate_ms: PID calculation sample rate in ms
* @sample_rate_ns: Sample rate calculation in ns
* @deadband: PID deadband
* @setpoint: PID Setpoint
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
*
* Stores per CPU model static PID configuration data.
*/
struct pstate_adjust_policy {
int sample_rate_ms;
s64 sample_rate_ns;
@ -128,6 +216,20 @@ struct pstate_adjust_policy {
int i_gain_pct;
};
/**
* struct pstate_funcs - Per CPU model specific callbacks
* @get_max: Callback to get maximum non turbo effective P state
* @get_max_physical: Callback to get maximum non turbo physical P state
* @get_min: Callback to get minimum P state
* @get_turbo: Callback to get turbo P state
* @get_scaling: Callback to get frequency scaling factor
* @get_val: Callback to convert P state to actual MSR write value
* @get_vid: Callback to get VID data for Atom platforms
* @get_target_pstate: Callback to a function to calculate next P state to use
*
* Core and Atom CPU models have different ways to get P State limits. This
* structure is used to store those callbacks.
*/
struct pstate_funcs {
int (*get_max)(void);
int (*get_max_physical)(void);
@ -139,6 +241,11 @@ struct pstate_funcs {
int32_t (*get_target_pstate)(struct cpudata *);
};
/**
* struct cpu_defaults - Per CPU model default config data
* @pid_policy: PID config data
* @funcs: Callback function data
*/
struct cpu_defaults {
struct pstate_adjust_policy pid_policy;
struct pstate_funcs funcs;
@ -151,6 +258,34 @@ static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;
/**
* struct perf_limits - Store user and policy limits
* @no_turbo: User requested turbo state from intel_pstate sysfs
* @turbo_disabled: Platform turbo status either from msr
* MSR_IA32_MISC_ENABLE or when maximum available pstate
* matches the maximum turbo pstate
* @max_perf_pct: Effective maximum performance limit in percentage, this
* is minimum of either limits enforced by cpufreq policy
* or limits from user set limits via intel_pstate sysfs
* @min_perf_pct: Effective minimum performance limit in percentage, this
* is maximum of either limits enforced by cpufreq policy
* or limits from user set limits via intel_pstate sysfs
* @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
* This value is used to limit max pstate
* @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
* This value is used to limit min pstate
* @max_policy_pct: The maximum performance in percentage enforced by
* cpufreq setpolicy interface
* @max_sysfs_pct: The maximum performance in percentage enforced by
* intel pstate sysfs interface
* @min_policy_pct: The minimum performance in percentage enforced by
* cpufreq setpolicy interface
* @min_sysfs_pct: The minimum performance in percentage enforced by
* intel pstate sysfs interface
*
* Storage for user and policy defined limits.
*/
struct perf_limits {
int no_turbo;
int turbo_disabled;
@ -910,7 +1045,14 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
return true;
/*
* First time this function is invoked in a given cycle, all of the
* previous sample data fields are equal to zero or stale and they must
* be populated with meaningful numbers for things to work, so assume
* that sample.time will always be reset before setting the utilization
* update hook and make the caller skip the sample then.
*/
return !!cpu->last_sample_time;
}
static inline int32_t get_avg_frequency(struct cpudata *cpu)
@ -984,8 +1126,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
* enough period of time to adjust our busyness.
*/
duration_ns = cpu->sample.time - cpu->last_sample_time;
if ((s64)duration_ns > pid_params.sample_rate_ns * 3
&& cpu->last_sample_time > 0) {
if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
@ -1100,10 +1241,8 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
intel_pstate_get_cpu_pstates(cpu);
intel_pstate_busy_pid_reset(cpu);
intel_pstate_sample(cpu, 0);
cpu->update_util.func = intel_pstate_update_util;
cpufreq_set_update_util_data(cpunum, &cpu->update_util);
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
@ -1122,22 +1261,54 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
return get_avg_frequency(cpu);
}
/*
 * Install the utilization update callback for CPU @cpu_num.
 *
 * The per-CPU sample timestamp is zeroed before the callback data is handed
 * to cpufreq_set_update_util_data(), so that intel_pstate_update_util()
 * does not work from stale sample data.
 */
static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
	struct cpudata *data = all_cpu_data[cpu_num];

	/* Must happen first: keep the callback from seeing a stale timestamp. */
	data->sample.time = 0;

	cpufreq_set_update_util_data(cpu_num, &data->update_util);
}
/*
 * Remove the utilization update callback for @cpu by passing NULL to
 * cpufreq_set_update_util_data(), then call synchronize_sched() before
 * returning to the caller.
 */
static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
cpufreq_set_update_util_data(cpu, NULL);
/*
 * NOTE(review): presumably this waits for callbacks that are still
 * running to finish, so callers may free per-CPU data afterwards
 * (as the cleanup loop does with kfree()) - confirm.
 */
synchronize_sched();
}
/*
 * Reset @limits to the values used for the performance policy.
 *
 * Both turbo flags are cleared, the effective minimum and maximum are pinned
 * at 100% (with their scaled counterparts set to int_tofp(1)), and the
 * policy/sysfs bounds are opened to the full 0..100 percent range.
 */
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
	/* Turbo flags. */
	limits->turbo_disabled = 0;
	limits->no_turbo = 0;

	/* Effective maximum, as a percentage and in scaled form. */
	limits->max_perf = int_tofp(1);
	limits->max_perf_pct = 100;

	/* Effective minimum, as a percentage and in scaled form. */
	limits->min_perf = int_tofp(1);
	limits->min_perf_pct = 100;

	/* Upper bounds requested via cpufreq policy and intel_pstate sysfs. */
	limits->max_sysfs_pct = 100;
	limits->max_policy_pct = 100;

	/* Lower bounds requested via cpufreq policy and intel_pstate sysfs. */
	limits->min_sysfs_pct = 0;
	limits->min_policy_pct = 0;
}
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
if (!policy->cpuinfo.max_freq)
return -ENODEV;
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
intel_pstate_clear_update_util_hook(policy->cpu);
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits = &performance_limits;
if (hwp_active)
intel_pstate_hwp_set(policy->cpus);
return 0;
if (policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
intel_pstate_set_performance_limits(limits);
goto out;
}
} else {
pr_debug("intel_pstate: set powersave\n");
limits = &powersave_limits;
}
pr_debug("intel_pstate: set powersave\n");
limits = &powersave_limits;
limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
@ -1163,6 +1334,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
int_tofp(100));
out:
intel_pstate_set_update_util_hook(policy->cpu);
if (hwp_active)
intel_pstate_hwp_set(policy->cpus);
@ -1187,8 +1361,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
cpufreq_set_update_util_data(cpu_num, NULL);
synchronize_sched();
intel_pstate_clear_update_util_hook(cpu_num);
if (hwp_active)
return;
@ -1455,8 +1628,7 @@ out:
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
cpufreq_set_update_util_data(cpu, NULL);
synchronize_sched();
intel_pstate_clear_update_util_hook(cpu);
kfree(all_cpu_data[cpu]);
}
}

View File

@ -660,6 +660,35 @@ static struct cpuidle_state skl_cstates[] = {
.enter = NULL }
};
/*
 * Idle-state table for SKX parts; idle_cpu_skx.state_table points here.
 * Each entry gives the state's name, a description of the MWAIT value it
 * uses (also encoded into .flags via MWAIT2flg()), its exit latency and
 * target residency, and the enter/enter_freeze callbacks. The table ends
 * with an entry whose .enter is NULL.
 */
static struct cpuidle_state skx_cstates[] = {
{
.name = "C1-SKX",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00),
.exit_latency = 2,
.target_residency = 2,
.enter = &intel_idle,
.enter_freeze = intel_idle_freeze, },
{
.name = "C1E-SKX",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle,
.enter_freeze = intel_idle_freeze, },
{
/* The only SKX state here that also sets CPUIDLE_FLAG_TLB_FLUSHED. */
.name = "C6-SKX",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
.enter_freeze = intel_idle_freeze, },
{
/* End-of-table marker. */
.enter = NULL }
};
static struct cpuidle_state atom_cstates[] = {
{
.name = "C1E-ATM",
@ -818,8 +847,11 @@ static int cpu_hotplug_notify(struct notifier_block *n,
* driver in this case
*/
dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu);
if (!dev->registered)
intel_idle_cpu_init(hotcpu);
if (dev->registered)
break;
if (intel_idle_cpu_init(hotcpu))
return NOTIFY_BAD;
break;
}
@ -904,6 +936,10 @@ static const struct idle_cpu idle_cpu_skl = {
.disable_promotion_to_c1e = true,
};
/*
 * Idle configuration for SKX: selects the skx_cstates table and sets
 * disable_promotion_to_c1e. Bound to CPU model 0x55 in intel_idle_ids.
 */
static const struct idle_cpu idle_cpu_skx = {
.state_table = skx_cstates,
.disable_promotion_to_c1e = true,
};
static const struct idle_cpu idle_cpu_avn = {
.state_table = avn_cstates,
@ -945,6 +981,9 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
ICPU(0x56, idle_cpu_bdw),
ICPU(0x4e, idle_cpu_skl),
ICPU(0x5e, idle_cpu_skl),
ICPU(0x8e, idle_cpu_skl),
ICPU(0x9e, idle_cpu_skl),
ICPU(0x55, idle_cpu_skx),
ICPU(0x57, idle_cpu_knl),
{}
};
@ -987,22 +1026,15 @@ static int __init intel_idle_probe(void)
icpu = (const struct idle_cpu *)id->driver_data;
cpuidle_state_table = icpu->state_table;
if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
else
on_each_cpu(__setup_broadcast_timer, (void *)true, 1);
pr_debug(PREFIX "v" INTEL_IDLE_VERSION
" model 0x%X\n", boot_cpu_data.x86_model);
pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
lapic_timer_reliable_states);
return 0;
}
/*
* intel_idle_cpuidle_devices_uninit()
* unregister, free cpuidle_devices
* Unregisters the cpuidle devices.
*/
static void intel_idle_cpuidle_devices_uninit(void)
{
@ -1013,9 +1045,6 @@ static void intel_idle_cpuidle_devices_uninit(void)
dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
cpuidle_unregister_device(dev);
}
free_percpu(intel_idle_cpuidle_devices);
return;
}
/*
@ -1111,7 +1140,7 @@ static void intel_idle_state_table_update(void)
* intel_idle_cpuidle_driver_init()
* allocate, initialize cpuidle_states
*/
static int __init intel_idle_cpuidle_driver_init(void)
static void __init intel_idle_cpuidle_driver_init(void)
{
int cstate;
struct cpuidle_driver *drv = &intel_idle_driver;
@ -1163,18 +1192,10 @@ static int __init intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
if (icpu->auto_demotion_disable_flags)
on_each_cpu(auto_demotion_disable, NULL, 1);
if (icpu->byt_auto_demotion_disable_flag) {
wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
on_each_cpu(c1e_promotion_disable, NULL, 1);
return 0;
}
@ -1193,7 +1214,6 @@ static int intel_idle_cpu_init(int cpu)
if (cpuidle_register_device(dev)) {
pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
intel_idle_cpuidle_devices_uninit();
return -EIO;
}
@ -1218,40 +1238,51 @@ static int __init intel_idle_init(void)
if (retval)
return retval;
intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (intel_idle_cpuidle_devices == NULL)
return -ENOMEM;
intel_idle_cpuidle_driver_init();
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
struct cpuidle_driver *drv = cpuidle_get_driver();
printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
drv ? drv->name : "none");
free_percpu(intel_idle_cpuidle_devices);
return retval;
}
intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (intel_idle_cpuidle_devices == NULL)
return -ENOMEM;
cpu_notifier_register_begin();
for_each_online_cpu(i) {
retval = intel_idle_cpu_init(i);
if (retval) {
intel_idle_cpuidle_devices_uninit();
cpu_notifier_register_done();
cpuidle_unregister_driver(&intel_idle_driver);
free_percpu(intel_idle_cpuidle_devices);
return retval;
}
}
__register_cpu_notifier(&cpu_hotplug_notifier);
if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
else
on_each_cpu(__setup_broadcast_timer, (void *)true, 1);
cpu_notifier_register_done();
pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
lapic_timer_reliable_states);
return 0;
}
static void __exit intel_idle_exit(void)
{
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
struct cpuidle_device *dev;
int i;
cpu_notifier_register_begin();
@ -1259,9 +1290,15 @@ static void __exit intel_idle_exit(void)
on_each_cpu(__setup_broadcast_timer, (void *)false, 1);
__unregister_cpu_notifier(&cpu_hotplug_notifier);
for_each_possible_cpu(i) {
dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
cpuidle_unregister_device(dev);
}
cpu_notifier_register_done();
return;
cpuidle_unregister_driver(&intel_idle_driver);
free_percpu(intel_idle_cpuidle_devices);
}
module_init(intel_idle_init);

View File

@ -361,8 +361,6 @@ static int __init acpi_pcc_probe(void)
struct acpi_generic_address *db_reg;
struct acpi_pcct_hw_reduced *pcct_ss;
pcc_mbox_channels[i].con_priv = pcct_entry;
pcct_entry = (struct acpi_subtable_header *)
((unsigned long) pcct_entry + pcct_entry->length);
/* If doorbell is in system memory cache the virt address */
pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry;
@ -370,6 +368,8 @@ static int __init acpi_pcc_probe(void)
if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address,
db_reg->bit_width/8);
pcct_entry = (struct acpi_subtable_header *)
((unsigned long) pcct_entry + pcct_entry->length);
}
pcc_mbox_ctrl.num_chans = count;