From cef9615a853ebc4972084f7e70b52892557420ac Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Apr 2009 13:48:29 +0200 Subject: [PATCH 01/13] [CPUFREQ] ondemand: Uncouple minimal sampling rate from HZ in NO_HZ case With this patch you have following minimal sampling rate restrictions: Kernel restrictions: If CONFIG_NO_HZ is set, the limit is 10ms fixed. If CONFIG_NO_HZ is not set or no_hz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) HZ=100: min=200000us (200ms) HW restrictions: Do not sample/poll more often than HW latency * 100 exported by the low level cpufreq HW driver The higher value of above restrictions is the minimal sampling rate that can be set (and can be seen via ondemand/sampling_rate_min sysfs file) Default sampling rate still is HW latency * 1000, but this will now end up in lower values on latest (Intel and AMD) hardware as these can switch really fast and sampling rate mostly was limited to the 80ms or 200ms (depending on whether HZ=250 or HZ=1000 is used). Signed-off-by: Thomas Renninger Cc: Pallipadi Venkatesh Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 44 ++++++++++------------- drivers/cpufreq/cpufreq_ondemand.c | 50 ++++++++++++-------------- 2 files changed, 41 insertions(+), 53 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 7a74d175287b..06bfe1c572cd 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -42,27 +42,12 @@ * this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) -/* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) -/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon - * Define the minimal settable sampling rate to the greater of: - * - "HW transition latency" * 100 (same as default sampling / 10) - * - MIN_STAT_SAMPLING_RATE - * To avoid that userspace shoots itself. -*/ -static unsigned int minimum_sampling_rate(void) -{ - return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); -} -/* This will also vanish soon with removing sampling_rate_max */ -#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +static unsigned int min_sampling_rate; + #define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) @@ -190,7 +175,7 @@ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) current->comm); print_once = 1; } - return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); + return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) @@ -202,7 +187,7 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) "sysfs file is deprecated - used by: %s\n", current->comm); print_once = 1; } - return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); + return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ @@ -254,7 +239,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, return -EINVAL; mutex_lock(&dbs_mutex); - dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); return count; @@ -601,11 +586,18 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, if (latency == 0) latency = 1; - def_sampling_rate = - max(latency * LATENCY_MULTIPLIER, - MIN_STAT_SAMPLING_RATE); - - dbs_tuners_ins.sampling_rate = def_sampling_rate; + /* + * conservative does not implement micro like ondemand + * governor, thus we are bound to jiffes/HZ + */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); cpufreq_register_notifier( &dbs_cpufreq_notifier_block, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index e741c339df76..a235114c3d85 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -32,6 +32,7 @@ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) #define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) @@ -45,27 +46,12 @@ * this governor will not work. * All times here are in uS. */ -static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE_RATIO (2) -/* for correct statistics, we need at least 10 ticks between each measure */ -#define MIN_STAT_SAMPLING_RATE \ - (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) -#define MIN_SAMPLING_RATE \ - (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) -/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon - * Define the minimal settable sampling rate to the greater of: - * - "HW transition latency" * 100 (same as default sampling / 10) - * - MIN_STAT_SAMPLING_RATE - * To avoid that userspace shoots itself. -*/ -static unsigned int minimum_sampling_rate(void) -{ - return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); -} -/* This will also vanish soon with removing sampling_rate_max */ -#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +static unsigned int min_sampling_rate; + #define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) static void do_dbs_timer(struct work_struct *work); @@ -227,7 +213,7 @@ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) current->comm); print_once = 1; } - return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); + return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) @@ -240,7 +226,7 @@ static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) current->comm); print_once = 1; } - return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); + return sprintf(buf, "%u\n", min_sampling_rate); } #define define_one_ro(_name) \ @@ -274,7 +260,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused, mutex_unlock(&dbs_mutex); return -EINVAL; } - dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); + dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); mutex_unlock(&dbs_mutex); return count; @@ -619,12 +605,12 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, latency = policy->cpuinfo.transition_latency / 1000; if (latency == 0) latency = 1; - - def_sampling_rate = - max(latency * LATENCY_MULTIPLIER, - MIN_STAT_SAMPLING_RATE); - - dbs_tuners_ins.sampling_rate = def_sampling_rate; + /* Bring kernel and HW constraints together */ + min_sampling_rate = max(min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + dbs_tuners_ins.sampling_rate = + max(min_sampling_rate, + latency * LATENCY_MULTIPLIER); } dbs_timer_init(this_dbs_info); @@ -678,6 +664,16 @@ static int __init cpufreq_gov_dbs_init(void) dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; dbs_tuners_ins.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + /* + * In no_hz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + /* For correct statistics, we need 10 ticks for each measure */ + min_sampling_rate = + MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); } kondemand_wq = create_workqueue("kondemand"); From 86e13684aa77f07c77db352f437d9e53a84dde90 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Apr 2009 13:48:30 +0200 Subject: [PATCH 02/13] [CPUFREQ] powernow-k8: Set transition latency to 1 if ACPI tables export 0 This doesn't fix anything, but it's expected that a transition latency of 0 could cause trouble in the future. Signed-off-by: Thomas Renninger Cc: Langsdorf, Mark Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index cf52215d9eb1..935989693a64 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1046,6 +1046,19 @@ static int get_transition_latency(struct powernow_k8_data *data) if (cur_latency > max_latency) max_latency = cur_latency; } + if (max_latency == 0) { + /* + * Fam 11h always returns 0 as transition latency. + * This is intended and means "very fast". While cpufreq core + * and governors currently can handle that gracefully, better + * set it to 1 to avoid problems in the future. + * For all others it's a BIOS bug. + */ + if (!boot_cpu_data.x86 == 0x11) + printk(KERN_ERR FW_WARN PFX "Invalid zero transition " + "latency\n"); + max_latency = 1; + } /* value in usecs, needs to be in nanoseconds */ return 1000 * max_latency; } From 4f4d1ad6ee69027f51f9d137f7e7d3c863cbc53d Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Apr 2009 13:48:31 +0200 Subject: [PATCH 03/13] [CPUFREQ] Only set sampling_rate_max deprecated, sampling_rate_min is useful Update the documentation accordingly. Cleanup and use printk_once. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- Documentation/cpu-freq/governors.txt | 26 ++++++++++++++------------ drivers/cpufreq/cpufreq_conservative.c | 17 ++--------------- drivers/cpufreq/cpufreq_ondemand.c | 18 ++---------------- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index ce73f3eb5ddb..aed082f49d09 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -119,10 +119,6 @@ want the kernel to look at the CPU usage and to make decisions on what to do about the frequency. Typically this is set to values of around '10000' or more. It's default value is (cmp. with users-guide.txt): transition_latency * 1000 -The lowest value you can set is: -transition_latency * 100 or it may get restricted to a value where it -makes not sense for the kernel anymore to poll that often which depends -on your HZ config variable (HZ=1000: max=20000us, HZ=250: max=5000). Be aware that transition latency is in ns and sampling_rate is in us, so you get the same sysfs value by default. Sampling rate should always get adjusted considering the transition latency @@ -131,14 +127,20 @@ in the bash (as said, 1000 is default), do: echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ >ondemand/sampling_rate -show_sampling_rate_(min|max): THIS INTERFACE IS DEPRECATED, DON'T USE IT. -You can use wider ranges now and the general -cpuinfo_transition_latency variable (cmp. with user-guide.txt) can be -used to obtain exactly the same info: -show_sampling_rate_min = transtition_latency * 500 / 1000 -show_sampling_rate_max = transtition_latency * 500000 / 1000 -(divided by 1000 is to illustrate that sampling rate is in us and -transition latency is exported ns). +show_sampling_rate_min: +The sampling rate is limited by the HW transition latency: +transition_latency * 100 +Or by kernel restrictions: +If CONFIG_NO_HZ is set, the limit is 10ms fixed. +If CONFIG_NO_HZ is not set or no_hz=off boot parameter is used, the +limits depend on the CONFIG_HZ option: +HZ=1000: min=20000us (20ms) +HZ=250: min=80000us (80ms) +HZ=100: min=200000us (200ms) +The highest value of kernel and HW latency restrictions is shown and +used as the minimum sampling rate. + +show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. up_threshold: defines what the average CPU usage between the samplings of 'sampling_rate' needs to be for the kernel to make a decision on diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 06bfe1c572cd..7fc58af748b4 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -167,26 +167,13 @@ static struct notifier_block dbs_cpufreq_notifier_block = { /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } + printk_once(KERN_INFO "CPUFREQ: conservative sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", current->comm); - print_once = 1; - } return sprintf(buf, "%u\n", min_sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a235114c3d85..1911d1729353 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -205,27 +205,13 @@ static void ondemand_powersave_bias_init(void) /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } + printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max " + "sysfs file is deprecated - used by: %s\n", current->comm); return sprintf(buf, "%u\n", -1U); } static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) { - static int print_once; - - if (!print_once) { - printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_min " - "sysfs file is deprecated - used by: %s\n", - current->comm); - print_once = 1; - } return sprintf(buf, "%u\n", min_sampling_rate); } From 21335d021464c3ba3c20fc7207ffe2bdd2458568 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 23 Apr 2009 19:45:04 +0100 Subject: [PATCH 04/13] [CPUFREQ] powernow-k8.c: mess cleanup Mess cleanup in powernow_k8_acpi_pst_values() function. Signed-off-by: Luis Henriques Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 935989693a64..a9d7ecbf64b7 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1,3 +1,4 @@ + /* * (c) 2003-2006 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the @@ -823,13 +824,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; - control = data->acpi_data.states[index].control; data->irt = (control - >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> - RVO_SHIFT) & RVO_MASK; data->exttype = (control - >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 - << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = - (control >> VST_SHIFT) & VST_MASK; } + control = data->acpi_data.states[index].control; + data->irt = (control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (control >> VST_SHIFT) & VST_MASK; +} static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { From 51555c0e91160f6d4c6c1cb7a44d20ea346aed08 Mon Sep 17 00:00:00 2001 From: Chumbalkar Nagananda Date: Thu, 21 May 2009 23:29:48 +0000 Subject: [PATCH 05/13] [CPUFREQ] minor correction to cpu-freq documentation I have been reading the documentation for cpufreq closely. Found a couple of minor errors in the Documentation. Signed-off-by: Naga Chumbalkar Signed-off-by: Dave Jones --- Documentation/cpu-freq/cpu-drivers.txt | 2 +- Documentation/cpu-freq/user-guide.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 43c743903dd7..75a58d14d3cf 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -155,7 +155,7 @@ actual frequency must be determined using the following rules: - if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal target_freq. ("H for highest, but no higher than") -Here again the frequency table helper might assist you - see section 3 +Here again the frequency table helper might assist you - see section 2 for details. diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 75f41193f3e1..5d5f5fadd1c2 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -31,7 +31,6 @@ Contents: 3. How to change the CPU cpufreq policy and/or speed 3.1 Preferred interface: sysfs -3.2 Deprecated interfaces From 931db6a32dbfaad627e89d0524979ce9cb894691 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 1 Jun 2009 12:29:55 -0400 Subject: [PATCH 06/13] [CPUFREQ] Clean up convoluted code in arch/x86/kernel/tsc.c:time_cpufreq_notifier() Christoph Hellwig noticed the following potential uninitialised use: > arch/x86/kernel/tsc.c: In function 'time_cpufreq_notifier': > arch/x86/kernel/tsc.c:634: warning: 'dummy' may be used uninitialized in this function > > where we do have CONFIG_SMP set, freq->flags & CPUFREQ_CONST_LOOPS is > true and ref_freq is false. It seems plausable, though the circumstances for hitting it are really low. Nearly all SMP capable cpufreq drivers set CPUFREQ_CONST_LOOPS. powernow-k8 is really the only exception. The older CPUs were typically only ever UP. (powernow-k7 never supported SMP for eg) It's worth fixing regardless, as it cleans up the code. Fix possible uninitialized use of dummy, by just removing it, and making the setting of lpj more obvious. Signed-off-by: Dave Jones --- arch/x86/kernel/tsc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3e1c057e98fe..3fbd3206eccf 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -631,17 +631,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; + unsigned long *lpj; if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - lpj = &dummy; - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -#ifdef CONFIG_SMP - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#else lpj = &boot_cpu_data.loops_per_jiffy; +#ifdef CONFIG_SMP + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + lpj = &cpu_data(freq->cpu).loops_per_jiffy; #endif if (!ref_freq) { From b394f1dfc070e6f50f5f33694000815e50ef319d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 10 Jun 2009 12:41:37 -0700 Subject: [PATCH 07/13] [CPUFREQ] reduce scope of ACPI_PSS_BIOS_BUG_MSG[] This symbol doesn't need file-global scope. Cc: "Zhang, Rui" Cc: Dave Jones Cc: Ingo Molnar Cc: Langsdorf, Mark Cc: Leo Milano Cc: Thomas Renninger Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9d7ecbf64b7..2709b3c183b4 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1250,13 +1250,12 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } -static const char ACPI_PSS_BIOS_BUG_MSG[] = - KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; - /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { + static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; cpumask_t oldmask; int rc; From 532cfee6ba0a8efcf7c3ce38b9881292d79d516e Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Thu, 11 Jun 2009 15:26:48 +0000 Subject: [PATCH 08/13] [CPUFREQ] powernow-k8: read P-state from HW By definition, "cpuinfo_cur_freq" should report the value from HW. So, don't depend on the cached value. Instead read P-state directly from HW, while taking into account the erratum 311 workaround for Fam 11h processors. Cc: Andreas Herrmann Cc: Langsdorf, Mark Cc: Thomas Renninger Signed-off-by: Naga Chumbalkar Reviewed-by: Andreas Herrmann Tested-by: Andreas Herrmann Acked-by: Langsdorf, Mark Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2709b3c183b4..331021112f2b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -118,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - if (data->currpstate == HW_PSTATE_INVALID) { - /* read (initial) hw pstate if not yet set */ - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + data->currpstate = i; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) + data->currpstate = HW_PSTATE_0; - /* - * a workaround for family 11h erratum 311 might cause - * an "out-of-range Pstate if the core is in Pstate-0 - */ - if (i >= data->numps) - data->currpstate = HW_PSTATE_0; - else - data->currpstate = i; - } return 0; } do { From e15bc4559b397a611441a135b1f5992f07d0f436 Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Thu, 11 Jun 2009 15:26:54 +0000 Subject: [PATCH 09/13] [CPUFREQ] powernow-k8: get drv data for correct CPU Make powernowk8_get() similar to powernowk8_target() and powernowk8_verify() in the way it obtains "powernow_data" for a given CPU. Cc: Andreas Herrmann Cc: Langsdorf, Mark Cc: Thomas Renninger Signed-off-by: Naga Chumbalkar Reviewed-by: Andreas Herrmann Tested-by: Andreas Herrmann Acked-by: Langsdorf, Mark Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 331021112f2b..20c7b99d7ba8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1385,13 +1385,9 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) static unsigned int powernowk8_get(unsigned int cpu) { - struct powernow_k8_data *data; + struct powernow_k8_data *data = per_cpu(powernow_data, cpu); cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - unsigned int first; - - first = cpumask_first(cpu_core_mask(cpu)); - data = per_cpu(powernow_data, first); if (!data) return -EINVAL; From 394122ab144dae4b276d74644a2f11c44a60ac5c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 11 Jun 2009 22:59:58 +0930 Subject: [PATCH 10/13] [CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). We use smp_call_function_single: this had the advantage of being more efficient, too. Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Cc: Dominik Brodowski Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 91 ++++++++++++--------- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 1 + 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 016c1a4fa3fc..6911e91fb4f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -89,7 +89,8 @@ static int speedstep_find_register(void) * speedstep_set_state - set the SpeedStep state * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * - * Tries to change the SpeedStep state. + * Tries to change the SpeedStep state. Can be called from + * smp_call_function_single. */ static void speedstep_set_state(unsigned int state) { @@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state) return; } +/* Wrapper for smp_call_function_single. */ +static void _speedstep_set_state(void *_state) +{ + speedstep_set_state(*(unsigned int *)_state); +} /** * speedstep_activate - activate SpeedStep control in the chipset @@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -static unsigned int _speedstep_get(const struct cpumask *cpus) -{ +struct get_freq_data { unsigned int speed; - cpumask_t cpus_allowed; + unsigned int processor; +}; - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpus); - speed = speedstep_get_frequency(speedstep_processor); - set_cpus_allowed_ptr(current, &cpus_allowed); - dprintk("detected %u kHz as current frequency\n", speed); - return speed; +static void get_freq_data(void *_data) +{ + struct get_freq_data *data = _data; + + data->speed = speedstep_get_frequency(data->processor); } static unsigned int speedstep_get(unsigned int cpu) { - return _speedstep_get(cpumask_of(cpu)); + struct get_freq_data data = { .processor = cpu }; + + /* You're supposed to ensure CPU is online. */ + if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + BUG(); + + dprintk("detected %u kHz as current frequency\n", data.speed); + return data.speed; } /** @@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0, policy_cpu; struct cpufreq_freqs freqs; - cpumask_t cpus_allowed; int i; if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) return -EINVAL; - freqs.old = _speedstep_get(policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); + freqs.old = speedstep_get(policy_cpu); freqs.new = speedstep_freqs[newstate].frequency; freqs.cpu = policy->cpu; @@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - cpus_allowed = current->cpus_allowed; - for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - /* switch to physical CPU where state is to be changed */ - set_cpus_allowed_ptr(current, policy->cpus); - - speedstep_set_state(newstate); - - /* allow to be run on all CPUs */ - set_cpus_allowed_ptr(current, &cpus_allowed); + smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, + true); for_each_cpu(i, policy->cpus) { freqs.cpu = i; @@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } +struct get_freqs { + struct cpufreq_policy *policy; + int ret; +}; + +static void get_freqs_on_cpu(void *_get_freqs) +{ + struct get_freqs *get_freqs = _get_freqs; + + get_freqs->ret = + speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &get_freqs->policy->cpuinfo.transition_latency, + &speedstep_set_state); +} static int speedstep_cpu_init(struct cpufreq_policy *policy) { - int result = 0; - unsigned int speed; - cpumask_t cpus_allowed; + int result; + unsigned int policy_cpu, speed; + struct get_freqs gf; /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); #endif - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, policy->cpus); + policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); /* detect low and high frequency and transition latency */ - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &policy->cpuinfo.transition_latency, - &speedstep_set_state); - set_cpus_allowed_ptr(current, &cpus_allowed); - if (result) - return result; + gf.policy = policy; + smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); + if (gf.ret) + return gf.ret; /* get current speed setting */ - speed = _speedstep_get(policy->cpus); + speed = speedstep_get(policy_cpu); if (!speed) return -EIO; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 2e3c6862657b..f4c290b8482f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void) } +/* Warning: may get called from smp_call_function_single. */ unsigned int speedstep_get_frequency(unsigned int processor) { switch (processor) { From e3f996c26ff6c4c084aaaa64dce6e54d31f517be Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 11 Jun 2009 22:59:58 +0930 Subject: [PATCH 11/13] [CPUFREQ] cpumask: avoid cpumask games in arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c Impact: don't play with current's cpumask It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). Use rdmsr_on_cpu and wrmsr_on_cpu instead. Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Cc: Jeremy Fitzhardinge Signed-off-by: Dave Jones --- .../kernel/cpu/cpufreq/speedstep-centrino.c | 60 ++++++------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 55c831ed71ce..8d672ef162ce 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu) { unsigned l, h; unsigned clock_freq; - cpumask_t saved_mask; - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - return 0; - - rdmsr(MSR_IA32_PERF_STATUS, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); clock_freq = extract_clock(l, cpu, 0); if (unlikely(clock_freq == 0)) { @@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu) * P-state transition (like TM2). Get the last freq set * in PERF_CTL. */ - rdmsr(MSR_IA32_PERF_CTL, l, h); + rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); clock_freq = extract_clock(l, cpu, 1); } - - set_cpus_allowed_ptr(current, &saved_mask); return clock_freq; } @@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int retval = 0; unsigned int j, k, first_cpu, tmp; - cpumask_var_t saved_mask, covered_cpus; + cpumask_var_t covered_cpus; - if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) + if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) return -ENOMEM; - if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { - free_cpumask_var(saved_mask); - return -ENOMEM; - } - cpumask_copy(saved_mask, ¤t->cpus_allowed); if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { retval = -ENODEV; @@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy, first_cpu = 1; for_each_cpu(j, policy->cpus) { - const struct cpumask *mask; + int good_cpu; /* cpufreq holds the hotplug lock, so we are safe here */ if (!cpu_online(j)) @@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy, * Make sure we are running on CPU that wants to change freq */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - mask = policy->cpus; + good_cpu = cpumask_any_and(policy->cpus, + cpu_online_mask); else - mask = cpumask_of(j); + good_cpu = j; - set_cpus_allowed_ptr(current, mask); - preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { + if (good_cpu >= nr_cpu_ids) { dprintk("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { /* We haven't started the transition yet. */ - goto migrate_end; + goto out; } - preempt_enable(); break; } msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; if (first_cpu) { - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); if (msr == (oldmsr & 0xffff)) { dprintk("no change needed - msr was and needs " "to be %x\n", oldmsr); retval = 0; - goto migrate_end; + goto out; } freqs.old = extract_clock(oldmsr, cpu, 0); @@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy, oldmsr |= msr; } - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - preempt_enable(); + wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) break; - } - cpu_set(j, *covered_cpus); - preempt_enable(); + cpumask_set_cpu(j, covered_cpus); } for_each_cpu(k, policy->cpus) { @@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - for_each_cpu_mask_nr(j, *covered_cpus) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } + for_each_cpu(j, covered_cpus) + wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); tmp = freqs.new; freqs.new = freqs.old; @@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } } - set_cpus_allowed_ptr(current, saved_mask); retval = 0; - goto out; -migrate_end: - preempt_enable(); - set_cpus_allowed_ptr(current, saved_mask); out: - free_cpumask_var(saved_mask); free_cpumask_var(covered_cpus); return retval; } From 1ff6e97f1d993dff2f9b6f4a9173687370660232 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 20:55:37 +0930 Subject: [PATCH 12/13] [CPUFREQ] cpumask: avoid playing with cpus_allowed in powernow-k8.c cpumask: avoid playing with cpus_allowed in powernow-k8.c It's generally a very bad idea to mug some process's cpumask: it could legitimately and reasonably be changed by root, which could break us (if done before our code) or them (if we restore the wrong value). I did not replace powernowk8_target; it needs fixing, but it grabs a mutex (so no smp_call_function_single here) but Mark points out it can be called multiple times per second, so work_on_cpu is too heavy. Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Acked-by: Mark Langsdorf Tested-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 118 +++++++++++----------- 1 file changed, 60 insertions(+), 58 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 20c7b99d7ba8..1f55547d5b30 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -508,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, return 0; } -static int check_supported_cpu(unsigned int cpu) +static void check_supported_cpu(void *_rc) { - cpumask_t oldmask; u32 eax, ebx, ecx, edx; - unsigned int rc = 0; + int *rc = _rc; - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); - goto out; - } + *rc = -ENODEV; if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) - goto out; + return; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) - goto out; + return; if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); - goto out; + return; } eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { printk(KERN_INFO PFX "No frequency change capabilities detected\n"); - goto out; + return; } cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); @@ -550,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu) != P_STATE_TRANSITION_CAPABLE) { printk(KERN_INFO PFX "Power state transitions not supported\n"); - goto out; + return; } } else { /* must be a HW Pstate capable processor */ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) cpu_family = CPU_HW_PSTATE; else - goto out; + return; } - rc = 1; - -out: - set_cpus_allowed_ptr(current, &oldmask); - return rc; + *rc = 0; } static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, @@ -1247,6 +1236,32 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +struct init_on_cpu { + struct powernow_k8_data *data; + int rc; +}; + +static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) +{ + struct init_on_cpu *init_on_cpu = _init_on_cpu; + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + init_on_cpu->rc = -ENODEV; + return; + } + + if (query_current_values_with_pending_wait(init_on_cpu->data)) { + init_on_cpu->rc = -ENODEV; + return; + } + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + init_on_cpu->rc = 0; +} + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { @@ -1254,13 +1269,14 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; - cpumask_t oldmask; + struct init_on_cpu init_on_cpu; int rc; if (!cpu_online(pol->cpu)) return -ENODEV; - if (!check_supported_cpu(pol->cpu)) + smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); + if (rc) return -ENODEV; data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); @@ -1300,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out_unmask; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out_unmask; - } - - if (query_current_values_with_pending_wait(data)) - goto err_out_unmask; - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - /* run on any CPU again */ - set_cpus_allowed_ptr(current, &oldmask); + init_on_cpu.data = data; + smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, + &init_on_cpu, 1); + rc = init_on_cpu.rc; + if (rc != 0) + goto err_out_exit_acpi; if (cpu_family == CPU_HW_PSTATE) cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); @@ -1357,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; -err_out_unmask: - set_cpus_allowed_ptr(current, &oldmask); +err_out_exit_acpi: powernow_k8_cpu_exit_acpi(data); err_out: @@ -1383,24 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) return 0; } +static void query_values_on_cpu(void *_err) +{ + int *err = _err; + struct powernow_k8_data *data = __get_cpu_var(powernow_data); + + *err = query_current_values_with_pending_wait(data); +} + static unsigned int powernowk8_get(unsigned int cpu) { struct powernow_k8_data *data = per_cpu(powernow_data, cpu); - cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; + int err; if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX - "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed_ptr(current, &oldmask); - return 0; - } - - if (query_current_values_with_pending_wait(data)) + smp_call_function_single(cpu, query_values_on_cpu, &err, true); + if (err) goto out; if (cpu_family == CPU_HW_PSTATE) @@ -1411,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu) out: - set_cpus_allowed_ptr(current, &oldmask); return khz; } @@ -1437,7 +1437,9 @@ static int __cpuinit powernowk8_init(void) unsigned int i, supported_cpus = 0; for_each_online_cpu(i) { - if (check_supported_cpu(i)) + int rc; + smp_call_function_single(i, check_supported_cpu, &rc, 1); + if (rc == 0) supported_cpus++; } From 8e7c25971b1590776a90b249de3d859dd45e7414 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 20:58:37 +0930 Subject: [PATCH 13/13] [CPUFREQ] cpumask: new cpumask operators for arch/x86/kernel/cpu/cpufreq/powernow-k8.c Remove all old-style cpumask operators, and cpumask_t. Also: get rid of the unused define_siblings function. Signed-off-by: Rusty Russell Acked-by: Mark Langsdorf Tested-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 8 ++++---- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 11 ----------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 1f55547d5b30..81cbe64ed6b4 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1094,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -1102,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, res = transition_fid_vid(data, fid, vid); freqs.new = find_khz_freq_from_fid(data->currfid); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -1127,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -1135,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, res = transition_pstate(data, pstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); - for_each_cpu_mask_nr(i, *(data->available_cores)) { + for_each_cpu(i, data->available_cores) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 6c6698feade1..c9c1190b5e1f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); - -#ifdef CONFIG_SMP -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ -} -#else -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ - cpu_set(0, cpu_sharedcore_mask[0]); -} -#endif