From 21ed5574d1622118b49b0c6342acc8d27d0799be Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 19 Oct 2015 22:37:40 -0400 Subject: [PATCH 01/32] tools/power turbostat: simplify Bzy_MHz calculation Bzy_MHz = TSC_delta*tsc_tweak/APERF_delta/MPERF_delta/measurement_interval becomes Bzy_MHz = base_mhz/APERF_delta/MPERF_delta on systems which support MSR_NHM_PLATFORM_INFO. base_mhz is calculated directly from the base_ratio reported in MSR_NHM_PLATFORM_INFO * bclk, and bclk is discovered via MSR or cpuid. This reduces the dependency of Bzy_MHz calculation on the TSC. Previously, there were 4 TSC readings required in each calculation, the raw TSC delta combined with the measurement_interval. This also removes the "tsc_tweak" correction factor used when TSC runs on a different base clock from the CPU's bclk. After this change, tsc_tweak is used only for %Busy. The end-result should be a Bzy_MHz result slightly less prone to jitter. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bde0ef1a63df..84ec4e459975 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -75,6 +75,7 @@ unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; double base_hz; +unsigned int has_base_hz; double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; @@ -96,6 +97,7 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; +double discover_bclk(unsigned int family, unsigned int model); #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -511,9 +513,13 @@ int format_counters(struct thread_data *t, struct core_data *c, } /* Bzy_MHz */ - if (has_aperf) - outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); + if (has_aperf) 
{ + if (has_base_hz) + outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); + else + outp += sprintf(outp, "%8.0f", + 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + } /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -1158,12 +1164,6 @@ int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, static void calculate_tsc_tweak() { - unsigned long long msr; - unsigned int base_ratio; - - get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); - base_ratio = (msr >> 8) & 0xFF; - base_hz = base_ratio * bclk * 1000000; tsc_tweak = base_hz / tsc_hz; } @@ -1821,6 +1821,7 @@ void check_permissions() int probe_nhm_msrs(unsigned int family, unsigned int model) { unsigned long long msr; + unsigned int base_ratio; int *pkg_cstate_limits; if (!genuine_intel) @@ -1829,6 +1830,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) if (family != 6) return 0; + bclk = discover_bclk(family, model); + switch (model) { case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ @@ -1871,9 +1874,13 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) return 0; } get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + + base_hz = base_ratio * bclk * 1000000; + has_base_hz = 1; return 1; } int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) @@ -2780,7 +2787,6 @@ void process_cpuid() do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - bclk = discover_bclk(family, model); rapl_probe(family, model); perf_limit_reasons_probe(family, model); From 759d2a932b82009a7039ef5567e7dcba153ce123 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 22 Oct 2015 02:42:12 -0400 
Subject: [PATCH 02/32] tools/power turbostat: bugfix: print MAX_NON_TURBO_RATIO MSR_TURBO_ACTIVATION_RATIO: 0x00000016 (MAX_NON_TURBO_RATIO=6 lock=0) should print all 7 bits of MAX_NON_TURBO_RATIO (in decimal): MSR_TURBO_ACTIVATION_RATIO: 0x00000016 (MAX_NON_TURBO_RATIO=22 lock=0) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 84ec4e459975..d8e4b20b6d54 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1440,7 +1440,7 @@ dump_config_tdp(void) get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF); + fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(stderr, ")\n"); } From 61f9738d65094a6b18d22c7beb6bb8c3dc0606b9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Oct 2015 15:20:46 +0100 Subject: [PATCH 03/32] ACPI / video: Add a quirk to force acpi-video backlight on Dell XPS L421X Just like the Dell XPS 15 (L521X) the Dell XPS 14 (L421X) needs to use the acpi-video backlight interface rather than the native one for backlight control to work, add a quirk for this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1272633 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/video_detect.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 0d3a384b508a..daaf1c4e1e0f 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -232,6 +232,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { "900X3C/900X3D/900X3E/900X4C/900X4D"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1272633 */ + .callback = video_detect_force_video, + .ident = "Dell XPS14 L421X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"), + }, + }, { /* https://bugzilla.redhat.com/show_bug.cgi?id=1163574 */ .callback = video_detect_force_video, From e50b9be14ab0ed10b0b3cd4112ff4bed0abf7b6f Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 28 Oct 2015 15:09:23 +0800 Subject: [PATCH 04/32] ACPI / video: only register backlight for LCD device The firmware of ESPRIMO Mobile M9410 has two video output devices that have _BCM control method, one is the type of "External Digital Monitor" while the other is the type of "Internal/Integrated Digital Flat Panel". Only the 2nd video output device's _BCM control method works, but since we have created two and the 1st one got picked up by user space, the backlight functionality is broken. To solve this problem, only register backlight interface for "Internal/Integrated Digital Flat Panel" type video output device on this laptop. Another problem of this laptop is that the IDs listed by the _DOD method don't have bit 31 set, which means they don't follow the format specified by ACPI spec. But the values indicate that they actually follow that format so I've added a DMI quirk and a module level parameter to force use of the device_id_scheme so that we can get the video output device's type to decide whether we should register backlight interface. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=104121 Suggested-by: Hans de Goede Reported-and-tested-by: Christian Scharl Signed-off-by: Aaron Lu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 78 +++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 5778e8e4313a..3405f7a41e25 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -77,6 +77,12 @@ module_param(allow_duplicates, bool, 0644); static int disable_backlight_sysfs_if = -1; module_param(disable_backlight_sysfs_if, int, 0444); +static bool device_id_scheme = false; +module_param(device_id_scheme, bool, 0444); + +static bool only_lcd = false; +module_param(only_lcd, bool, 0444); + static int register_count; static DEFINE_MUTEX(register_count_mutex); static struct mutex video_list_lock; @@ -394,6 +400,18 @@ static int video_disable_backlight_sysfs_if( return 0; } +static int video_set_device_id_scheme(const struct dmi_system_id *d) +{ + device_id_scheme = true; + return 0; +} + +static int video_enable_only_lcd(const struct dmi_system_id *d) +{ + only_lcd = true; + return 0; +} + static struct dmi_system_id video_dmi_table[] = { /* * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121 @@ -455,6 +473,33 @@ static struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R830"), }, }, + /* + * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set + * but the IDs actually follow the Device ID Scheme. 
+ */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_set_device_id_scheme, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, + /* + * Some machines have multiple video output devices, but only the one + * that is the type of LCD can do the backlight control so we should not + * register backlight interface for other video output devices. + */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_enable_only_lcd, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, {} }; @@ -1003,7 +1048,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, attribute = acpi_video_get_device_attr(video, device_id); - if (attribute && attribute->device_id_scheme) { + if (attribute && (attribute->device_id_scheme || device_id_scheme)) { switch (attribute->display_type) { case ACPI_VIDEO_DISPLAY_CRT: data->flags.crt = 1; @@ -1568,15 +1613,6 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) static int count; char *name; - /* - * Do not create backlight device for video output - * device that is not in the enumerated list. - */ - if (!acpi_video_device_in_dod(device)) { - dev_dbg(&device->dev->dev, "not in _DOD list, ignore\n"); - return; - } - result = acpi_video_init_brightness(device); if (result) return; @@ -1657,6 +1693,22 @@ static void acpi_video_run_bcl_for_osi(struct acpi_video_bus *video) mutex_unlock(&video->device_list_lock); } +static bool acpi_video_should_register_backlight(struct acpi_video_device *dev) +{ + /* + * Do not create backlight device for video output + * device that is not in the enumerated list. 
+ */ + if (!acpi_video_device_in_dod(dev)) { + dev_dbg(&dev->dev->dev, "not in _DOD list, ignore\n"); + return false; + } + + if (only_lcd) + return dev->flags.lcd; + return true; +} + static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) { struct acpi_video_device *dev; @@ -1670,8 +1722,10 @@ static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) return 0; mutex_lock(&video->device_list_lock); - list_for_each_entry(dev, &video->video_device_list, entry) - acpi_video_dev_register_backlight(dev); + list_for_each_entry(dev, &video->video_device_list, entry) { + if (acpi_video_should_register_backlight(dev)) + acpi_video_dev_register_backlight(dev); + } mutex_unlock(&video->device_list_lock); video->backlight_registered = true; From d6d2a5289a530a3020703e6a3b19a14668601c27 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 17 Oct 2015 09:45:18 +0530 Subject: [PATCH 05/32] PM / OPP: Improve print messages with pr_fmt To identify OPP core's print messages easily, prefix them with KBUILD_MODNAME. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 2 ++ drivers/base/power/opp/cpu.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index a731fa66e504..60ae6f029499 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 7654c5606307..c27a1cdffec9 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include From b64b9c3f900a0522fb926f1436088e2e36807594 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:42:44 +0530 Subject: [PATCH 06/32] PM / OPP: Rename routines specific to old bindings with _v1 Clearly distinguish routines based on what version of bindings they parse. We have already postfixed routines properly with _v2 for new bindings. Postfix the older ones now with _v1. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 60ae6f029499..ca70e281614b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -703,7 +703,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, } /** - * _opp_add_dynamic() - Allocate a dynamic OPP. + * _opp_add_v1() - Allocate a OPP based on v1 bindings. 
* @dev: device for which we do this operation * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP @@ -729,8 +729,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * Duplicate OPPs (both freq and volt are same) and !opp->available * -ENOMEM Memory allocation failure */ -static int _opp_add_dynamic(struct device *dev, unsigned long freq, - long u_volt, bool dynamic) +static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, + bool dynamic) { struct device_opp *dev_opp; struct dev_pm_opp *new_opp; @@ -934,7 +934,7 @@ unlock: */ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { - return _opp_add_dynamic(dev, freq, u_volt, true); + return _opp_add_v1(dev, freq, u_volt, true); } EXPORT_SYMBOL_GPL(dev_pm_opp_add); @@ -1236,7 +1236,7 @@ static int _of_add_opp_table_v1(struct device *dev) unsigned long freq = be32_to_cpup(val++) * 1000; unsigned long volt = be32_to_cpup(val++); - if (_opp_add_dynamic(dev, freq, volt, false)) + if (_opp_add_v1(dev, freq, volt, false)) dev_warn(dev, "%s: Failed to add OPP %ld\n", __func__, freq); nr -= 2; From ad623c31485581d6b082ef92429db3b728739cd8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:42:45 +0530 Subject: [PATCH 07/32] PM / OPP: Parse all power-supply related bindings together Move all DT parsing for the power supplies to a single function, rather than keeping them at separate places. This will help manage things properly. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/opp/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index ca70e281614b..ccf2c91aedff 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -772,9 +772,10 @@ unlock: } /* TODO: Support multiple regulators */ -static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) { u32 microvolt[3] = {0}; + u32 val; int count, ret; count = of_property_count_u32_elems(opp->np, "opp-microvolt"); @@ -800,6 +801,9 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) opp->u_volt_min = microvolt[1]; opp->u_volt_max = microvolt[2]; + if (!of_property_read_u32(opp->np, "opp-microamp", &val)) + opp->u_amp = val; + return 0; } @@ -864,13 +868,10 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_get_microvolt(new_opp, dev); + ret = opp_parse_supplies(new_opp, dev); if (ret) goto free_opp; - if (!of_property_read_u32(new_opp->np, "opp-microamp", &val)) - new_opp->u_amp = val; - ret = _opp_add(dev, new_opp, dev_opp); if (ret) goto free_opp; From 14f1ba3af6209f0394192ef07fe2bd9bccdc755f Mon Sep 17 00:00:00 2001 From: "Jon Medhurst \\(Tixy\\)" Date: Wed, 21 Oct 2015 10:55:33 +0100 Subject: [PATCH 08/32] cpufreq: arm_big_little: fix frequency check when bL switcher is active The check for correct frequency being set in bL_cpufreq_set_rate is broken when the big.LITTLE switcher is active, for two reasons. 1. The 'new_rate' variable gets overwritten before the test by the code calculating the frequency of the old cluster. 2. The frequency returned by bL_cpufreq_get_rate will be the virtual frequency, not the actual one the intended version of new_rate contains. 
This means the function always returns an error causing an endless stream of: "cpufreq: __target_index: Failed to change cpu frequency: -5" As the intent is to check for errors that clk_set_rate doesn't report, let's move the check to immediately after that and directly use clk_get_rate, rather than the arm_big_little helpers which only confuse matters. Also, update the comment to be hopefully clearer about the purpose of the code. Fixes: 0a95e630b49a (cpufreq: arm_big_little: check if the frequency is set correctly) Signed-off-by: Jon Medhurst Acked-by: Sudeep Holla Acked-by: Viresh Kumar Reviewed-by: Michael Turquette Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/arm_big_little.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index f1e42f8ce0fc..c5d256caa664 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -149,6 +149,19 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) __func__, cpu, old_cluster, new_cluster, new_rate); ret = clk_set_rate(clk[new_cluster], new_rate * 1000); + if (!ret) { + /* + * FIXME: clk_set_rate hasn't returned an error here however it + * may be that clk_change_rate failed due to hardware or + * firmware issues and wasn't able to report that due to the + * current design of the clk core layer. To work around this + * problem we will read back the clock rate and check it is + * correct. This needs to be removed once clk core is fixed. + */ + if (clk_get_rate(clk[new_cluster]) != new_rate * 1000) + ret = -EIO; + } + if (WARN_ON(ret)) { pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret, new_cluster); @@ -189,15 +202,6 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) mutex_unlock(&cluster_lock[old_cluster]); } - /* - * FIXME: clk_set_rate has to handle the case where clk_change_rate - * can fail due to hardware or firmware issues. 
Until the clk core - * layer is fixed, we can check here. In most of the cases we will - * be reading only the cached value anyway. This needs to be removed - * once clk core is fixed. - */ - if (bL_cpufreq_get_rate(cpu) != new_rate) - return -EIO; return 0; } From 539342f60b93c9f98c47f75b63fe5b8b13c1d226 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 22 Oct 2015 09:43:31 -0400 Subject: [PATCH 09/32] intel_pstate: decrease number of "HWP enabled" messages When booting an HWP enabled system the kernel displays one "HWP enabled" message for each cpu. The messages are superfluous since HWP is globally enabled across all CPUs. This patch also adds an informational message when HWP is disabled via intel_pstate=no_hwp. Signed-off-by: Prarit Bhargava Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 93a3c635ea27..2e31d097def6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -684,8 +684,6 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - pr_info("intel_pstate: HWP enabled\n"); - wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -1557,8 +1555,10 @@ static int __init intel_pstate_init(void) if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) + if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { + pr_info("intel_pstate: HWP enabled\n"); hwp_active++; + } if (!hwp_active && hwp_only) goto out; @@ -1593,8 +1593,10 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; - if (!strcmp(str, "no_hwp")) + if (!strcmp(str, "no_hwp")) { + pr_info("intel_pstate: HWP disabled\n"); no_hwp = 1; + } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) From 
3a91b069eabf5dc8d4cd6f3e66dcd700536ef9f8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Oct 2015 08:08:38 +0530 Subject: [PATCH 10/32] cpufreq: governor: Quit work-handlers early if governor is stopped gov_queue_work() acquires cpufreq_governor_lock to allow cpufreq_governor_stop() to drain delayed work items possibly scheduled on CPUs that share the policy with a CPU being taken offline. However, the same goal may be achieved in a more straightforward way if the policy pointer in the struct cpu_dbs_info matching the policy CPU is reset upfront by cpufreq_governor_stop() under the timer_mutex belonging to it and checked against NULL, under the same lock, at the beginning of dbs_timer(). In that case every instance of dbs_timer() run for a struct cpu_dbs_info sharing the policy pointer in question after cpufreq_governor_stop() has started will notice that that pointer is NULL and bail out immediately without queuing up any new work items. In turn, gov_cancel_work() called by cpufreq_governor_stop() before destroying timer_mutex will wait for all of the delayed work items currently running on the CPUs sharing the policy to drop the mutex, so it may be destroyed safely. Make cpufreq_governor_stop() and dbs_timer() work as described and modify gov_queue_work() so it does not acquire cpufreq_governor_lock any more. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_governor.c | 33 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 11258c4c1b17..b260576ddb12 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -171,10 +171,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, { int i; - mutex_lock(&cpufreq_governor_lock); - if (!policy->governor_enabled) - goto out_unlock; - if (!all_cpus) { /* * Use raw_smp_processor_id() to avoid preemptible warnings. @@ -188,9 +184,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, for_each_cpu(i, policy->cpus) __gov_queue_work(i, dbs_data, delay); } - -out_unlock: - mutex_unlock(&cpufreq_governor_lock); } EXPORT_SYMBOL_GPL(gov_queue_work); @@ -229,13 +222,24 @@ static void dbs_timer(struct work_struct *work) struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, dwork.work); struct cpu_common_dbs_info *shared = cdbs->shared; - struct cpufreq_policy *policy = shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct cpufreq_policy *policy; + struct dbs_data *dbs_data; unsigned int sampling_rate, delay; bool modify_all = true; mutex_lock(&shared->timer_mutex); + policy = shared->policy; + + /* + * Governor might already be disabled and there is no point continuing + * with the work-handler. 
+ */ + if (!policy) + goto unlock; + + dbs_data = policy->governor_data; + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -252,6 +256,7 @@ static void dbs_timer(struct work_struct *work) delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); gov_queue_work(dbs_data, policy, delay, modify_all); +unlock: mutex_unlock(&shared->timer_mutex); } @@ -478,9 +483,17 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, if (!shared || !shared->policy) return -EBUSY; + /* + * Work-handler must see this updated, as it should not proceed any + * further after governor is disabled. And so timer_mutex is taken while + * updating this value. + */ + mutex_lock(&shared->timer_mutex); + shared->policy = NULL; + mutex_unlock(&shared->timer_mutex); + gov_cancel_work(dbs_data, policy); - shared->policy = NULL; mutex_destroy(&shared->timer_mutex); return 0; } From 298cd0f0880188422a518e9d780bd49d66ea7ad5 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Wed, 28 Oct 2015 15:18:59 -0600 Subject: [PATCH 11/32] PM / Domains: Remove dev->driver check for runtime PM Remove check for driver of a device, for runtime PM. Device may be suspended without an explicit driver. This check seems to be vestigial and incorrect in the current context. Reviewed-by: Krzysztof Kozlowski Acked-by: Ulf Hansson Signed-off-by: Kevin Hilman Signed-off-by: Lina Iyer Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index a7dfdf9f15ba..e1c017411aeb 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -321,8 +321,7 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) if (stat > PM_QOS_FLAGS_NONE) return -EBUSY; - if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev) - || pdd->dev->power.irq_safe)) + if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe) not_suspended++; } From 2547923d1cc38b4b55c538e450411586acc39ac6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Wed, 28 Oct 2015 15:19:50 -0600 Subject: [PATCH 12/32] PM / Domains: Allocate memory outside domain locks In preparation for supporting IRQ-safe domains, allocate domain data outside the domain locks. These functions are not called in an atomic context, so we can always allocate memory using GFP_KERNEL. By allocating memory before the locks, we can safely lock the domain using spinlocks instead of mutexes. Reviewed-by: Kevin Hilman Acked-by: Ulf Hansson Signed-off-by: Lina Iyer Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e1c017411aeb..f932058b5db6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1311,13 +1311,17 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *link, *itr; int ret = 0; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain) || genpd == subdomain) return -EINVAL; + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + mutex_lock(&genpd->lock); mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); @@ -1327,18 +1331,13 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { - if (link->slave == subdomain && link->master == genpd) { + list_for_each_entry(itr, &genpd->master_links, master_node) { + if (itr->slave == subdomain && itr->master == genpd) { ret = -EINVAL; goto out; } } - link = kzalloc(sizeof(*link), GFP_KERNEL); - if (!link) { - ret = -ENOMEM; - goto out; - } link->master = genpd; list_add_tail(&link->master_node, &genpd->master_links); link->slave = subdomain; @@ -1349,7 +1348,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, out: mutex_unlock(&subdomain->lock); mutex_unlock(&genpd->lock); - + if (ret) + kfree(link); return ret; } From 20102ac5bee3c7c1ffff43f220e37328b6fffd16 Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Thu, 22 Oct 2015 17:17:05 +0200 Subject: [PATCH 13/32] cpupower: cpupower monitor reports uninitialized values for offline cpus [root@hp-dl980g7-02 linux]# cpupower monitor ... 
5472| 0| 1|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline 10567| 0| 159|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline 1661206560|859272560| 150|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline 1661206560|943093104| 140|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline because of this cpupower also holds the incorrect value for the number of physical packages in the machine Changed cpupower to initialize the values of an offline cpu's socket and core to -1, warn the user that one or more cpus is/are offline and not print statistics for offline cpus. This fix hides offlined cores where topology cannot be accessed. With a recent kernel patch suggested from Prarit Bhargava it may be possible that soft offlined cores' topology can still be parsed. This patch would then show which cores in which package/socket are offline, when sane topology information is available. Signed-off-by: Jacob Tanenbaum Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. 
Wysocki --- tools/power/cpupower/utils/helpers/topology.c | 23 ++++++++++++------- .../utils/idle_monitor/cpupower-monitor.c | 9 +++++++- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index cea398c176e7..9cbb7fd75171 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -73,18 +73,22 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - if (!cpu_top->core_info[cpu].is_online) - continue; if(sysfs_topology_read_file( cpu, "physical_package_id", - &(cpu_top->core_info[cpu].pkg)) < 0) - return -1; + &(cpu_top->core_info[cpu].pkg)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } if(sysfs_topology_read_file( cpu, "core_id", - &(cpu_top->core_info[cpu].core)) < 0) - return -1; + &(cpu_top->core_info[cpu].core)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } } qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), @@ -95,12 +99,15 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) done by pkg value. */ last_pkg = cpu_top->core_info[0].pkg; for(cpu = 1; cpu < cpus; cpu++) { - if(cpu_top->core_info[cpu].pkg != last_pkg) { + if (cpu_top->core_info[cpu].pkg != last_pkg && + cpu_top->core_info[cpu].pkg != -1) { + last_pkg = cpu_top->core_info[cpu].pkg; cpu_top->pkgs++; } } - cpu_top->pkgs++; + if (!cpu_top->core_info[0].pkg == -1) + cpu_top->pkgs++; /* Intel's cores count is not consecutively numbered, there may * be a core_id of 3, but none of 2. 
Assume there always is 0 diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index c4bae9203a69..05f953f0f0a0 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -143,6 +143,9 @@ void print_results(int topology_depth, int cpu) /* Be careful CPUs may got resorted for pkg value do not just use cpu */ if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) return; + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg == -1) + return; if (topology_depth > 2) printf("%4d|", cpu_top.core_info[cpu].pkg); @@ -191,7 +194,8 @@ void print_results(int topology_depth, int cpu) * It's up to the monitor plug-in to check .is_online, this one * is just for additional info. */ - if (!cpu_top.core_info[cpu].is_online) { + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg != -1) { printf(_(" *is offline\n")); return; } else @@ -388,6 +392,9 @@ int cmd_monitor(int argc, char **argv) return EXIT_FAILURE; } + if (!cpu_top.core_info[0].is_online) + printf("WARNING: at least one cpu is offline\n"); + /* Default is: monitor all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); From 645209472d909071df5af2c42ea623ef011ad3c8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 22 Oct 2015 17:17:06 +0200 Subject: [PATCH 14/32] cpupower: Remove debug message when using cpupower idle-set -D switch Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. 
Wysocki --- tools/power/cpupower/utils/cpuidle-set.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index d45d8d775c02..9a9b7a337d7b 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -148,8 +148,6 @@ int cmd_idle_set(int argc, char **argv) (cpu, idlestate); state_latency = sysfs_get_idlestate_latency (cpu, idlestate); - printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n", - cpu, idlestate, state_latency, latency); if (disabled == 1 || latency > state_latency) continue; ret = sysfs_idlestate_disable From 19c9fb896f54ade387676f134ce1de9bcd3cd478 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 22 Oct 2015 17:17:07 +0200 Subject: [PATCH 15/32] cpupower: Enable disabled Cstates if they are below max latency cpupower idle-set -D currently only disables all C-states that have a higher latency than the specified . But if deep sleep states were already disabled and have a lower latency, they should get enabled again. For example: This call: cpupower idle-set -D 30 disables all C-states with a higher or equal latency than 30. If one then calls: cpupower idle-set -D 100 C-states with a latency between 30-99 will get enabled again with this patch now. It is ensured that only C-states with a latency of 100 and higher are disabled. Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/man/cpupower-idle-set.1 | 4 +++- tools/power/cpupower/utils/cpuidle-set.c | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 3e6799d7a79f..580c4e3ea92a 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -20,7 +20,9 @@ Disable a specific processor sleep state. Enable a specific processor sleep state. 
.TP \fB\-D\fR \fB\-\-disable-by-latency\fR -Disable all idle states with a equal or higher latency than +Disable all idle states with a equal or higher latency than . + +Enable all idle states with a latency lower than . .TP \fB\-E\fR \fB\-\-enable-all\fR Enable all idle states if not enabled already. diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index 9a9b7a337d7b..eaea1301e29b 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -148,12 +148,21 @@ int cmd_idle_set(int argc, char **argv) (cpu, idlestate); state_latency = sysfs_get_idlestate_latency (cpu, idlestate); - if (disabled == 1 || latency > state_latency) + if (disabled == 1) { + if (latency > state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + } continue; - ret = sysfs_idlestate_disable - (cpu, idlestate, 1); - if (ret == 0) + } + if (latency <= state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 1); + if (ret == 0) printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + } } break; case 'E': From 57ab3b08725163bfe385aaeea6837f9b1213af3d Mon Sep 17 00:00:00 2001 From: Sriram Raghunathan Date: Fri, 23 Oct 2015 09:52:45 +0200 Subject: [PATCH 16/32] Creating a common structure initialization pattern for struct option This patch tries to create a common structure initialization within the cpupower tool. Previously the ``struct option`` was initialized using `designated initializer` technique which was not needed. There were conflicting initialization methods seen with bench/main.c & others. Signed-off-by: Sriram Raghunathan Signed-off-by: Thomas Renninger Signed-off-by: Rafael J.
Wysocki --- tools/power/cpupower/debug/i386/dump_psb.c | 2 +- tools/power/cpupower/utils/cpufreq-info.c | 30 +++++++++++----------- tools/power/cpupower/utils/cpufreq-set.c | 10 ++++---- tools/power/cpupower/utils/cpuidle-info.c | 4 +-- tools/power/cpupower/utils/cpuidle-set.c | 14 ++++------ tools/power/cpupower/utils/cpupower-info.c | 4 +-- tools/power/cpupower/utils/cpupower-set.c | 2 +- 7 files changed, 31 insertions(+), 35 deletions(-) diff --git a/tools/power/cpupower/debug/i386/dump_psb.c b/tools/power/cpupower/debug/i386/dump_psb.c index 8d6a47514253..2c768cf70128 100644 --- a/tools/power/cpupower/debug/i386/dump_psb.c +++ b/tools/power/cpupower/debug/i386/dump_psb.c @@ -134,7 +134,7 @@ next_one: } static struct option info_opts[] = { - {.name = "numpst", .has_arg=no_argument, .flag=NULL, .val='n'}, + {"numpst", no_argument, NULL, 'n'}, }; void print_help(void) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index b4b90a97662c..0e6764330241 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -536,21 +536,21 @@ static int get_latency(unsigned int cpu, unsigned int human) } static struct option info_opts[] = { - { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'}, - { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'}, - { .name = "freq", .has_arg = no_argument, .flag = NULL, .val = 'f'}, - { .name = "hwfreq", .has_arg = no_argument, .flag = NULL, .val = 'w'}, - { .name = "hwlimits", .has_arg = no_argument, .flag = NULL, .val = 'l'}, - { .name = "driver", .has_arg = no_argument, .flag = NULL, .val = 'd'}, - { .name = "policy", .has_arg = no_argument, .flag = NULL, .val = 'p'}, - { .name = "governors", .has_arg = no_argument, .flag = NULL, .val = 'g'}, - { .name = "related-cpus", .has_arg = no_argument, .flag = NULL, .val = 'r'}, - { .name = "affected-cpus",.has_arg = no_argument, .flag = NULL, .val = 'a'}, - { .name = 
"stats", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, - { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, - { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'}, + {"debug", no_argument, NULL, 'e'}, + {"boost", no_argument, NULL, 'b'}, + {"freq", no_argument, NULL, 'f'}, + {"hwfreq", no_argument, NULL, 'w'}, + {"hwlimits", no_argument, NULL, 'l'}, + {"driver", no_argument, NULL, 'd'}, + {"policy", no_argument, NULL, 'p'}, + {"governors", no_argument, NULL, 'g'}, + {"related-cpus", no_argument, NULL, 'r'}, + {"affected-cpus", no_argument, NULL, 'a'}, + {"stats", no_argument, NULL, 's'}, + {"latency", no_argument, NULL, 'y'}, + {"proc", no_argument, NULL, 'o'}, + {"human", no_argument, NULL, 'm'}, + {"no-rounding", no_argument, NULL, 'n'}, { }, }; diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 4e213576381e..0fbd1a22c0a9 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -22,11 +22,11 @@ #define NORM_FREQ_LEN 32 static struct option set_opts[] = { - { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, - { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, - { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, - { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, + {"min", required_argument, NULL, 'd'}, + {"max", required_argument, NULL, 'u'}, + {"governor", required_argument, NULL, 'g'}, + {"freq", required_argument, NULL, 'f'}, + {"related", no_argument, NULL, 'r'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 75e66de7e7a7..750c1d82c3f7 100644 
--- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -126,8 +126,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) } static struct option info_opts[] = { - { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, + {"silent", no_argument, NULL, 's'}, + {"proc", no_argument, NULL, 'o'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index eaea1301e29b..d6b6ae44b8c2 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -13,15 +13,11 @@ #include "helpers/sysfs.h" static struct option info_opts[] = { - { .name = "disable", - .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "enable", - .has_arg = required_argument, .flag = NULL, .val = 'e'}, - { .name = "disable-by-latency", - .has_arg = required_argument, .flag = NULL, .val = 'D'}, - { .name = "enable-all", - .has_arg = no_argument, .flag = NULL, .val = 'E'}, - { }, + {"disable", required_argument, NULL, 'd'}, + {"enable", required_argument, NULL, 'e'}, + {"disable-by-latency", required_argument, NULL, 'D'}, + {"enable-all", no_argument, NULL, 'E'}, + { }, }; diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 136d979e9586..10299f2e9d2a 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -17,8 +17,8 @@ #include "helpers/sysfs.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, - { }, + {"perf-bias", optional_argument, NULL, 'b'}, + { }, }; static void print_wrong_arg_exit(void) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 573c75f8e3f5..3e6f374f8dd7 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ 
b/tools/power/cpupower/utils/cpupower-set.c @@ -18,7 +18,7 @@ #include "helpers/bitmask.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'}, + {"perf-bias", required_argument, NULL, 'b'}, { }, }; From 58ac1f6202aab03d1f2c5fcfe3552af4b93321d3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 2 Nov 2015 09:45:46 -0800 Subject: [PATCH 17/32] MAINTAINERS: update for intel P-state driver Add Srinivas Pandruvada and Len Brown as maintainers and remove Kristen Carlson Accardi from the list of maintainers. Signed-off-by: Srinivas Pandruvada [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5f467845ef72..e90b6ac22a50 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5405,7 +5405,8 @@ S: Supported F: drivers/idle/intel_idle.c INTEL PSTATE DRIVER -M: Kristen Carlson Accardi +M: Srinivas Pandruvada +M: Len Brown L: linux-pm@vger.kernel.org S: Supported F: drivers/cpufreq/intel_pstate.c From d7e53e35f9f54cdfa09a8456ae8e9874ec66bb36 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 3 Nov 2015 17:13:57 -0500 Subject: [PATCH 18/32] cpufreq: s5pv210-cpufreq: fix wrong do_div() usage It is wrong to use do_div() with 32-bit dividends (unsigned long is 32 bits on 32-bit architectures). Signed-off-by: Nicolas Pitre Reviewed-by: Krzysztof Kozlowski Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/s5pv210-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 9e231f52150c..051a8a8224cd 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -212,11 +212,11 @@ static void s5pv210_set_refresh(enum s5pv210_dmc_port ch, unsigned long freq) /* Find current DRAM frequency */ tmp = s5pv210_dram_conf[ch].freq; - do_div(tmp, freq); + tmp /= freq; tmp1 = s5pv210_dram_conf[ch].refresh; - do_div(tmp1, tmp); + tmp1 /= tmp; __raw_writel(tmp1, reg); } From 1794ec1f9585501e4ed4390f5a5d396fd28c63ce Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Nov 2015 14:21:18 +0530 Subject: [PATCH 19/32] PM / OPP: Propagate error properly from dev_pm_opp_set_sharing_cpus() We are returning 0 even in case of errors, fix it. Fixes: 8d4d4e98acd6 ("PM / OPP: Add helpers for initializing CPU OPPs") Reported-by: Dan Carpenter Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Cc: 4.3 # 4.3 Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index c27a1cdffec9..2139c9d4c447 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -156,7 +156,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) out_rcu_read_unlock: rcu_read_unlock(); - return 0; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); From 87b4115db0239865bc812f61704bb1f43e2439b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Nov 2015 14:21:19 +0530 Subject: [PATCH 20/32] PM / OPP: Protect updates to list_dev with mutex dev_opp_list_lock is used everywhere to protect device and OPP lists, but dev_pm_opp_set_sharing_cpus() is missed somehow. And instead we used rcu-lock, which wouldn't help here as we are adding a new list_dev. 
This also fixes a problem where we have called kzalloc(..., GFP_KERNEL) from within rcu-lock, which isn't allowed as kzalloc can sleep when called with GFP_KERNEL. With CONFIG_DEBUG_ATOMIC_SLEEP set, we get following lockdep-splat: include/linux/rcupdate.h:578 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 5 locks held by swapper/0/1: #0: (&dev->mutex){......}, at: [] __driver_attach+0x48/0x98 #1: (&dev->mutex){......}, at: [] __driver_attach+0x58/0x98 #2: (cpu_hotplug.lock){++++++}, at: [] get_online_cpus+0x40/0xb0 #3: (subsys mutex#5){+.+.+.}, at: [] subsys_interface_register+0x44/0xdc #4: (rcu_read_lock){......}, at: [] dev_pm_opp_set_sharing_cpus+0x0/0x1e4 stack backtrace: CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 4.3.0-rc7-00047-g81f5932958a8 #59 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x94/0xbc) [] (dump_stack) from [] (___might_sleep+0x24c/0x298) [] (___might_sleep) from [] (kmem_cache_alloc+0xe8/0x164) [] (kmem_cache_alloc) from [] (_add_list_dev+0x30/0x58) [] (_add_list_dev) from [] (dev_pm_opp_set_sharing_cpus+0xd0/0x1e4) [] (dev_pm_opp_set_sharing_cpus) from [] (cpufreq_init+0x4cc/0x62c) [] (cpufreq_init) from [] (cpufreq_online+0xbc/0x73c) [] (cpufreq_online) from [] (subsys_interface_register+0x98/0xdc) [] (subsys_interface_register) from [] (cpufreq_register_driver+0x110/0x17c) [] (cpufreq_register_driver) from [] (dt_cpufreq_probe+0x60/0x8c) [] (dt_cpufreq_probe) from [] (platform_drv_probe+0x44/0xa4) [] (platform_drv_probe) from [] (driver_probe_device+0x208/0x2f4) [] (driver_probe_device) from [] (__driver_attach+0x94/0x98) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) Reported-by: Michael Turquette Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Cc: 4.3 # 4.3 Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/opp/core.c | 2 +- drivers/base/power/opp/cpu.c | 8 ++++---- drivers/base/power/opp/opp.h | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index ccf2c91aedff..252706d6f60b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -29,7 +29,7 @@ */ static LIST_HEAD(dev_opp_list); /* Lock to allow exclusive modification to the device and opp lists */ -static DEFINE_MUTEX(dev_opp_list_lock); +DEFINE_MUTEX(dev_opp_list_lock); #define opp_rcu_lockdep_assert() \ do { \ diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 2139c9d4c447..7b445e88a0d5 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -127,12 +127,12 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) struct device *dev; int cpu, ret = 0; - rcu_read_lock(); + mutex_lock(&dev_opp_list_lock); dev_opp = _find_device_opp(cpu_dev); if (IS_ERR(dev_opp)) { ret = -EINVAL; - goto out_rcu_read_unlock; + goto unlock; } for_each_cpu(cpu, cpumask) { @@ -153,8 +153,8 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) continue; } } -out_rcu_read_unlock: - rcu_read_unlock(); +unlock: + mutex_unlock(&dev_opp_list_lock); return ret; } diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index dcb38f78dae4..7366b2aa8997 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -21,6 +21,9 @@ #include #include +/* Lock to allow exclusive modification to the device and opp lists */ +extern struct mutex dev_opp_list_lock; + /* * Internal data structure organization with the OPP layer library is as * follows: From 4a3a1353a84796f93d389694e3b87ede533953fe Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Nov 2015 14:21:20 +0530 Subject: [PATCH 21/32] PM / OPP: Hold dev_opp_list_lock for writers Writers need to update OPP device and 
their list with dev_opp_list_lock mutex held, which was missed in a few places. Fix it. Signed-off-by: Viresh Kumar Cc: 4.3 # 4.3 Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 252706d6f60b..d5f215679820 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -1168,13 +1168,17 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) struct device_opp *dev_opp; int ret = 0, count = 0; + mutex_lock(&dev_opp_list_lock); + dev_opp = _managed_opp(opp_np); if (dev_opp) { /* OPPs are already managed */ if (!_add_list_dev(dev, dev_opp)) ret = -ENOMEM; + mutex_unlock(&dev_opp_list_lock); return ret; } + mutex_unlock(&dev_opp_list_lock); /* We have opp-list node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_np, np) { @@ -1192,15 +1196,20 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (WARN_ON(!count)) return -ENOENT; + mutex_lock(&dev_opp_list_lock); + dev_opp = _find_device_opp(dev); if (WARN_ON(IS_ERR(dev_opp))) { ret = PTR_ERR(dev_opp); + mutex_unlock(&dev_opp_list_lock); goto free_table; } dev_opp->np = opp_np; dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + mutex_unlock(&dev_opp_list_lock); + return 0; free_table: From 0597e818501f595090a49a1779ab6ec377051b11 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Nov 2015 14:21:21 +0530 Subject: [PATCH 22/32] PM / OPP: Add opp_rcu_lockdep_assert() to _find_device_opp() _find_device_opp() should be called with rcu-read lock or dev_opp_list_lock held. Add the opp_rcu_lockdep_assert() check to make sure callers have taken the appropriate locks. Fix comment over the routine as well. Suggested-by: Stephen Boyd Signed-off-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/opp/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index d5f215679820..c987d2e1a83b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -81,14 +81,18 @@ static struct device_opp *_managed_opp(const struct device_node *np) * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or * -EINVAL based on type of error. * - * Locking: This function must be called under rcu_read_lock(). device_opp - * is a RCU protected pointer. This means that device_opp is valid as long - * as we are under RCU lock. + * Locking: For readers, this function must be called under rcu_read_lock(). + * device_opp is a RCU protected pointer, which means that device_opp is valid + * as long as we are under RCU lock. + * + * For Writers, this function must be called with dev_opp_list_lock held. */ struct device_opp *_find_device_opp(struct device *dev) { struct device_opp *dev_opp; + opp_rcu_lockdep_assert(); + if (IS_ERR_OR_NULL(dev)) { pr_err("%s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); From efb2d3be53e4343ab6495729b3ec9ffb95261035 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 5 Nov 2015 21:10:29 +0100 Subject: [PATCH 23/32] cpufreq: CPPC: Delete an unnecessary check before the function call kfree() The kfree() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 93c219fab850..e8cb334094b0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -166,8 +166,7 @@ static int __init cppc_cpufreq_init(void) out: for_each_possible_cpu(i) - if (all_cpu_data[i]) - kfree(all_cpu_data[i]); + kfree(all_cpu_data[i]); kfree(all_cpu_data); return -ENODEV; From d764c21c7b1aa5ce982eaa8400517d8004d30267 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 28 Oct 2015 15:50:46 -0700 Subject: [PATCH 24/32] ACPI: Honor ACPI _CCA attribute setting ACPI configurations can now mark devices as noncoherent, support that choice. NOTE: This is required to support USB on ARM Juno Development Board. Signed-off-by: Jeremy Linton Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e234725eadc7..8df990520304 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -407,7 +407,7 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) * case 1. Do not support and disable DMA. * case 2. Support but rely on arch-specific cache maintenance for * non-coherence DMA operations. - * Currently, we implement case 1 above. + * Currently, we implement case 2 above. * * For the case when _CCA is missing (i.e. cca_seen=0) and * platform specifies ACPI_CCA_REQUIRED, we do not support DMA, @@ -415,7 +415,8 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) * * See acpi_init_coherency() for more info. 
*/ - if (adev->flags.coherent_dma) { + if (adev->flags.coherent_dma || + (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) { ret = true; if (coherent) *coherent = adev->flags.coherent_dma; From 1b9863c6aa56d92126ec0d5c42eae25df52b7ca1 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:47 -0700 Subject: [PATCH 25/32] device property: Introducing enum dev_dma_attr A device could have one of the following DMA attributes: * DMA not supported * DMA non-coherent * DMA coherent So, this patch introduces enum dev_dma_attr. This will be used by new APIs introduced in later patches. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..8eecf200bae5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); From b84f196d963c3159329f72ca1913b08679004a43 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:48 -0700 Subject: [PATCH 26/32] ACPI: Adding DMA Attribute APIs for ACPI Device Adding acpi_get_dma_attr() to query DMA attributes of ACPI devices. It returns the enum dev_dma_attr, which communicates DMA information more clearly. This API replaces the acpi_check_dma(), which will be removed in a subsequent patch. This patch also provides a convenient function, acpi_dma_supported(), to check DMA support of the specified ACPI device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/scan.c | 42 +++++++++++++++++++++++++++++++++++++++++ include/acpi/acpi_bus.h | 3 +++ include/linux/acpi.h | 10 ++++++++++ 3 files changed, 55 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d1ce377db3e9..ed3d76fadccf 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1308,6 +1308,48 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp) kfree(pnp->unique_id); } +/** + * acpi_dma_supported - Check DMA support for the specified device. + * @adev: The pointer to acpi device + * + * Return false if DMA is not supported. Otherwise, return true + */ +bool acpi_dma_supported(struct acpi_device *adev) +{ + if (!adev) + return false; + + if (adev->flags.cca_seen) + return true; + + /* + * Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent + * DMA on "Intel platforms". Presumably that includes all x86 and + * ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y. + */ + if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED)) + return true; + + return false; +} + +/** + * acpi_get_dma_attr - Check the supported DMA attr for the specified device. + * @adev: The pointer to acpi device + * + * Return enum dev_dma_attr. 
+ */ +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + if (!acpi_dma_supported(adev)) + return DEV_DMA_NOT_SUPPORTED; + + if (adev->flags.coherent_dma) + return DEV_DMA_COHERENT; + else + return DEV_DMA_NON_COHERENT; +} + static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 8df990520304..e56e6520edce 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -596,6 +596,9 @@ struct acpi_pci_root { /* helper */ +bool acpi_dma_supported(struct acpi_device *adev); +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); + struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); int acpi_is_root_bridge(acpi_handle); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 496265b0f527..292af3b69ede 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -579,6 +579,16 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) return false; } +static inline bool acpi_dma_supported(struct acpi_device *adev) +{ + return false; +} + +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ From e5e558644bbb23cad03c586703331b8bcd9e0e6c Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:49 -0700 Subject: [PATCH 27/32] device property: Adding DMA Attribute APIs for Generic Devices The function device_dma_is_coherent() does not sufficiently communicate device DMA attributes. Instead, this patch introduces device_get_dma_attr(), which returns enum dev_dma_attr. It replaces the acpi_check_dma(), which will be removed in subsequent patch. This also provides a convenient function, device_dma_supported(), to check DMA support of the specified device. 
Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 29 +++++++++++++++++++++++++++++ include/linux/property.h | 4 ++++ 2 files changed, 33 insertions(+) diff --git a/drivers/base/property.c b/drivers/base/property.c index de40623bbd8a..05d57a2afa05 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -611,6 +611,35 @@ bool device_dma_is_coherent(struct device *dev) } EXPORT_SYMBOL_GPL(device_dma_is_coherent); +bool device_dma_supported(struct device *dev) +{ + /* For DT, this is always supported. + * For ACPI, this depends on CCA, which + * is determined by the acpi_dma_supported(). + */ + if (IS_ENABLED(CONFIG_OF) && dev->of_node) + return true; + + return acpi_dma_supported(ACPI_COMPANION(dev)); +} +EXPORT_SYMBOL_GPL(device_dma_supported); + +enum dev_dma_attr device_get_dma_attr(struct device *dev) +{ + enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + if (of_dma_is_coherent(dev->of_node)) + attr = DEV_DMA_COHERENT; + else + attr = DEV_DMA_NON_COHERENT; + } else + attr = acpi_get_dma_attr(ACPI_COMPANION(dev)); + + return attr; +} +EXPORT_SYMBOL_GPL(device_get_dma_attr); + /** * device_get_phy_mode - Get phy mode for given device * @dev: Pointer to the given device diff --git a/include/linux/property.h b/include/linux/property.h index 8eecf200bae5..7200490b7e6f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,10 @@ void device_add_property_set(struct device *dev, struct property_set *pset); bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); + int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); From 1831eff876bd0bb8d64e9965a7ff47486c9a3ecd Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: 
Wed, 28 Oct 2015 15:50:50 -0700 Subject: [PATCH 28/32] device property: ACPI: Make use of the new DMA Attribute APIs Now that we have the new DMA attribute APIs, we can replace the older acpi_check_dma() and device_dma_is_coherent(). Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_platform.c | 7 ++++++- drivers/acpi/glue.c | 8 +++++--- drivers/crypto/ccp/ccp-platform.c | 19 +++++++++++++------ drivers/net/ethernet/amd/xgbe/xgbe-main.c | 8 +++++++- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 06a67d5f2846..296b7a14893a 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -103,7 +103,12 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) pdevinfo.res = resources; pdevinfo.num_res = count; pdevinfo.fwnode = acpi_fwnode_handle(adev); - pdevinfo.dma_mask = acpi_check_dma(adev, NULL) ? 
DMA_BIT_MASK(32) : 0; + + if (acpi_dma_supported(adev)) + pdevinfo.dma_mask = DMA_BIT_MASK(32); + else + pdevinfo.dma_mask = 0; + pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) dev_err(&adev->dev, "platform device creation failed: %ld\n", diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index b9657af751d1..a66e77676b8a 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -168,7 +168,7 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; - bool coherent; + enum dev_dma_attr attr; if (has_acpi_companion(dev)) { if (acpi_dev) { @@ -225,8 +225,10 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); - if (acpi_check_dma(acpi_dev, &coherent)) - arch_setup_dma_ops(dev, 0, 0, NULL, coherent); + attr = acpi_get_dma_attr(acpi_dev); + if (attr != DEV_DMA_NOT_SUPPORTED) + arch_setup_dma_ops(dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index bb241c3ab6b9..844118cb31a3 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -96,6 +96,7 @@ static int ccp_platform_probe(struct platform_device *pdev) struct ccp_platform *ccp_platform; struct device *dev = &pdev->dev; struct acpi_device *adev = ACPI_COMPANION(dev); + enum dev_dma_attr attr; struct resource *ior; int ret; @@ -122,18 +123,24 @@ static int ccp_platform_probe(struct platform_device *pdev) } ccp->io_regs = ccp->io_map; + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto e_err; + } + + ccp_platform->coherent = (attr == DEV_DMA_COHERENT); + if (ccp_platform->coherent) + ccp->axcache = CACHE_WB_NO_ALLOC; + else + 
ccp->axcache = CACHE_NONE; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); goto e_err; } - ccp_platform->coherent = device_dma_is_coherent(ccp->dev); - if (ccp_platform->coherent) - ccp->axcache = CACHE_WB_NO_ALLOC; - else - ccp->axcache = CACHE_NONE; - dev_set_drvdata(dev, ccp); ret = ccp_init(ccp); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index e83bd76abce6..c607b3f88f0b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -342,6 +342,7 @@ static int xgbe_probe(struct platform_device *pdev) struct resource *res; const char *phy_mode; unsigned int i, phy_memnum, phy_irqnum; + enum dev_dma_attr attr; int ret; DBGPR("--> xgbe_probe\n"); @@ -609,7 +610,12 @@ static int xgbe_probe(struct platform_device *pdev) goto err_io; /* Set the DMA coherency values */ - pdata->coherent = device_dma_is_coherent(pdata->dev); + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto err_io; + } + pdata->coherent = (attr == DEV_DMA_COHERENT); if (pdata->coherent) { pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; pdata->arcache = XGBE_DMA_OS_ARCACHE; From ab3d527329f01dd63dc852041006d1a24895d116 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:51 -0700 Subject: [PATCH 29/32] device property: ACPI: Remove unused DMA APIs These DMA APIs are replaced with the newer versions, which return the enum dev_dma_attr. So, we can safely remove them. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- drivers/base/property.c | 13 ------------- include/acpi/acpi_bus.h | 34 ---------------------------------- include/linux/acpi.h | 5 ----- include/linux/property.h | 2 -- 4 files changed, 54 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 05d57a2afa05..1325ff225cc4 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -598,19 +598,6 @@ unsigned int device_get_child_node_count(struct device *dev) } EXPORT_SYMBOL_GPL(device_get_child_node_count); -bool device_dma_is_coherent(struct device *dev) -{ - bool coherent = false; - - if (IS_ENABLED(CONFIG_OF) && dev->of_node) - coherent = of_dma_is_coherent(dev->of_node); - else - acpi_check_dma(ACPI_COMPANION(dev), &coherent); - - return coherent; -} -EXPORT_SYMBOL_GPL(device_dma_is_coherent); - bool device_dma_supported(struct device *dev) { /* For DT, this is always supported. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e56e6520edce..e45d58d6b0a7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -390,40 +390,6 @@ struct acpi_data_node { struct completion kobj_done; }; -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - bool ret = false; - - if (!adev) - return ret; - - /** - * Currently, we only support _CCA=1 (i.e. coherent_dma=1) - * This should be equivalent to specifyig dma-coherent for - * a device in OF. - * - * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1), - * There are two cases: - * case 1. Do not support and disable DMA. - * case 2. Support but rely on arch-specific cache maintenance for - * non-coherence DMA operations. - * Currently, we implement case 2 above. - * - * For the case when _CCA is missing (i.e. cca_seen=0) and - * platform specifies ACPI_CCA_REQUIRED, we do not support DMA, - * and fallback to arch-specific default handling. - * - * See acpi_init_coherency() for more info. 
- */ - if (adev->flags.coherent_dma || - (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) { - ret = true; - if (coherent) - *coherent = adev->flags.coherent_dma; - } - return ret; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return fwnode && (fwnode->type == FWNODE_ACPI diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 292af3b69ede..b5868300df75 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -574,11 +574,6 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - return false; -} - static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/property.h b/include/linux/property.h index 7200490b7e6f..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -174,8 +174,6 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); From 37efbe20fb19cee43d95b7ba9e54d5d0b00c0d67 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:52 -0700 Subject: [PATCH 30/32] of/pci: Fix pci_get_host_bridge_device leak In case of error, the current code returns without calling pci_put_host_bridge_device(). This patch fixes this. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J.
Wysocki --- drivers/of/of_pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index 5751dc5b6494..a2f510cc683b 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -129,10 +129,9 @@ void of_pci_dma_configure(struct pci_dev *pci_dev) struct device *dev = &pci_dev->dev; struct device *bridge = pci_get_host_bridge_device(pci_dev); - if (!bridge->parent) - return; + if (bridge->parent) + of_dma_configure(dev, bridge->parent->of_node); - of_dma_configure(dev, bridge->parent->of_node); pci_put_host_bridge_device(bridge); } EXPORT_SYMBOL_GPL(of_pci_dma_configure); From 50230713b63941f4b6b562eea0834f751aa0801e Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:53 -0700 Subject: [PATCH 31/32] PCI: OF: Move of_pci_dma_configure() to pci_dma_configure() This patch moves of_pci_dma_configure() to a more generic pci_dma_configure(), which can be extended by non-OF code (e.g. ACPI). This makes no functional change. Signed-off-by: Suravee Suthikulpanit Acked-by: Rob Herring Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- drivers/of/of_pci.c | 19 ------------------- drivers/pci/probe.c | 22 +++++++++++++++++++++- include/linux/of_pci.h | 3 --- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index a2f510cc683b..b66ee4ebf650 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -117,25 +117,6 @@ int of_get_pci_domain_nr(struct device_node *node) } EXPORT_SYMBOL_GPL(of_get_pci_domain_nr); -/** - * of_pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node of host bridge's parent (if any).
- */ -void of_pci_dma_configure(struct pci_dev *pci_dev) -{ - struct device *dev = &pci_dev->dev; - struct device *bridge = pci_get_host_bridge_device(pci_dev); - - if (bridge->parent) - of_dma_configure(dev, bridge->parent->of_node); - - pci_put_host_bridge_device(bridge); -} -EXPORT_SYMBOL_GPL(of_pci_dma_configure); - #if defined(CONFIG_OF_ADDRESS) /** * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8361d27e5eca..d35f83d80b15 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1633,6 +1634,25 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_get_msi_domain(&dev->bus->dev)); } +/** + * pci_dma_configure - Setup DMA configuration + * @dev: ptr to pci_dev struct of the PCI device + * + * Function to update PCI devices's DMA configuration using the same + * info from the OF node of host bridge's parent (if any). 
+ */ +static void pci_dma_configure(struct pci_dev *dev) +{ + struct device *bridge = pci_get_host_bridge_device(dev); + + if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { + if (bridge->parent) + of_dma_configure(&dev->dev, bridge->parent->of_node); + } + + pci_put_host_bridge_device(bridge); +} + void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1646,7 +1666,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - of_pci_dma_configure(dev); + pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff); diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..ce0e5abeb454 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -51,8 +50,6 @@ of_get_pci_domain_nr(struct device_node *node) { return -1; } - -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } #endif #if defined(CONFIG_OF_ADDRESS) From 29dbe1f0af88b4162d2b57e790db7a51ab061f35 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:54 -0700 Subject: [PATCH 32/32] PCI: ACPI: Add support for PCI device DMA coherency This patch adds support for setting up PCI device DMA coherency from the ACPI _CCA object that should normally be specified in the DSDT node of its PCI host bridge. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J.
Wysocki --- drivers/pci/probe.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index d35f83d80b15..064078e11017 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "pci.h" @@ -1639,7 +1640,7 @@ static void pci_set_msi_domain(struct pci_dev *dev) * @dev: ptr to pci_dev struct of the PCI device * * Function to update PCI devices's DMA configuration using the same - * info from the OF node of host bridge's parent (if any). + * info from the OF node or ACPI node of host bridge's parent (if any). */ static void pci_dma_configure(struct pci_dev *dev) { @@ -1648,6 +1649,15 @@ static void pci_dma_configure(struct pci_dev *dev) if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { if (bridge->parent) of_dma_configure(&dev->dev, bridge->parent->of_node); + } else if (has_acpi_companion(bridge)) { + struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); + enum dev_dma_attr attr = acpi_get_dma_attr(adev); + + if (attr == DEV_DMA_NOT_SUPPORTED) + dev_warn(&dev->dev, "DMA not supported.\n"); + else + arch_setup_dma_ops(&dev->dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); } pci_put_host_bridge_device(bridge);