From f328584f7bff86858249bb358f1adcecde48388a Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jun 2019 12:28:15 -0400 Subject: [PATCH 001/126] cpufreq: Add sun50i nvmem based CPU scaling driver For some SoCs, the CPU frequency subset and voltage value of each OPP varies based on the silicon variant in use. The sun50i-cpufreq-nvmem driver reads the efuse value from the SoC to provide the OPP framework with required information. Signed-off-by: Yangtao Li Acked-by: Maxime Ripard Signed-off-by: Viresh Kumar --- MAINTAINERS | 7 + drivers/cpufreq/Kconfig.arm | 12 ++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/cpufreq-dt-platdev.c | 2 + drivers/cpufreq/sun50i-cpufreq-nvmem.c | 226 +++++++++++++++++++++++++ 5 files changed, 248 insertions(+) create mode 100644 drivers/cpufreq/sun50i-cpufreq-nvmem.c diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..d3964379841b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -676,6 +676,13 @@ L: linux-media@vger.kernel.org S: Maintained F: drivers/staging/media/allegro-dvt/ +ALLWINNER CPUFREQ DRIVER +M: Yangtao Li +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt +F: drivers/cpufreq/sun50i-cpufreq-nvmem.c + ALLWINNER SECURITY SYSTEM M: Corentin Labbe L: linux-crypto@vger.kernel.org diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 56c31a78c692..70c2b4bea55c 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -19,6 +19,18 @@ config ACPI_CPPC_CPUFREQ If in doubt, say N. +config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM + tristate "Allwinner nvmem based SUN50I CPUFreq driver" + depends on ARCH_SUNXI + depends on NVMEM_SUNXI_SID + select PM_OPP + help + This adds the nvmem based CPUFreq driver for Allwinner + h6 SoC. + + To compile this driver as a module, choose M here: the + module will be called sun50i-cpufreq-nvmem. + config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" depends on ARCH_MVEBU && CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 5a6c70d26c98..7f2d2e1079d4 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_ARM_SCMI_CPUFREQ) += scmi-cpufreq.o obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o +obj-$(CONFIG_ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM) += sun50i-cpufreq-nvmem.o obj-$(CONFIG_ARM_TANGO_CPUFREQ) += tango-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 03dc4244ab00..f9444ddd35ab 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -101,6 +101,8 @@ static const struct of_device_id whitelist[] __initconst = { * platforms using "operating-points-v2" property. */ static const struct of_device_id blacklist[] __initconst = { + { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "calxeda,highbank", }, { .compatible = "calxeda,ecx-2000", }, diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c new file mode 100644 index 000000000000..eca32e443716 --- /dev/null +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Allwinner CPUFreq nvmem based driver + * + * The sun50i-cpufreq-nvmem driver reads the efuse value from the SoC to + * provide the OPP framework with required information. + * + * Copyright (C) 2019 Yangtao Li + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#define MAX_NAME_LEN 7 + +#define NVMEM_MASK 0x7 +#define NVMEM_SHIFT 5 + +static struct platform_device *cpufreq_dt_pdev, *sun50i_cpufreq_pdev; + +/** + * sun50i_cpufreq_get_efuse() - Parse and return efuse value present on SoC + * @versions: Set to the value parsed from efuse + * + * Returns 0 if success. + */ +static int sun50i_cpufreq_get_efuse(u32 *versions) +{ + struct nvmem_cell *speedbin_nvmem; + struct device_node *np; + struct device *cpu_dev; + u32 *speedbin, efuse_value; + size_t len; + int ret; + + cpu_dev = get_cpu_device(0); + if (!cpu_dev) + return -ENODEV; + + np = dev_pm_opp_of_get_opp_desc_node(cpu_dev); + if (!np) + return -ENOENT; + + ret = of_device_is_compatible(np, + "allwinner,sun50i-h6-operating-points"); + if (!ret) { + of_node_put(np); + return -ENOENT; + } + + speedbin_nvmem = of_nvmem_cell_get(np, NULL); + of_node_put(np); + if (IS_ERR(speedbin_nvmem)) { + if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) + pr_err("Could not get nvmem cell: %ld\n", + PTR_ERR(speedbin_nvmem)); + return PTR_ERR(speedbin_nvmem); + } + + speedbin = nvmem_cell_read(speedbin_nvmem, &len); + nvmem_cell_put(speedbin_nvmem); + if (IS_ERR(speedbin)) + return PTR_ERR(speedbin); + + efuse_value = (*speedbin >> NVMEM_SHIFT) & NVMEM_MASK; + switch (efuse_value) { + case 0b0001: + *versions = 1; + break; + case 0b0011: + *versions = 2; + break; + default: + /* + * For other situations, we treat it as bin0. + * This vf table can be run for any good cpu. + */ + *versions = 0; + break; + } + + kfree(speedbin); + return 0; +}; + +static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) +{ + struct opp_table **opp_tables; + char name[MAX_NAME_LEN]; + unsigned int cpu; + u32 speed = 0; + int ret; + + opp_tables = kcalloc(num_possible_cpus(), sizeof(*opp_tables), + GFP_KERNEL); + if (!opp_tables) + return -ENOMEM; + + ret = sun50i_cpufreq_get_efuse(&speed); + if (ret) + return ret; + + snprintf(name, MAX_NAME_LEN, "speed%d", speed); + + for_each_possible_cpu(cpu) { + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) { + ret = -ENODEV; + goto free_opp; + } + + opp_tables[cpu] = dev_pm_opp_set_prop_name(cpu_dev, name); + if (IS_ERR(opp_tables[cpu])) { + ret = PTR_ERR(opp_tables[cpu]); + pr_err("Failed to set prop name\n"); + goto free_opp; + } + } + + cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1, + NULL, 0); + if (!IS_ERR(cpufreq_dt_pdev)) { + platform_set_drvdata(pdev, opp_tables); + return 0; + } + + ret = PTR_ERR(cpufreq_dt_pdev); + pr_err("Failed to register platform device\n"); + +free_opp: + for_each_possible_cpu(cpu) { + if (IS_ERR_OR_NULL(opp_tables[cpu])) + break; + dev_pm_opp_put_prop_name(opp_tables[cpu]); + } + kfree(opp_tables); + + return ret; +} + +static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev) +{ + struct opp_table **opp_tables = platform_get_drvdata(pdev); + unsigned int cpu; + + platform_device_unregister(cpufreq_dt_pdev); + + for_each_possible_cpu(cpu) + dev_pm_opp_put_prop_name(opp_tables[cpu]); + + kfree(opp_tables); + + return 0; +} + +static struct platform_driver sun50i_cpufreq_driver = { + .probe = sun50i_cpufreq_nvmem_probe, + .remove = sun50i_cpufreq_nvmem_remove, + .driver = { + .name = "sun50i-cpufreq-nvmem", + }, +}; + +static const struct of_device_id sun50i_cpufreq_match_list[] = { + { .compatible = "allwinner,sun50i-h6" }, + {} +}; + +static const struct of_device_id *sun50i_cpufreq_match_node(void) +{ + const struct of_device_id *match; + struct device_node *np; + + np = of_find_node_by_path("/"); + match = of_match_node(sun50i_cpufreq_match_list, np); + of_node_put(np); + + return match; +} + +/* + * Since the driver depends on nvmem drivers, which may return EPROBE_DEFER, + * all the real activity is done in the probe, which may be defered as well. + * The init here is only registering the driver and the platform device. + */ +static int __init sun50i_cpufreq_init(void) +{ + const struct of_device_id *match; + int ret; + + match = sun50i_cpufreq_match_node(); + if (!match) + return -ENODEV; + + ret = platform_driver_register(&sun50i_cpufreq_driver); + if (unlikely(ret < 0)) + return ret; + + sun50i_cpufreq_pdev = + platform_device_register_simple("sun50i-cpufreq-nvmem", + -1, NULL, 0); + ret = PTR_ERR_OR_ZERO(sun50i_cpufreq_pdev); + if (ret == 0) + return 0; + + platform_driver_unregister(&sun50i_cpufreq_driver); + return ret; +} +module_init(sun50i_cpufreq_init); + +static void __exit sun50i_cpufreq_exit(void) +{ + platform_device_unregister(sun50i_cpufreq_pdev); + platform_driver_unregister(&sun50i_cpufreq_driver); +} +module_exit(sun50i_cpufreq_exit); + +MODULE_DESCRIPTION("Sun50i-h6 cpufreq driver"); +MODULE_LICENSE("GPL v2"); From b880c18336252df4866d07f850ea7872b6e83f0a Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 12 Jun 2019 12:28:16 -0400 Subject: [PATCH 002/126] dt-bindings: cpufreq: Document allwinner,sun50i-h6-operating-points Allwinner Process Voltage Scaling Tables defines the voltage and frequency value based on the speedbin blown in the efuse combination. The sunxi-cpufreq-nvmem driver reads the efuse value from the SoC to provide the OPP framework with required information. This is used to determine the voltage and frequency value for each OPP of operating-points-v2 table when it is parsed by the OPP framework. The "allwinner,sun50i-h6-operating-points" DT extends the "operating-points-v2" with following parameters: - nvmem-cells (NVMEM area containig the speedbin information) - opp-microvolt-: voltage in micro Volts. At runtime, the platform can pick a and matching opp-microvolt- property. HW: : sun50i-h6 speed0 speed1 speed2 Signed-off-by: Yangtao Li Acked-by: Maxime Ripard Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/opp/sun50i-nvmem-cpufreq.txt | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt diff --git a/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt b/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt new file mode 100644 index 000000000000..7deae57a587b --- /dev/null +++ b/Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt @@ -0,0 +1,167 @@ +Allwinner Technologies, Inc. NVMEM CPUFreq and OPP bindings +=================================== + +For some SoCs, the CPU frequency subset and voltage value of each OPP +varies based on the silicon variant in use. Allwinner Process Voltage +Scaling Tables defines the voltage and frequency value based on the +speedbin blown in the efuse combination. The sun50i-cpufreq-nvmem driver +reads the efuse value from the SoC to provide the OPP framework with +required information. + +Required properties: +-------------------- +In 'cpus' nodes: +- operating-points-v2: Phandle to the operating-points-v2 table to use. + +In 'operating-points-v2' table: +- compatible: Should be + - 'allwinner,sun50i-h6-operating-points'. +- nvmem-cells: A phandle pointing to a nvmem-cells node representing the + efuse registers that has information about the speedbin + that is used to select the right frequency/voltage value + pair. Please refer the for nvmem-cells bindings + Documentation/devicetree/bindings/nvmem/nvmem.txt and + also examples below. + +In every OPP node: +- opp-microvolt-: Voltage in micro Volts. + At runtime, the platform can pick a and + matching opp-microvolt- property. + [See: opp.txt] + HW: : + sun50i-h6 speed0 speed1 speed2 + +Example 1: +--------- + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <0>; + enable-method = "psci"; + clocks = <&ccu CLK_CPUX>; + clock-latency-ns = <244144>; /* 8 32k periods */ + operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; + }; + + cpu1: cpu@1 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <1>; + enable-method = "psci"; + clocks = <&ccu CLK_CPUX>; + clock-latency-ns = <244144>; /* 8 32k periods */ + operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; + }; + + cpu2: cpu@2 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <2>; + enable-method = "psci"; + clocks = <&ccu CLK_CPUX>; + clock-latency-ns = <244144>; /* 8 32k periods */ + operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; + }; + + cpu3: cpu@3 { + compatible = "arm,cortex-a53"; + device_type = "cpu"; + reg = <3>; + enable-method = "psci"; + clocks = <&ccu CLK_CPUX>; + clock-latency-ns = <244144>; /* 8 32k periods */ + operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; + }; + }; + + cpu_opp_table: opp_table { + compatible = "allwinner,sun50i-h6-operating-points"; + nvmem-cells = <&speedbin_efuse>; + opp-shared; + + opp@480000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <480000000>; + + opp-microvolt-speed0 = <880000>; + opp-microvolt-speed1 = <820000>; + opp-microvolt-speed2 = <800000>; + }; + + opp@720000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <720000000>; + + opp-microvolt-speed0 = <880000>; + opp-microvolt-speed1 = <820000>; + opp-microvolt-speed2 = <800000>; + }; + + opp@816000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <816000000>; + + opp-microvolt-speed0 = <880000>; + opp-microvolt-speed1 = <820000>; + opp-microvolt-speed2 = <800000>; + }; + + opp@888000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <888000000>; + + opp-microvolt-speed0 = <940000>; + opp-microvolt-speed1 = <820000>; + opp-microvolt-speed2 = <800000>; + }; + + opp@1080000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <1080000000>; + + opp-microvolt-speed0 = <1060000>; + opp-microvolt-speed1 = <880000>; + opp-microvolt-speed2 = <840000>; + }; + + opp@1320000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <1320000000>; + + opp-microvolt-speed0 = <1160000>; + opp-microvolt-speed1 = <940000>; + opp-microvolt-speed2 = <900000>; + }; + + opp@1488000000 { + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-hz = /bits/ 64 <1488000000>; + + opp-microvolt-speed0 = <1160000>; + opp-microvolt-speed1 = <1000000>; + opp-microvolt-speed2 = <960000>; + }; + }; +.... +soc { +.... + sid: sid@3006000 { + compatible = "allwinner,sun50i-h6-sid"; + reg = <0x03006000 0x400>; + #address-cells = <1>; + #size-cells = <1>; + .... + speedbin_efuse: speed@1c { + reg = <0x1c 4>; + }; + }; +}; From 3355c91b79394593ebbb197c8e930a91826f4ff3 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 21 Jul 2019 23:38:15 +0530 Subject: [PATCH 003/126] cpufreq: ap806: Add NULL check after kcalloc Add NULL check after kcalloc. Fix below issue reported by coccicheck ./drivers/cpufreq/armada-8k-cpufreq.c:138:1-12: alloc with no test, possible model on line 151 Signed-off-by: Hariprasad Kelam Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 988ebc326bdb..39e34f5066d3 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -136,6 +136,8 @@ static int __init armada_8k_cpufreq_init(void) nb_cpus = num_possible_cpus(); freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL); + if (!freq_tables) + return -ENOMEM; cpumask_copy(&cpus, cpu_possible_mask); /* From 3a79bc63d90750f737ab9d7219bd3091d2fd6d84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 13:03:20 +0200 Subject: [PATCH 004/126] PCI: irq: Introduce rearm_wake_irq() Introduce a new function, rearm_wake_irq(), allowing a wakeup IRQ to be armed for systen wakeup detection again without running any action handlers associated with it after it has been armed for wakeup detection and triggered. That is useful for IRQs, like ACPI SCI, that may deliver wakeup as well as non-wakeup interrupts when armed for systen wakeup detection. In those cases, it may be possible to determine whether or not the delivered interrupt is a systen wakeup one without running the entire action handler (or handlers, if the IRQ is shared) for the IRQ, and if the interrupt turns out to be a non-wakeup one, the IRQ can be rearmed with the help of the new function. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- include/linux/interrupt.h | 1 + kernel/irq/pm.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5b8328a99b2a..0e9cdb3efda7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -238,6 +238,7 @@ extern void teardown_percpu_nmi(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); +extern void rearm_wake_irq(unsigned int irq); /** * struct irq_affinity_notify - context for notification of IRQ affinity changes diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index d6961d3c6f9e..8f557fa1f4fe 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -176,6 +176,26 @@ static void resume_irqs(bool want_early) } } +/** + * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup + * @irq: Interrupt to rearm + */ +void rearm_wake_irq(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc || !(desc->istate & IRQS_SUSPENDED) || + !irqd_is_wakeup_set(&desc->irq_data)) + return; + + desc->istate &= ~IRQS_SUSPENDED; + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + __enable_irq(desc); + + irq_put_desc_busunlock(desc, flags); +} + /** * irq_pm_syscore_ops - enable interrupt lines early * From 6921de898ba8f2ec91cfea70e7160b89c477382e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 13:03:28 +0200 Subject: [PATCH 005/126] ACPICA: Return u32 from acpi_dispatch_gpe() In some cases it is useful to know whether or not the acpi_ev_detect_gpe() called by acpi_dispatch_gpe() has found the GPE to be active, so return the return value of it (whose data type is u32) from latter. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/acpi/acpica/evxfgpe.c | 6 +++--- include/acpi/acpixf.h | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c index 710488ec59e9..04a40d563dd6 100644 --- a/drivers/acpi/acpica/evxfgpe.c +++ b/drivers/acpi/acpica/evxfgpe.c @@ -644,17 +644,17 @@ ACPI_EXPORT_SYMBOL(acpi_get_gpe_status) * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 * gpe_number - GPE level within the GPE block * - * RETURN: None + * RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED * * DESCRIPTION: Detect and dispatch a General Purpose Event to either a function * (e.g. EC) or method (e.g. _Lxx/_Exx) handler. * ******************************************************************************/ -void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number) +u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number) { ACPI_FUNCTION_TRACE(acpi_dispatch_gpe); - acpi_ev_detect_gpe(gpe_device, NULL, gpe_number); + return acpi_ev_detect_gpe(gpe_device, NULL, gpe_number); } ACPI_EXPORT_SYMBOL(acpi_dispatch_gpe) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 3845c8fcc94e..4ed603a3b448 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -297,6 +297,9 @@ ACPI_GLOBAL(u8, acpi_gbl_system_awake_and_running); #define ACPI_HW_DEPENDENT_RETURN_OK(prototype) \ ACPI_EXTERNAL_RETURN_OK(prototype) +#define ACPI_HW_DEPENDENT_RETURN_UINT32(prototype) \ + ACPI_EXTERNAL_RETURN_UINT32(prototype) + #define ACPI_HW_DEPENDENT_RETURN_VOID(prototype) \ ACPI_EXTERNAL_RETURN_VOID(prototype) @@ -307,6 +310,9 @@ ACPI_GLOBAL(u8, acpi_gbl_system_awake_and_running); #define ACPI_HW_DEPENDENT_RETURN_OK(prototype) \ static ACPI_INLINE prototype {return(AE_OK);} +#define ACPI_HW_DEPENDENT_RETURN_UINT32(prototype) \ + static ACPI_INLINE prototype {return(0);} + #define ACPI_HW_DEPENDENT_RETURN_VOID(prototype) \ static ACPI_INLINE prototype {return;} @@ -738,7 +744,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status u32 gpe_number, acpi_event_status *event_status)) -ACPI_HW_DEPENDENT_RETURN_VOID(void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) From 9089f16e053afc5e18feaeb9f64cc7c90d6bd687 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 13:03:39 +0200 Subject: [PATCH 006/126] ACPI: EC: Return bool from acpi_ec_dispatch_gpe() On some systems, if suspend-to-idle is used, the EC may signal system wakeup events (power button events, for example) as well as events that should not cause the system to resume and acpi_ec_dispatch_gpe() needs to be called to determine whether or not the system should resume then. In particular, if acpi_ec_dispatch_gpe() doesn't detect any EC events at all, the system should remain suspended, so it is useful to know when that is the case. For this reason, make acpi_ec_dispatch_gpe() return a bool value indicating whether or not any EC events have been detected by it. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/acpi/ec.c | 11 ++++++++--- drivers/acpi/internal.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index c33756ed3304..58c7ad402d8d 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1060,10 +1060,15 @@ void acpi_ec_set_gpe_wake_mask(u8 action) acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); } -void acpi_ec_dispatch_gpe(void) +bool acpi_ec_dispatch_gpe(void) { - if (first_ec) - acpi_dispatch_gpe(NULL, first_ec->gpe); + u32 ret; + + if (!first_ec) + return false; + + ret = acpi_dispatch_gpe(NULL, first_ec->gpe); + return ret == ACPI_INTERRUPT_HANDLED; } /* -------------------------------------------------------------------------- diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index f4c2fe6be4f2..1b5f9ac06ea8 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -196,7 +196,7 @@ void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); -void acpi_ec_dispatch_gpe(void); +bool acpi_ec_dispatch_gpe(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); From 2933954b71f10d392764f95eec0f0aa2d103054b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 13:03:48 +0200 Subject: [PATCH 007/126] PM: sleep: Fix possible overflow in pm_system_cancel_wakeup() It is not actually guaranteed that pm_abort_suspend will be nonzero when pm_system_cancel_wakeup() is called which may lead to subtle issues, so make it use atomic_dec_if_positive() instead of atomic_dec() for the safety sake. Fixes: 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle") Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/base/power/wakeup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index ee31d4f8d856..b30c45aad10f 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -859,7 +859,7 @@ EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_system_cancel_wakeup(void) { - atomic_dec(&pm_abort_suspend); + atomic_dec_if_positive(&pm_abort_suspend); } void pm_wakeup_clear(bool reset) From 41275eb5c7181febdfaa63c3a0ad9b7acdadcd52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 23:51:19 +0200 Subject: [PATCH 008/126] ACPI: PM: Set s2idle_wakeup earlier and clear it later The role of the s2idle_wakeup variable is to cause acpi_pm_wakeup_event() and acpi_pm_notify_handler() to increment pm_abort_suspend and trigger a wakeup from suspend-to-idle in case the ACPI SCI wakeup was canceled by acpi_s2idle_wake(). However, for this purpose it need not be set in acpi_s2idle_wake() and cleared in acpi_s2idle_sync(), respectively. In fact, it may be set as early as in acpi_s2idle_prepare() and cleared as late as in acpi_s2idle_restore(), so do that to allow subsequent changes to be simpler. This change is not expected to alter functionality. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/acpi/sleep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index f0fe7c15d657..3debe1a42655 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -972,6 +972,8 @@ static int acpi_s2idle_prepare(void) /* Change the configuration of GPEs to avoid spurious wakeup. */ acpi_enable_all_wakeup_gpes(); acpi_os_wait_events_complete(); + + s2idle_wakeup = true; return 0; } @@ -991,7 +993,6 @@ static void acpi_s2idle_wake(void) if (acpi_sci_irq_valid() && !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) { pm_system_cancel_wakeup(); - s2idle_wakeup = true; /* * On some platforms with the LPS0 _DSM device noirq resume * takes too much time for EC wakeup events to survive, so look @@ -1012,11 +1013,12 @@ static void acpi_s2idle_sync(void) acpi_os_wait_events_complete(); /* synchronize SCI IRQ handling */ acpi_ec_flush_work(); acpi_os_wait_events_complete(); /* synchronize Notify handling */ - s2idle_wakeup = false; } static void acpi_s2idle_restore(void) { + s2idle_wakeup = false; + acpi_enable_all_runtime_gpes(); acpi_disable_wakeup_devices(ACPI_STATE_S0); From 56b991849009f5def0443bfb2f48c8321d888e15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 23:52:03 +0200 Subject: [PATCH 009/126] PM: sleep: Simplify suspend-to-idle control flow After commit 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle") the "noirq" phases of device suspend and resume may run for multiple times during suspend-to-idle, if there are spurious system wakeup events while suspended. However, this is complicated and fragile and actually unnecessary. The main reason for doing this is that on some systems the EC may signal system wakeup events (power button events, for example) as well as events that should not cause the system to resume (spurious system wakeup events). Thus, in order to determine whether or not a given event signaled by the EC while suspended is a proper system wakeup one, the EC GPE needs to be dispatched and to start with that was achieved by allowing the ACPI SCI action handler to run, which was only possible after calling resume_device_irqs(). However, dispatching the EC GPE this way turned out to take too much time in some cases and some EC events might be missed due to that, so commit 68e22011856f ("ACPI: EC: Dispatch the EC GPE directly on s2idle wake") started to dispatch the EC GPE right after a wakeup event has been detected, so in fact the full ACPI SCI action handler doesn't need to run any more to deal with the wakeups coming from the EC. Use this observation to simplify the suspend-to-idle control flow so that the "noirq" phases of device suspend and resume are each run only once in every suspend-to-idle cycle, which is reported to significantly reduce power drawn by some systems when suspended to idle (by allowing them to reach a deep platform-wide low-power state through the suspend-to-idle flow). [What appears to happen is that the "noirq" resume of devices after a spurious EC wakeup brings some devices into a state in which they prevent the platform from reaching the deep low-power state going forward, even after a subsequent "noirq" suspend phase, and on some systems the EC triggers such wakeups already when the "noirq" suspend of devices is running for the first time in the given suspend/resume cycle, so the platform cannot reach the deep low-power state at all.] First, make acpi_s2idle_wake() use the acpi_ec_dispatch_gpe() return value to determine whether or not the wakeup may have been triggered by the EC (in which case the system wakeup is canceled and ACPI events are processed in order to determine whether or not the event is a proper system wakeup one) and use rearm_wake_irq() (introduced by a previous change) in it to rearm the ACPI SCI for system wakeup detection in case the system will remain suspended. Second, drop acpi_s2idle_sync(), which is not needed any more, and the corresponding global platform suspend-to-idle callback. Next, drop the pm_wakeup_pending() check (which is an optimization only) from __device_suspend_noirq() to prevent it from returning errors on system wakeups occurring before the "noirq" phase of device suspend is complete (as in the case of suspend-to-idle it is not known whether or not these wakeups are suprious at that point), in order to avoid having to carry out a "noirq" resume of devices on a spurious system wakeup. Finally, change the code flow in s2idle_loop() to (1) run the "noirq" suspend of devices once before starting the loop, (2) check for spurious EC wakeups (via the platform ->wake callback) for the first time before calling s2idle_enter(), and (3) run the "noirq" resume of devices once after leaving the loop. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/acpi/sleep.c | 47 ++++++++++++++++++---------------- drivers/base/power/main.c | 5 ---- include/linux/suspend.h | 1 - kernel/power/suspend.c | 53 +++++++++++++++++---------------------- 4 files changed, 48 insertions(+), 58 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 3debe1a42655..970ae7c7a3f7 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -986,33 +986,37 @@ static void acpi_s2idle_wake(void) lpi_check_constraints(); /* - * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means - * that the SCI has triggered while suspended, so cancel the wakeup in - * case it has not been a wakeup event (the GPEs will be checked later). + * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has + * not triggered while suspended, so bail out. */ - if (acpi_sci_irq_valid() && - !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) { + if (!acpi_sci_irq_valid() || + irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) + return; + + /* + * If there are EC events to process, the wakeup may be a spurious one + * coming from the EC. + */ + if (acpi_ec_dispatch_gpe()) { + /* + * Cancel the wakeup and process all pending events in case + * there are any wakeup ones in there. + * + * Note that if any non-EC GPEs are active at this point, the + * SCI will retrigger after the rearming below, so no events + * should be missed by canceling the wakeup here. + */ pm_system_cancel_wakeup(); /* - * On some platforms with the LPS0 _DSM device noirq resume - * takes too much time for EC wakeup events to survive, so look - * for them now. + * The EC driver uses the system workqueue and an additional + * special one, so those need to be flushed too. */ - acpi_ec_dispatch_gpe(); + acpi_os_wait_events_complete(); /* synchronize EC GPE processing */ + acpi_ec_flush_work(); + acpi_os_wait_events_complete(); /* synchronize Notify handling */ } -} -static void acpi_s2idle_sync(void) -{ - /* - * Process all pending events in case there are any wakeup ones. - * - * The EC driver uses the system workqueue and an additional special - * one, so those need to be flushed too. - */ - acpi_os_wait_events_complete(); /* synchronize SCI IRQ handling */ - acpi_ec_flush_work(); - acpi_os_wait_events_complete(); /* synchronize Notify handling */ + rearm_wake_irq(acpi_sci_irq); } static void acpi_s2idle_restore(void) @@ -1044,7 +1048,6 @@ static const struct platform_s2idle_ops acpi_s2idle_ops = { .begin = acpi_s2idle_begin, .prepare = acpi_s2idle_prepare, .wake = acpi_s2idle_wake, - .sync = acpi_s2idle_sync, .restore = acpi_s2idle_restore, .end = acpi_s2idle_end, }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 7fb2c39bc725..f08332fab531 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1291,11 +1291,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (async_error) goto Complete; - if (pm_wakeup_pending()) { - async_error = -EBUSY; - goto Complete; - } - if (dev->power.syscore || dev->power.direct_complete) goto Complete; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 9c0ad1a3a727..66ce3871ed61 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -191,7 +191,6 @@ struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); void (*wake)(void); - void (*sync)(void); void (*restore)(void); void (*end)(void); }; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c874a7026e24..907b2be0372f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -119,48 +119,41 @@ static void s2idle_enter(void) static void s2idle_loop(void) { + int error; + + dpm_noirq_begin(); + error = dpm_noirq_suspend_devices(PMSG_SUSPEND); + if (error) + goto resume; + pm_pr_dbg("suspend-to-idle\n"); + /* + * Suspend-to-idle equals: + * frozen processes + suspended devices + idle processors. + * Thus s2idle_enter() should be called right after all devices have + * been suspended. + * + * Wakeups during the noirq suspend of devices may be spurious, so try + * to avoid them upfront. + */ for (;;) { - int error; - - dpm_noirq_begin(); - - /* - * Suspend-to-idle equals - * frozen processes + suspended devices + idle processors. - * Thus s2idle_enter() should be called right after - * all devices have been suspended. - * - * Wakeups during the noirq suspend of devices may be spurious, - * so prevent them from terminating the loop right away. - */ - error = dpm_noirq_suspend_devices(PMSG_SUSPEND); - if (!error) - s2idle_enter(); - else if (error == -EBUSY && pm_wakeup_pending()) - error = 0; - - if (!error && s2idle_ops && s2idle_ops->wake) + if (s2idle_ops && s2idle_ops->wake) s2idle_ops->wake(); - dpm_noirq_resume_devices(PMSG_RESUME); - - dpm_noirq_end(); - - if (error) - break; - - if (s2idle_ops && s2idle_ops->sync) - s2idle_ops->sync(); - if (pm_wakeup_pending()) break; pm_wakeup_clear(false); + + s2idle_enter(); } pm_pr_dbg("resume from suspend-to-idle\n"); + +resume: + dpm_noirq_resume_devices(PMSG_RESUME); + dpm_noirq_end(); } void s2idle_wake(void) From 8eb0fd3b55f084320ae511cd5a64d356cf497c83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 23:52:12 +0200 Subject: [PATCH 010/126] PM: sleep: Integrate suspend-to-idle with generig suspend flow After previous changes the suspend-to-idle code flow can be integrated more tightly with the generic system suspend code flow by making suspend_enter() call s2idle_loop() later and removing the direct invocations of dpm_noirq_begin(), dpm_noirq_suspend_devices(), dpm_noirq_end(), and dpm_noirq_resume_devices() from the latter, so do that. This change is not expected to alter functionality. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- kernel/power/suspend.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 907b2be0372f..2b6057853b33 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -119,13 +119,6 @@ static void s2idle_enter(void) static void s2idle_loop(void) { - int error; - - dpm_noirq_begin(); - error = dpm_noirq_suspend_devices(PMSG_SUSPEND); - if (error) - goto resume; - pm_pr_dbg("suspend-to-idle\n"); /* @@ -150,10 +143,6 @@ static void s2idle_loop(void) } pm_pr_dbg("resume from suspend-to-idle\n"); - -resume: - dpm_noirq_resume_devices(PMSG_RESUME); - dpm_noirq_end(); } void s2idle_wake(void) @@ -408,11 +397,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (error) goto Devices_early_resume; - if (state == PM_SUSPEND_TO_IDLE && pm_test_level != TEST_PLATFORM) { - s2idle_loop(); - goto Platform_early_resume; - } - error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { pr_err("noirq suspend of devices failed\n"); @@ -425,6 +409,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (suspend_test(TEST_PLATFORM)) goto Platform_wake; + if (state == PM_SUSPEND_TO_IDLE) { + s2idle_loop(); + goto Platform_wake; + } + error = suspend_disable_secondary_cpus(); if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; From b605c44c30b59990e806f930c37bd288b9d901a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jul 2019 23:52:18 +0200 Subject: [PATCH 011/126] PM: sleep: Drop dpm_noirq_begin() and dpm_noirq_end() Note that after previous changes dpm_noirq_begin() and dpm_noirq_end() each have only one caller, so move the code from them to their respective callers and drop them. Also note that dpm_noirq_resume_devices() and dpm_noirq_suspend_devices() need not be exported any more, so make them both static. This change is not expected to alter functionality. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- drivers/base/power/main.c | 30 ++++++++++++------------------ include/linux/pm.h | 4 ---- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f08332fab531..134a8af51511 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -716,7 +716,7 @@ static void async_resume_noirq(void *data, async_cookie_t cookie) put_device(dev); } -void dpm_noirq_resume_devices(pm_message_t state) +static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); @@ -760,13 +760,6 @@ void dpm_noirq_resume_devices(pm_message_t state) trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } -void dpm_noirq_end(void) -{ - resume_device_irqs(); - device_wakeup_disarm_wake_irqs(); - cpuidle_resume(); -} - /** * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -777,7 +770,11 @@ void dpm_noirq_end(void) void dpm_resume_noirq(pm_message_t state) { dpm_noirq_resume_devices(state); - dpm_noirq_end(); + + resume_device_irqs(); + device_wakeup_disarm_wake_irqs(); + + cpuidle_resume(); } static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev, @@ -1357,14 +1354,7 @@ static int device_suspend_noirq(struct device *dev) return __device_suspend_noirq(dev, pm_transition, false); } -void dpm_noirq_begin(void) -{ - cpuidle_pause(); - device_wakeup_arm_wake_irqs(); - suspend_device_irqs(); -} - -int dpm_noirq_suspend_devices(pm_message_t state) +static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; @@ -1421,7 +1411,11 @@ int dpm_suspend_noirq(pm_message_t state) { int ret; - dpm_noirq_begin(); + cpuidle_pause(); + + device_wakeup_arm_wake_irqs(); + suspend_device_irqs(); + ret = dpm_noirq_suspend_devices(state); if (ret) dpm_resume_noirq(resume_event(state)); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3619a870eaa4..4c441be03079 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -712,8 +712,6 @@ struct dev_pm_domain { extern void device_pm_lock(void); extern void dpm_resume_start(pm_message_t state); extern void dpm_resume_end(pm_message_t state); -extern void dpm_noirq_resume_devices(pm_message_t state); -extern void dpm_noirq_end(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_early(pm_message_t state); extern void dpm_resume(pm_message_t state); @@ -722,8 +720,6 @@ extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_end(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); -extern void dpm_noirq_begin(void); -extern int dpm_noirq_suspend_devices(pm_message_t state); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_late(pm_message_t state); extern int dpm_suspend(pm_message_t state); From baea35e4db17a72145c84a401f70d496c8ebf833 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 17 Jul 2019 11:20:17 +0530 Subject: [PATCH 012/126] opp: Not all power-domains are scalable A device may have multiple power-domains and not all of them may be scalable (i.e. support performance states). But dev_pm_opp_attach_genpd() doesn't take that into account currently. Fix that by not verifying the names argument with "power-domain-names" DT property and finding the index into the required-opps array. The names argument will anyway get verified later on when we call dev_pm_domain_attach_by_name(). Fixes: 6319aee10e53 ("opp: Attach genpds to devices from within OPP core") Reported-by: Rajendra Nayak Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c094d5d20fd7..08b830f84d8c 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1784,12 +1784,15 @@ static void _opp_detach_genpd(struct opp_table *opp_table) * * This helper needs to be called once with a list of all genpd to attach. * Otherwise the original device structure will be used instead by the OPP core. + * + * The order of entries in the names array must match the order in which + * "required-opps" are added in DT. */ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) { struct opp_table *opp_table; struct device *virt_dev; - int index, ret = -EINVAL; + int index = 0, ret = -EINVAL; const char **name = names; opp_table = dev_pm_opp_get_opp_table(dev); @@ -1815,14 +1818,6 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names goto unlock; while (*name) { - index = of_property_match_string(dev->of_node, - "power-domain-names", *name); - if (index < 0) { - dev_err(dev, "Failed to find power domain: %s (%d)\n", - *name, index); - goto err; - } - if (index >= opp_table->required_opp_count) { dev_err(dev, "Index can't be greater than required-opp-count - 1, %s (%d : %d)\n", *name, opp_table->required_opp_count, index); @@ -1843,6 +1838,7 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names } opp_table->genpd_virt_devs[index] = virt_dev; + index++; name++; } From 17a8f868ae3e85a173843b1ac65e744e8585bc5a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Jul 2019 11:24:56 +0530 Subject: [PATCH 013/126] opp: Return genpd virtual devices from dev_pm_opp_attach_genpd() The cpufreq drivers don't need to do runtime PM operations on the virtual devices returned by dev_pm_domain_attach_by_name() and so the virtual devices weren't shared with the callers of dev_pm_opp_attach_genpd() earlier. But the IO device drivers would want to do that. This patch updates the prototype of dev_pm_opp_attach_genpd() to accept another argument to return the pointer to the array of genpd virtual devices. Reported-by: Rajendra Nayak Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 +++++- include/linux/pm_opp.h | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 08b830f84d8c..bdb435822401 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1771,6 +1771,7 @@ static void _opp_detach_genpd(struct opp_table *opp_table) * dev_pm_opp_attach_genpd - Attach genpd(s) for the device and save virtual device pointer * @dev: Consumer device for which the genpd is getting attached. * @names: Null terminated array of pointers containing names of genpd to attach. + * @virt_devs: Pointer to return the array of virtual devices. * * Multiple generic power domains for a device are supported with the help of * virtual genpd devices, which are created for each consumer device - genpd @@ -1788,7 +1789,8 @@ static void _opp_detach_genpd(struct opp_table *opp_table) * The order of entries in the names array must match the order in which * "required-opps" are added in DT. */ -struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, + const char **names, struct device ***virt_devs) { struct opp_table *opp_table; struct device *virt_dev; @@ -1842,6 +1844,8 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names name++; } + if (virt_devs) + *virt_devs = opp_table->genpd_virt_devs; mutex_unlock(&opp_table->genpd_virt_dev_lock); return opp_table; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index af5021f27cb7..5bdceca5125d 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -128,7 +128,7 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names); +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -292,7 +292,7 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) +static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs) { return ERR_PTR(-ENOTSUPP); } From 71419d84c216cee8da3b19fb843b4242f112cde4 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 25 Jul 2019 12:41:29 +0200 Subject: [PATCH 014/126] opp: Add dev_pm_opp_find_level_exact() Since the performance states in the OPP table are unique, implement a dev_pm_opp_find_level_exact() in order to be able to fetch a specific OPP. Signed-off-by: Niklas Cassel [ Viresh: Updated commit log ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 48 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 8 +++++++ 2 files changed, 56 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index bdb435822401..0ee8c0133d3e 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -401,6 +401,54 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +/** + * dev_pm_opp_find_level_exact() - search for an exact level + * @dev: device for which we do this operation + * @level: level to search for + * + * Return: Searches for exact match in the opp table and returns pointer to the + * matching opp if found, else returns ERR_PTR in case of error and should + * be handled using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); + return ERR_PTR(r); + } + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->level == level) { + opp = temp_opp; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_exact); + static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, unsigned long *freq) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5bdceca5125d..b8197ab014f2 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -96,6 +96,8 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level); struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); @@ -200,6 +202,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { From 11e1a1648298656722fe206a53e555e2e98668ae Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 3 Jul 2019 15:03:14 +0530 Subject: [PATCH 015/126] opp: Don't decrement uninitialized list_kref The list_kref was added for static OPPs and to track their users. The kref is initialized while the static OPPs are added, but removed unconditionally even if the static OPPs were never added. This causes refcount mismatch warnings currently. Fix that by always initializing the kref when the OPP table is first initialized. The refcount is later incremented only for the second user onwards. Fixes: d0e8ae6c26da ("OPP: Create separate kref for static OPPs list") Reported-by: Rajendra Nayak Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 1 + drivers/opp/of.c | 21 ++++----------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0ee8c0133d3e..9ff0538ee83a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -988,6 +988,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); kref_init(&opp_table->kref); + kref_init(&opp_table->list_kref); /* Secure the device table modification */ list_add(&opp_table->node, &opp_tables); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index b313aca9894f..698a92c3c17b 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -662,8 +662,6 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) return 0; } - kref_init(&opp_table->list_kref); - /* We have opp-table node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_table->np, np) { opp = _opp_add_static_v2(opp_table, dev, np); @@ -672,17 +670,15 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) dev_err(dev, "%s: Failed to add OPP, %d\n", __func__, ret); of_node_put(np); - goto put_list_kref; + return ret; } else if (opp) { count++; } } /* There should be one of more OPP defined */ - if (WARN_ON(!count)) { - ret = -ENOENT; - goto put_list_kref; - } + if (WARN_ON(!count)) + return -ENOENT; list_for_each_entry(opp, &opp_table->opp_list, node) pstate_count += !!opp->pstate; @@ -691,8 +687,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) if (pstate_count && pstate_count != count) { dev_err(dev, "Not all nodes have performance state set (%d: %d)\n", count, pstate_count); - ret = -ENOENT; - goto put_list_kref; + return -ENOENT; } if (pstate_count) @@ -701,11 +696,6 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) opp_table->parsed_static_opps = true; return 0; - -put_list_kref: - _put_opp_list_kref(opp_table); - - return ret; } /* Initializes OPP tables based on old-deprecated bindings */ @@ -731,8 +721,6 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) return -EINVAL; } - kref_init(&opp_table->list_kref); - val = prop->value; while (nr) { unsigned long freq = be32_to_cpup(val++) * 1000; @@ -742,7 +730,6 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) if (ret) { dev_err(dev, "%s: Failed to add OPP %ld (%d)\n", __func__, freq, ret); - _put_opp_list_kref(opp_table); return ret; } nr -= 2; From 7f93ff73f7c8c8bfa6be33bcc16470b0b44682aa Mon Sep 17 00:00:00 2001 From: "k.konieczny@partner.samsung.com" Date: Fri, 19 Jul 2019 17:05:32 +0200 Subject: [PATCH 016/126] opp: core: add regulators enable and disable Add enable regulators to dev_pm_opp_set_regulators() and disable regulators to dev_pm_opp_put_regulators(). Even if bootloader leaves regulators enabled, they should be enabled in kernel in order to increase the reference count. Signed-off-by: Kamil Konieczny Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9ff0538ee83a..3b7ffd0234e9 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1626,6 +1626,12 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, goto free_regulators; } + ret = regulator_enable(reg); + if (ret < 0) { + regulator_put(reg); + goto free_regulators; + } + opp_table->regulators[i] = reg; } @@ -1639,8 +1645,10 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, return opp_table; free_regulators: - while (i != 0) - regulator_put(opp_table->regulators[--i]); + while (i--) { + regulator_disable(opp_table->regulators[i]); + regulator_put(opp_table->regulators[i]); + } kfree(opp_table->regulators); opp_table->regulators = NULL; @@ -1666,8 +1674,10 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); - for (i = opp_table->regulator_count - 1; i >= 0; i--) + for (i = opp_table->regulator_count - 1; i >= 0; i--) { + regulator_disable(opp_table->regulators[i]); regulator_put(opp_table->regulators[i]); + } _free_set_opp_data(opp_table); From 518c6880ffc965d59349bf064ff00153cf05f033 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 9 Jul 2019 16:00:12 +0800 Subject: [PATCH 017/126] dt-bindings: opp: Support multiple opp-suspend properties Update opp-suspend property's description to support multiple opp-suspend properties defined in DT, the OPP with highest opp-hz and with opp-suspend property present will be used as suspend opp. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 76b6c79604a5..68592271461f 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -140,8 +140,8 @@ Optional properties: frequency for a short duration of time limited by the device's power, current and thermal limits. -- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in - the table should have this. +- opp-suspend: Marks the OPP to be used during device suspend. If multiple OPPs + in the table have this, the OPP with highest opp-hz will be used. - opp-supported-hw: This enables us to select only a subset of OPPs from the larger OPP table, based on what version of the hardware we are running on. We From 4527551750e85f9be9296f2b30b19bb257f342e5 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 9 Jul 2019 16:00:13 +0800 Subject: [PATCH 018/126] opp: of: Support multiple suspend OPPs defined in DT With property "opp-supported-hw" introduced, the OPP table in DT could be a large OPP table and ONLY a subset of OPPs are available, based on the version of the hardware running on. That introduces restriction of using "opp-suspend" property to define the suspend OPP, as we are NOT sure if the OPP containing "opp-suspend" property is available for the hardware running on, and the of opp core does NOT allow multiple suspend OPPs defined in DT OPP table. To eliminate this restrition, make of opp core allow multiple suspend OPPs defined in DT, and pick the OPP with highest rate and with "opp-suspend" property present to be suspend OPP, it can speed up the suspend/resume process. Signed-off-by: Anson Huang Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 698a92c3c17b..1813f5ad5fa2 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -617,9 +617,12 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, /* OPP to select on device suspend */ if (of_property_read_bool(np, "opp-suspend")) { if (opp_table->suspend_opp) { - dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", - __func__, opp_table->suspend_opp->rate, - new_opp->rate); + /* Pick the OPP with higher rate as suspend OPP */ + if (new_opp->rate > opp_table->suspend_opp->rate) { + opp_table->suspend_opp->suspend = false; + new_opp->suspend = true; + opp_table->suspend_opp = new_opp; + } } else { new_opp->suspend = true; opp_table->suspend_opp = new_opp; From 27a84f76738cc4a9509ec9234a7021de34a88e58 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Fri, 26 Jul 2019 14:07:47 +0800 Subject: [PATCH 019/126] PM / OPP: Correct Documentation about library location OPP library is now located in drivers/opp/ directory. Signed-off-by: Yue Hu Signed-off-by: Viresh Kumar --- Documentation/power/opp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst index b3cf1def9dee..209c7613f5a4 100644 --- a/Documentation/power/opp.rst +++ b/Documentation/power/opp.rst @@ -46,7 +46,7 @@ We can represent these as three OPPs as the following {Hz, uV} tuples: ---------------------------------------- OPP library provides a set of helper functions to organize and query the OPP -information. The library is located in drivers/base/power/opp.c and the header +information. The library is located in drivers/opp/ directory and the header is located in include/linux/pm_opp.h. OPP library can be enabled by enabling CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to From 8cfda0df3a3020454848f55ed23a781169770c99 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Thu, 25 Jul 2019 12:41:30 +0200 Subject: [PATCH 020/126] dt-bindings: opp: Re-organise kryo cpufreq to use it for other nvmem based qcom socs The kryo cpufreq driver reads the nvmem cell and uses that data to populate the opps. There are other qcom cpufreq socs like krait which does similar thing. Except for the interpretation of the read data, rest of the driver is same for both the cases. So pull the common things out for reuse. Signed-off-by: Sricharan R [niklas.cassel@linaro.org: split dt-binding into a separate patch and do not rename the compatible string.] Signed-off-by: Niklas Cassel Reviewed-by: Ilia Lin Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../opp/{kryo-cpufreq.txt => qcom-nvmem-cpufreq.txt} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename Documentation/devicetree/bindings/opp/{kryo-cpufreq.txt => qcom-nvmem-cpufreq.txt} (98%) diff --git a/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt similarity index 98% rename from Documentation/devicetree/bindings/opp/kryo-cpufreq.txt rename to Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt index c2127b96805a..198441e80ba8 100644 --- a/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt +++ b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt @@ -1,13 +1,13 @@ -Qualcomm Technologies, Inc. KRYO CPUFreq and OPP bindings +Qualcomm Technologies, Inc. NVMEM CPUFreq and OPP bindings =================================== -In Certain Qualcomm Technologies, Inc. SoCs like apq8096 and msm8996 -that have KRYO processors, the CPU ferequencies subset and voltage value -of each OPP varies based on the silicon variant in use. +In Certain Qualcomm Technologies, Inc. SoCs like apq8096 and msm8996, +the CPU frequencies subset and voltage value of each OPP varies based on +the silicon variant in use. Qualcomm Technologies, Inc. Process Voltage Scaling Tables defines the voltage and frequency value based on the msm-id in SMEM and speedbin blown in the efuse combination. -The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the SoC +The qcom-cpufreq-nvmem driver reads the msm-id and efuse value from the SoC to provide the OPP framework with required information (existing HW bitmap). This is used to determine the voltage and frequency value for each OPP of operating-points-v2 table when it is parsed by the OPP framework. From a409906003a2b5418e6e60ac2524948ea80819f2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 25 Jul 2019 12:41:32 +0200 Subject: [PATCH 021/126] dt-bindings: opp: qcom-nvmem: Make speedbin related properties optional Not all Qualcomm platforms need to care about the speedbin efuse, nor the value blown into the speedbin efuse. Therefore, make the nvmem-cells and opp-supported-hw properties optional. Signed-off-by: Niklas Cassel Reviewed-by: Ilia Lin Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt index 198441e80ba8..c5ea8b90e35d 100644 --- a/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt +++ b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt @@ -20,6 +20,10 @@ In 'cpus' nodes: In 'operating-points-v2' table: - compatible: Should be - 'operating-points-v2-kryo-cpu' for apq8096 and msm8996. + +Optional properties: +-------------------- +In 'operating-points-v2' table: - nvmem-cells: A phandle pointing to a nvmem-cells node representing the efuse registers that has information about the speedbin that is used to select the right frequency/voltage From 51dbe89768289c32406f47ad89957b79220c1b63 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 17:49:33 -0500 Subject: [PATCH 022/126] cpufreq: ti-cpufreq: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: arm): drivers/cpufreq/ti-cpufreq.c: In function ‘dra7_efuse_xlate’: drivers/cpufreq/ti-cpufreq.c:79:20: warning: this statement may fall through [-Wimplicit-fallthrough=] calculated_efuse |= DRA7_EFUSE_HIGH_MPU_OPP; drivers/cpufreq/ti-cpufreq.c:80:2: note: here case DRA7_EFUSE_HAS_OD_MPU_OPP: ^~~~ Signed-off-by: Gustavo A. R. Silva Signed-off-by: Viresh Kumar --- drivers/cpufreq/ti-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 2ad1ae17932d..aeaa883a8c9d 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -77,6 +77,7 @@ static unsigned long dra7_efuse_xlate(struct ti_cpufreq_data *opp_data, case DRA7_EFUSE_HAS_ALL_MPU_OPP: case DRA7_EFUSE_HAS_HIGH_MPU_OPP: calculated_efuse |= DRA7_EFUSE_HIGH_MPU_OPP; + /* Fall through */ case DRA7_EFUSE_HAS_OD_MPU_OPP: calculated_efuse |= DRA7_EFUSE_OD_MPU_OPP; } From 10a08fd65ec1a68ccd86b19ec822ed5f2e50113f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Jul 2019 11:55:59 +0200 Subject: [PATCH 023/126] ACPI: PM: Set up EC GPE for system wakeup from drivers that need it The EC GPE needs to be set up for system wakeup only if there is a driver depending on it, either intel-hid or intel-vbtn, bound to a button device that is expected to wake up the system from sleep (such as the power button on some Dell systems, like the XPS13 9360). It doesn't need to be set up for waking up the system from sleep in any other cases and whether or not it is expected to wake up the system from sleep doesn't depend on whether or not the LPS0 device is present in the ACPI namespace. For this reason, rearrange the ACPI suspend-to-idle code to make the drivers depending on the EC GPE wakeup take care of setting it up and decouple that from the LPS0 device handling. While at it, make intel-hid and intel-vbtn prepare for system wakeup only if they are allowed to wake up the system from sleep by user space (via sysfs). [Note that acpi_ec_mark_gpe_for_wake() and acpi_ec_set_gpe_wake_mask() are there to prevent the EC GPE from being disabled by the acpi_enable_all_wakeup_gpes() call in acpi_s2idle_prepare(), so on systems with either intel-hid or intel-vbtn this change doesn't affect any interactions with the hardware or platform firmware.] Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko --- drivers/acpi/ec.c | 7 ++++++- drivers/acpi/internal.h | 2 -- drivers/acpi/sleep.c | 13 ++----------- drivers/platform/x86/intel-hid.c | 20 ++++++++++++++++---- drivers/platform/x86/intel-vbtn.c | 20 ++++++++++++++++---- include/linux/acpi.h | 4 ++++ include/linux/suspend.h | 1 + 7 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 58c7ad402d8d..b996ca5f253f 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1048,17 +1049,21 @@ void acpi_ec_unblock_transactions(void) acpi_ec_start(first_ec, true); } +#ifdef CONFIG_PM_SLEEP void acpi_ec_mark_gpe_for_wake(void) { if (first_ec && !ec_no_wakeup) acpi_mark_gpe_for_wake(NULL, first_ec->gpe); } +EXPORT_SYMBOL_GPL(acpi_ec_mark_gpe_for_wake); void acpi_ec_set_gpe_wake_mask(u8 action) { - if (first_ec && !ec_no_wakeup) + if (pm_suspend_no_platform() && first_ec && !ec_no_wakeup) acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); } +EXPORT_SYMBOL_GPL(acpi_ec_set_gpe_wake_mask); +#endif bool acpi_ec_dispatch_gpe(void) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 1b5f9ac06ea8..bcc080511197 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -194,8 +194,6 @@ void acpi_ec_ecdt_probe(void); void acpi_ec_dsdt_probe(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); -void acpi_ec_mark_gpe_for_wake(void); -void acpi_ec_set_gpe_wake_mask(u8 action); bool acpi_ec_dispatch_gpe(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 970ae7c7a3f7..9cb0532f7471 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -930,8 +930,6 @@ static int lps0_device_attach(struct acpi_device *adev, acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", bitmask); - - acpi_ec_mark_gpe_for_wake(); } else { acpi_handle_debug(adev->handle, "_DSM function 0 evaluation failed\n"); @@ -960,8 +958,6 @@ static int acpi_s2idle_prepare(void) if (lps0_device_handle) { acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); - - acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); } if (acpi_sci_irq_valid()) @@ -979,10 +975,7 @@ static int acpi_s2idle_prepare(void) static void acpi_s2idle_wake(void) { - if (!lps0_device_handle) - return; - - if (pm_debug_messages_on) + if (lps0_device_handle && pm_debug_messages_on) lpi_check_constraints(); /* @@ -1031,8 +1024,6 @@ static void acpi_s2idle_restore(void) disable_irq_wake(acpi_sci_irq); if (lps0_device_handle) { - acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); } @@ -1081,7 +1072,7 @@ bool acpi_s2idle_wakeup(void) bool acpi_sleep_no_ec_events(void) { - return !s2idle_in_progress || !lps0_device_handle; + return !s2idle_in_progress; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index bc0d55a59015..e51c72b92cbd 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -253,9 +253,12 @@ static void intel_button_array_enable(struct device *device, bool enable) static int intel_hid_pm_prepare(struct device *device) { - struct intel_hid_priv *priv = dev_get_drvdata(device); + if (device_may_wakeup(device)) { + struct intel_hid_priv *priv = dev_get_drvdata(device); - priv->wakeup_mode = true; + priv->wakeup_mode = true; + acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); + } return 0; } @@ -270,9 +273,12 @@ static int intel_hid_pl_suspend_handler(struct device *device) static int intel_hid_pl_resume_handler(struct device *device) { - struct intel_hid_priv *priv = dev_get_drvdata(device); + if (device_may_wakeup(device)) { + struct intel_hid_priv *priv = dev_get_drvdata(device); - priv->wakeup_mode = false; + acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); + priv->wakeup_mode = false; + } if (pm_resume_via_firmware()) { intel_hid_set_enable(device, true); intel_button_array_enable(device, true); @@ -491,6 +497,12 @@ static int intel_hid_probe(struct platform_device *device) } device_init_wakeup(&device->dev, true); + /* + * In order for system wakeup to work, the EC GPE has to be marked as + * a wakeup one, so do that here (this setting will persist, but it has + * no effect until the wakeup mask is set for the EC GPE). + */ + acpi_ec_mark_gpe_for_wake(); return 0; err_remove_notify: diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index a0d0cecff55f..ab84e1bbdedd 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -176,6 +176,12 @@ static int intel_vbtn_probe(struct platform_device *device) return -EBUSY; device_init_wakeup(&device->dev, true); + /* + * In order for system wakeup to work, the EC GPE has to be marked as + * a wakeup one, so do that here (this setting will persist, but it has + * no effect until the wakeup mask is set for the EC GPE). + */ + acpi_ec_mark_gpe_for_wake(); return 0; } @@ -195,17 +201,23 @@ static int intel_vbtn_remove(struct platform_device *device) static int intel_vbtn_pm_prepare(struct device *dev) { - struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + if (device_may_wakeup(dev)) { + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); - priv->wakeup_mode = true; + priv->wakeup_mode = true; + acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); + } return 0; } static int intel_vbtn_pm_resume(struct device *dev) { - struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + if (device_may_wakeup(dev)) { + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); - priv->wakeup_mode = false; + acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); + priv->wakeup_mode = false; + } return 0; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9426b9aaed86..e65a4c5bbeae 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -931,6 +931,8 @@ int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); +void acpi_ec_mark_gpe_for_wake(void); +void acpi_ec_set_gpe_wake_mask(u8 action); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} @@ -939,6 +941,8 @@ static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } +static inline void acpi_ec_mark_gpe_for_wake(void) {} +static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 66ce3871ed61..f0c4a8445140 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -335,6 +335,7 @@ static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } +static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} From fa86ee90eb1111267de67cb4272b5ce711f18cbb Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:25 -0300 Subject: [PATCH 024/126] add cpuidle-haltpoll driver Add a cpuidle driver that calls the architecture default_idle routine. To be used in conjunction with the haltpoll governor. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/process.c | 2 +- drivers/cpuidle/Kconfig | 9 ++++ drivers/cpuidle/Makefile | 1 + drivers/cpuidle/cpuidle-haltpoll.c | 68 ++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 drivers/cpuidle/cpuidle-haltpoll.c diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 75fea0d48c0e..5e94c4354d4e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -580,7 +580,7 @@ void __cpuidle default_idle(void) safe_halt(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); #endif diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index a4ac31e4a58c..cc8efc56be7d 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -51,6 +51,15 @@ depends on PPC source "drivers/cpuidle/Kconfig.powerpc" endmenu +config HALTPOLL_CPUIDLE + tristate "Halt poll cpuidle driver" + depends on X86 && KVM_GUEST + default y + help + This option enables halt poll cpuidle driver, which allows to poll + before halting in the guest (more efficient than polling in the + host via halt_poll_ns for some scenarios). + endif config ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 9d7176cee3d3..240227474cd9 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -7,6 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o +obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o ################################################################################## # ARM SoC drivers diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c new file mode 100644 index 000000000000..35cfb53e9287 --- /dev/null +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cpuidle driver for haltpoll governor. + * + * Copyright 2019 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Authors: Marcelo Tosatti + */ + +#include +#include +#include +#include +#include + +static int default_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + if (current_clr_polling_and_test()) { + local_irq_enable(); + return index; + } + default_idle(); + return index; +} + +static struct cpuidle_driver haltpoll_driver = { + .name = "haltpoll", + .owner = THIS_MODULE, + .states = { + { /* entry 0 is for polling */ }, + { + .enter = default_enter_idle, + .exit_latency = 1, + .target_residency = 1, + .power_usage = -1, + .name = "haltpoll idle", + .desc = "default architecture idle", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int __init haltpoll_init(void) +{ + struct cpuidle_driver *drv = &haltpoll_driver; + + cpuidle_poll_state_init(drv); + + if (!kvm_para_available()) + return 0; + + return cpuidle_register(&haltpoll_driver, NULL); +} + +static void __exit haltpoll_exit(void) +{ + cpuidle_unregister(&haltpoll_driver); +} + +module_init(haltpoll_init); +module_exit(haltpoll_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marcelo Tosatti "); From 259231a045616c4101d023a8f4dcc8379af265a6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:26 -0300 Subject: [PATCH 025/126] cpuidle: add poll_limit_ns to cpuidle_device structure Add a poll_limit_ns variable to cpuidle_device structure. Calculate and configure it in the new cpuidle_poll_time function, in case its zero. Individual governors are allowed to override this value. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 30 ++++++++++++++++++++++++++++++ drivers/cpuidle/poll_state.c | 11 ++--------- drivers/cpuidle/sysfs.c | 7 +++++++ include/linux/cpuidle.h | 6 ++++++ 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 0f4b7c45df3e..0895b988fa92 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -361,6 +361,36 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index) cpuidle_curr_governor->reflect(dev, index); } +/** + * cpuidle_poll_time - return amount of time to poll for, + * governors can override dev->poll_limit_ns if necessary + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * + */ +u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + int i; + u64 limit_ns; + + if (dev->poll_limit_ns) + return dev->poll_limit_ns; + + limit_ns = TICK_NSEC; + for (i = 1; i < drv->state_count; i++) { + if (drv->states[i].disabled || dev->states_usage[i].disable) + continue; + + limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC; + } + + dev->poll_limit_ns = limit_ns; + + return dev->poll_limit_ns; +} + /** * cpuidle_install_idle_handler - installs the cpuidle idle loop handler */ diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 02b9315a9e96..c8fa5f41dfc4 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -20,16 +20,9 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; - u64 limit = TICK_NSEC; - int i; + u64 limit; - for (i = 1; i < drv->state_count; i++) { - if (drv->states[i].disabled || dev->states_usage[i].disable) - continue; - - limit = (u64)drv->states[i].target_residency * NSEC_PER_USEC; - break; - } + limit = cpuidle_poll_time(drv, dev); while (!need_resched()) { cpu_relax(); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index eb20adb5de23..2bb2683b493c 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -334,6 +334,7 @@ struct cpuidle_state_kobj { struct cpuidle_state_usage *state_usage; struct completion kobj_unregister; struct kobject kobj; + struct cpuidle_device *device; }; #ifdef CONFIG_SUSPEND @@ -391,6 +392,7 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) +#define kobj_to_device(k) (kobj_to_state_obj(k)->device) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, @@ -414,10 +416,14 @@ static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, struct cpuidle_state *state = kobj_to_state(kobj); struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj); struct cpuidle_state_attr *cattr = attr_to_stateattr(attr); + struct cpuidle_device *dev = kobj_to_device(kobj); if (cattr->store) ret = cattr->store(state, state_usage, buf, size); + /* reset poll time cache */ + dev->poll_limit_ns = 0; + return ret; } @@ -468,6 +474,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) } kobj->state = &drv->states[i]; kobj->state_usage = &device->states_usage[i]; + kobj->device = device; init_completion(&kobj->kobj_unregister); ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb9a0db89f1a..b484dd69ec21 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -86,6 +86,7 @@ struct cpuidle_device { ktime_t next_hrtimer; int last_residency; + u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -132,6 +133,8 @@ extern int cpuidle_select(struct cpuidle_driver *drv, extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); @@ -166,6 +169,9 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } From 7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:27 -0300 Subject: [PATCH 026/126] governors: unify last_state_idx Since this field is shared by all governors, move it to cpuidle device structure. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/ladder.c | 21 ++++++++++----------- drivers/cpuidle/governors/menu.c | 5 ++--- drivers/cpuidle/governors/teo.c | 12 ++++++------ include/linux/cpuidle.h | 1 + 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index f0dddc66af26..428eeb832fe7 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -38,7 +38,6 @@ struct ladder_device_state { struct ladder_device { struct ladder_device_state states[CPUIDLE_STATE_MAX]; - int last_state_idx; }; static DEFINE_PER_CPU(struct ladder_device, ladder_devices); @@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices); * @old_idx: the current state index * @new_idx: the new target state index */ -static inline void ladder_do_selection(struct ladder_device *ldev, +static inline void ladder_do_selection(struct cpuidle_device *dev, + struct ladder_device *ldev, int old_idx, int new_idx) { ldev->states[old_idx].stats.promotion_count = 0; ldev->states[old_idx].stats.demotion_count = 0; - ldev->last_state_idx = new_idx; + dev->last_state_idx = new_idx; } /** @@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv, { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; - int last_residency, last_idx = ldev->last_state_idx; + int last_residency, last_idx = dev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; int latency_req = cpuidle_governor_latency_req(dev->cpu); /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { - ladder_do_selection(ldev, last_idx, 0); + ladder_do_selection(dev, ldev, last_idx, 0); return 0; } @@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { - ladder_do_selection(ldev, last_idx, last_idx + 1); + ladder_do_selection(dev, ldev, last_idx, last_idx + 1); return last_idx + 1; } } @@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, if (drv->states[i].exit_latency <= latency_req) break; } - ladder_do_selection(ldev, last_idx, i); + ladder_do_selection(dev, ldev, last_idx, i); return i; } @@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { - ladder_do_selection(ldev, last_idx, last_idx - 1); + ladder_do_selection(dev, ldev, last_idx, last_idx - 1); return last_idx - 1; } } @@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv, struct ladder_device_state *lstate; struct cpuidle_state *state; - ldev->last_state_idx = first_idx; + dev->last_state_idx = first_idx; for (i = first_idx; i < drv->state_count; i++) { state = &drv->states[i]; @@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv, */ static void ladder_reflect(struct cpuidle_device *dev, int index) { - struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); if (index > 0) - ldev->last_state_idx = index; + dev->last_state_idx = index; } static struct cpuidle_governor ladder_governor = { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index e9a28c7846d6..dace4c7f830c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -117,7 +117,6 @@ */ struct menu_device { - int last_state_idx; int needs_update; int tick_wakeup; @@ -455,7 +454,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) { struct menu_device *data = this_cpu_ptr(&menu_devices); - data->last_state_idx = index; + dev->last_state_idx = index; data->needs_update = 1; data->tick_wakeup = tick_nohz_idle_got_tick(); } @@ -468,7 +467,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = this_cpu_ptr(&menu_devices); - int last_idx = data->last_state_idx; + int last_idx = dev->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; unsigned int new_factor; diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7d05efdbd3c6..a2fd81067a13 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -96,7 +96,6 @@ struct teo_idle_state { * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @states: Idle states data corresponding to this CPU. - * @last_state: Idle state entered by the CPU last time. * @interval_idx: Index of the most recent saved idle interval. * @intervals: Saved idle duration values. */ @@ -104,7 +103,6 @@ struct teo_cpu { u64 time_span_ns; u64 sleep_length_ns; struct teo_idle_state states[CPUIDLE_STATE_MAX]; - int last_state; int interval_idx; unsigned int intervals[INTERVALS]; }; @@ -130,7 +128,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ measured_us = sleep_length_us; } else { - unsigned int lat = drv->states[cpu_data->last_state].exit_latency; + unsigned int lat; + + lat = drv->states[dev->last_state_idx].exit_latency; measured_us = ktime_to_us(cpu_data->time_span_ns); /* @@ -245,9 +245,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int max_early_idx, idx, i; ktime_t delta_tick; - if (cpu_data->last_state >= 0) { + if (dev->last_state_idx >= 0) { teo_update(drv, dev); - cpu_data->last_state = -1; + dev->last_state_idx = -1; } cpu_data->time_span_ns = local_clock(); @@ -394,7 +394,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); - cpu_data->last_state = state; + dev->last_state_idx = state; /* * If the wakeup was not "natural", but triggered by one of the safety * nets, assume that the CPU might have been idle for the entire sleep diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b484dd69ec21..ba535a1a47d5 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -85,6 +85,7 @@ struct cpuidle_device { unsigned int cpu; ktime_t next_hrtimer; + int last_state_idx; int last_residency; u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; From 2cffe9f6b96fece065ee8522673c90e92ef2085d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:28 -0300 Subject: [PATCH 027/126] cpuidle: add haltpoll governor The cpuidle_haltpoll governor, in conjunction with the haltpoll cpuidle driver, allows guest vcpus to poll for a specified amount of time before halting. This provides the following benefits to host side polling: 1) The POLL flag is set while polling is performed, which allows a remote vCPU to avoid sending an IPI (and the associated cost of handling the IPI) when performing a wakeup. 2) The VM-exit cost can be avoided. The downside of guest side polling is that polling is performed even with other runnable tasks in the host. Results comparing halt_poll_ns and server/client application where a small packet is ping-ponged: host --> 31.33 halt_poll_ns=300000 / no guest busy spin --> 33.40 (93.8%) halt_poll_ns=0 / guest_halt_poll_ns=300000 --> 32.73 (95.7%) For the SAP HANA benchmarks (where idle_spin is a parameter of the previous version of the patch, results should be the same): hpns == halt_poll_ns idle_spin=0/ idle_spin=800/ idle_spin=0/ hpns=200000 hpns=0 hpns=800000 DeleteC06T03 (100 thread) 1.76 1.71 (-3%) 1.78 (+1%) InsertC16T02 (100 thread) 2.14 2.07 (-3%) 2.18 (+1.8%) DeleteC00T01 (1 thread) 1.34 1.28 (-4.5%) 1.29 (-3.7%) UpdateC00T03 (1 thread) 4.72 4.18 (-12%) 4.53 (-5%) Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- Documentation/virtual/guest-halt-polling.txt | 78 ++++++++++ drivers/cpuidle/Kconfig | 11 ++ drivers/cpuidle/governors/Makefile | 1 + drivers/cpuidle/governors/haltpoll.c | 150 +++++++++++++++++++ 4 files changed, 240 insertions(+) create mode 100644 Documentation/virtual/guest-halt-polling.txt create mode 100644 drivers/cpuidle/governors/haltpoll.c diff --git a/Documentation/virtual/guest-halt-polling.txt b/Documentation/virtual/guest-halt-polling.txt new file mode 100644 index 000000000000..b3a2a294532d --- /dev/null +++ b/Documentation/virtual/guest-halt-polling.txt @@ -0,0 +1,78 @@ +Guest halt polling +================== + +The cpuidle_haltpoll driver, with the haltpoll governor, allows +the guest vcpus to poll for a specified amount of time before +halting. +This provides the following benefits to host side polling: + + 1) The POLL flag is set while polling is performed, which allows + a remote vCPU to avoid sending an IPI (and the associated + cost of handling the IPI) when performing a wakeup. + + 2) The VM-exit cost can be avoided. + +The downside of guest side polling is that polling is performed +even with other runnable tasks in the host. + +The basic logic as follows: A global value, guest_halt_poll_ns, +is configured by the user, indicating the maximum amount of +time polling is allowed. This value is fixed. + +Each vcpu has an adjustable guest_halt_poll_ns +("per-cpu guest_halt_poll_ns"), which is adjusted by the algorithm +in response to events (explained below). + +Module Parameters +================= + +The haltpoll governor has 5 tunable module parameters: + +1) guest_halt_poll_ns: +Maximum amount of time, in nanoseconds, that polling is +performed before halting. + +Default: 200000 + +2) guest_halt_poll_shrink: +Division factor used to shrink per-cpu guest_halt_poll_ns when +wakeup event occurs after the global guest_halt_poll_ns. + +Default: 2 + +3) guest_halt_poll_grow: +Multiplication factor used to grow per-cpu guest_halt_poll_ns +when event occurs after per-cpu guest_halt_poll_ns +but before global guest_halt_poll_ns. + +Default: 2 + +4) guest_halt_poll_grow_start: +The per-cpu guest_halt_poll_ns eventually reaches zero +in case of an idle system. This value sets the initial +per-cpu guest_halt_poll_ns when growing. This can +be increased from 10000, to avoid misses during the initial +growth stage: + +10k, 20k, 40k, ... (example assumes guest_halt_poll_grow=2). + +Default: 50000 + +5) guest_halt_poll_allow_shrink: + +Bool parameter which allows shrinking. Set to N +to avoid it (per-cpu guest_halt_poll_ns will remain +high once achieves global guest_halt_poll_ns value). + +Default: Y + +The module parameters can be set from the debugfs files in: + + /sys/module/haltpoll/parameters/ + +Further Notes +============= + +- Care should be taken when setting the guest_halt_poll_ns parameter as a +large value has the potential to drive the cpu usage to 100% on a machine which +would be almost entirely idle otherwise. diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index cc8efc56be7d..88727b7c0d59 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -33,6 +33,17 @@ config CPU_IDLE_GOV_TEO Some workloads benefit from using it and it generally should be safe to use. Say Y here if you are not happy with the alternatives. +config CPU_IDLE_GOV_HALTPOLL + bool "Haltpoll governor (for virtualized systems)" + depends on KVM_GUEST + help + This governor implements haltpoll idle state selection, to be + used in conjunction with the haltpoll cpuidle driver, allowing + for polling for a certain amount of time before entering idle + state. + + Some virtualized workloads benefit from using it. + config DT_IDLE_STATES bool diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile index 42f44cc610dd..63abb5393a4d 100644 --- a/drivers/cpuidle/governors/Makefile +++ b/drivers/cpuidle/governors/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o +obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c new file mode 100644 index 000000000000..797477bda486 --- /dev/null +++ b/drivers/cpuidle/governors/haltpoll.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * haltpoll.c - haltpoll idle governor + * + * Copyright 2019 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Authors: Marcelo Tosatti + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int guest_halt_poll_ns __read_mostly = 200000; +module_param(guest_halt_poll_ns, uint, 0644); + +/* division factor to shrink halt_poll_ns */ +static unsigned int guest_halt_poll_shrink __read_mostly = 2; +module_param(guest_halt_poll_shrink, uint, 0644); + +/* multiplication factor to grow per-cpu poll_limit_ns */ +static unsigned int guest_halt_poll_grow __read_mostly = 2; +module_param(guest_halt_poll_grow, uint, 0644); + +/* value in us to start growing per-cpu halt_poll_ns */ +static unsigned int guest_halt_poll_grow_start __read_mostly = 50000; +module_param(guest_halt_poll_grow_start, uint, 0644); + +/* allow shrinking guest halt poll */ +static bool guest_halt_poll_allow_shrink __read_mostly = true; +module_param(guest_halt_poll_allow_shrink, bool, 0644); + +/** + * haltpoll_select - selects the next idle state to enter + * @drv: cpuidle driver containing state data + * @dev: the CPU + * @stop_tick: indication on whether or not to stop the tick + */ +static int haltpoll_select(struct cpuidle_driver *drv, + struct cpuidle_device *dev, + bool *stop_tick) +{ + int latency_req = cpuidle_governor_latency_req(dev->cpu); + + if (!drv->state_count || latency_req == 0) { + *stop_tick = false; + return 0; + } + + if (dev->poll_limit_ns == 0) + return 1; + + /* Last state was poll? */ + if (dev->last_state_idx == 0) { + /* Halt if no event occurred on poll window */ + if (dev->poll_time_limit == true) + return 1; + + *stop_tick = false; + /* Otherwise, poll again */ + return 0; + } + + *stop_tick = false; + /* Last state was halt: poll */ + return 0; +} + +static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) +{ + unsigned int val; + u64 block_ns = block_us*NSEC_PER_USEC; + + /* Grow cpu_halt_poll_us if + * cpu_halt_poll_us < block_ns < guest_halt_poll_us + */ + if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) { + val = dev->poll_limit_ns * guest_halt_poll_grow; + + if (val < guest_halt_poll_grow_start) + val = guest_halt_poll_grow_start; + if (val > guest_halt_poll_ns) + val = guest_halt_poll_ns; + + dev->poll_limit_ns = val; + } else if (block_ns > guest_halt_poll_ns && + guest_halt_poll_allow_shrink) { + unsigned int shrink = guest_halt_poll_shrink; + + val = dev->poll_limit_ns; + if (shrink == 0) + val = 0; + else + val /= shrink; + dev->poll_limit_ns = val; + } +} + +/** + * haltpoll_reflect - update variables and update poll time + * @dev: the CPU + * @index: the index of actual entered state + */ +static void haltpoll_reflect(struct cpuidle_device *dev, int index) +{ + dev->last_state_idx = index; + + if (index != 0) + adjust_poll_limit(dev, dev->last_residency); +} + +/** + * haltpoll_enable_device - scans a CPU's states and does setup + * @drv: cpuidle driver + * @dev: the CPU + */ +static int haltpoll_enable_device(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + dev->poll_limit_ns = 0; + + return 0; +} + +static struct cpuidle_governor haltpoll_governor = { + .name = "haltpoll", + .rating = 21, + .enable = haltpoll_enable_device, + .select = haltpoll_select, + .reflect = haltpoll_reflect, +}; + +static int __init init_haltpoll(void) +{ + if (kvm_para_available()) + return cpuidle_register_governor(&haltpoll_governor); + + return 0; +} + +postcore_initcall(init_haltpoll); From a1c4423b02b2121108e3ea9580741e0f26309a48 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:29 -0300 Subject: [PATCH 028/126] cpuidle-haltpoll: disable host side polling when kvm virtualized When performing guest side polling, it is not necessary to also perform host side polling. So disable host side polling, via the new MSR interface, when loading cpuidle-haltpoll driver. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- arch/x86/Kconfig | 7 +++++ arch/x86/include/asm/cpuidle_haltpoll.h | 8 +++++ arch/x86/kernel/kvm.c | 42 +++++++++++++++++++++++++ drivers/cpuidle/cpuidle-haltpoll.c | 9 +++++- include/linux/cpuidle_haltpoll.h | 16 ++++++++++ 5 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/cpuidle_haltpoll.h create mode 100644 include/linux/cpuidle_haltpoll.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 222855cc0158..05e78acb187c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -794,6 +794,7 @@ config KVM_GUEST bool "KVM Guest support (including kvmclock)" depends on PARAVIRT select PARAVIRT_CLOCK + select ARCH_CPUIDLE_HALTPOLL default y ---help--- This option enables various optimizations for running under the KVM @@ -802,6 +803,12 @@ config KVM_GUEST underlying device model, the host provides the guest with timing infrastructure such as time of day, and system time +config ARCH_CPUIDLE_HALTPOLL + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help + If virtualized under KVM, disable host haltpoll. + config PVH bool "Support for running PVH guests" ---help--- diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000000..ff8607d81526 --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(void); +void arch_haltpoll_disable(void); + +#endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b7f34fe2171e..f48401be8ce0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -875,3 +875,45 @@ void __init kvm_spinlock_init(void) } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + +static void kvm_disable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +} + +static void kvm_enable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 1); +} + +void arch_haltpoll_enable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { + printk(KERN_ERR "kvm: host does not support poll control\n"); + printk(KERN_ERR "kvm: host upgrade recommended\n"); + return; + } + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_disable_host_haltpoll(NULL); + smp_call_function(kvm_disable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_enable); + +void arch_haltpoll_disable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + return; + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_enable_host_haltpoll(NULL); + smp_call_function(kvm_enable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_disable); +#endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 35cfb53e9287..9ac093dcbb01 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -15,6 +15,7 @@ #include #include #include +#include static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -47,6 +48,7 @@ static struct cpuidle_driver haltpoll_driver = { static int __init haltpoll_init(void) { + int ret; struct cpuidle_driver *drv = &haltpoll_driver; cpuidle_poll_state_init(drv); @@ -54,11 +56,16 @@ static int __init haltpoll_init(void) if (!kvm_para_available()) return 0; - return cpuidle_register(&haltpoll_driver, NULL); + ret = cpuidle_register(&haltpoll_driver, NULL); + if (ret == 0) + arch_haltpoll_enable(); + + return ret; } static void __exit haltpoll_exit(void) { + arch_haltpoll_disable(); cpuidle_unregister(&haltpoll_driver); } diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h new file mode 100644 index 000000000000..fe5954c2409e --- /dev/null +++ b/include/linux/cpuidle_haltpoll.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CPUIDLE_HALTPOLL_H +#define _CPUIDLE_HALTPOLL_H + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL +#include +#else +static inline void arch_haltpoll_enable(void) +{ +} + +static inline void arch_haltpoll_disable(void) +{ +} +#endif +#endif From 7dcddef6f769d7e60691c732eb6d09cdb1d9df76 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 31 Jul 2019 15:29:52 +1000 Subject: [PATCH 029/126] cpuidle: header file stubs must be "static inline" An x86_64 allmodconfig build produces these errors: x86_64-linux-gnu-ld: kernel/sched/core.o: in function `cpuidle_poll_time': core.c:(.text+0x230): multiple definition of `cpuidle_poll_time'; arch/x86/= kernel/process.o:process.c:(.text+0xc0): first defined here (and more) Fixes: 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure") Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ba535a1a47d5..1a9f54eb3aa1 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -170,7 +170,7 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } -extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, +static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) From 0d1e16c640ee67dcae65ae8e3c6f58e6d8101047 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 16 Jul 2019 11:43:20 +0200 Subject: [PATCH 030/126] PM / Domains: Align in-parameter names for some genpd functions Align in-parameter names for the declarations of pm_genpd_add| remove_subdomain() and of_genpd_add_subdomain() according to their implementations, as to improve consistency. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 91d9bf497071..baf02ff91a31 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -197,9 +197,9 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); int pm_genpd_remove_device(struct device *dev); int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *new_subdomain); + struct generic_pm_domain *subdomain); int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *target); + struct generic_pm_domain *subdomain); int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); @@ -226,12 +226,12 @@ static inline int pm_genpd_remove_device(struct device *dev) return -ENOSYS; } static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *new_sd) + struct generic_pm_domain *subdomain) { return -ENOSYS; } static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, - struct generic_pm_domain *target) + struct generic_pm_domain *subdomain) { return -ENOSYS; } @@ -282,8 +282,8 @@ int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); -int of_genpd_add_subdomain(struct of_phandle_args *parent, - struct of_phandle_args *new_subdomain); +int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, + struct of_phandle_args *subdomain_spec); struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); @@ -316,8 +316,8 @@ static inline int of_genpd_add_device(struct of_phandle_args *args, return -ENODEV; } -static inline int of_genpd_add_subdomain(struct of_phandle_args *parent, - struct of_phandle_args *new_subdomain) +static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, + struct of_phandle_args *subdomain_spec) { return -ENODEV; } From 32b91ca15353b2803d27cfc747156e72dd2cd5d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jul 2019 10:53:21 +0200 Subject: [PATCH 031/126] cpuidle: menu: Allow tick to be stopped if PM QoS is used After commit 554c8aa8ecad ("sched: idle: Select idle state before stopping the tick") the menu governor prevents the scheduler tick from being stopped (unless stopped already) if there is a PM QoS latency constraint for the given CPU and the target residency of the deepest idle state matching that constraint is below the tick boundary. However, that is problematic if CPUs with PM QoS latency constraints are idle for long times, because it effectively causes the tick to run on them all the time which is wasteful. [It is also confusing and questionable if they are full dynticks CPUs.] To address that issue, make the menu governor allow the tick to be stopped only if the idle duration predicted by it is beyond the tick boundary, except when the shallowest idle state is selected upfront and it is not a "polling" one. Fixes: 554c8aa8ecad ("sched: idle: Select idle state before stopping the tick") Link: https://lore.kernel.org/lkml/79b247b3-e056-610e-9a07-e685dfdaa6c9@gmail.com/ Reported-by: Thomas Lindroth Tested-by: Thomas Lindroth Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index dace4c7f830c..e5a5d0c8d66b 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -301,9 +301,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, !drv->states[0].disabled && !dev->states_usage[0].disable)) { /* * In this case state[0] will be used no matter what, so return - * it right away and keep the tick running. + * it right away and keep the tick running if state[0] is a + * polling one. */ - *stop_tick = false; + *stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING); return 0; } @@ -394,16 +395,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return idx; } - if (s->exit_latency > latency_req) { - /* - * If we break out of the loop for latency reasons, use - * the target residency of the selected state as the - * expected idle duration so that the tick is retained - * as long as that target residency is low enough. - */ - predicted_us = drv->states[idx].target_residency; + if (s->exit_latency > latency_req) break; - } + idx = i; } From cab09f3d2d2a0a6cb3dfb678660d67a2c3764f50 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 19 Jul 2019 12:12:42 +0200 Subject: [PATCH 032/126] cpuidle: teo: Allow tick to be stopped if PM QoS is used The TEO goveror prevents the scheduler tick from being stopped (unless stopped already) if there is a PM QoS latency constraint for the given CPU and the target residency of the deepest idle state matching that constraint is below the tick boundary. However, that is problematic if CPUs with PM QoS latency constraints are idle for long times, because it effectively causes the tick to run on them all the time which is wasteful. [It is also confusing and questionable if they are full dynticks CPUs.] To address that issue, modify the TEO governor to carry out the entire search for the most suitable idle state (from the target residency perspective) even if a latency constraint is present, to allow it to determine the expected idle duration in all cases. Also, when using the last several measured idle duration values to refine the idle state selection, make it compare those values with the current expected idle duration value (instead of comparing them with the target residency of the idle state selected so far) which should prevent the tick from being retained when it makes sense to stop it sometimes (especially in the presence of PM QoS latency constraints). Fixes: b26bf6ab716f ("cpuidle: New timer events oriented governor for tickless systems") Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index a2fd81067a13..4d7a60c5b24a 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -242,7 +242,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); int latency_req = cpuidle_governor_latency_req(dev->cpu); unsigned int duration_us, count; - int max_early_idx, idx, i; + int max_early_idx, constraint_idx, idx, i; ktime_t delta_tick; if (dev->last_state_idx >= 0) { @@ -257,6 +257,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, count = 0; max_early_idx = -1; + constraint_idx = drv->state_count; idx = -1; for (i = 0; i < drv->state_count; i++) { @@ -286,16 +287,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (s->target_residency > duration_us) break; - if (s->exit_latency > latency_req) { - /* - * If we break out of the loop for latency reasons, use - * the target residency of the selected state as the - * expected idle duration to avoid stopping the tick - * as long as that target residency is low enough. - */ - duration_us = drv->states[idx].target_residency; - goto refine; - } + if (s->exit_latency > latency_req && constraint_idx > i) + constraint_idx = i; idx = i; @@ -321,7 +314,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, duration_us = drv->states[idx].target_residency; } -refine: + /* + * If there is a latency constraint, it may be necessary to use a + * shallower idle state than the one selected so far. + */ + if (constraint_idx < idx) + idx = constraint_idx; + if (idx < 0) { idx = 0; /* No states enabled. Must use 0. */ } else if (idx > 0) { @@ -331,13 +330,12 @@ refine: /* * Count and sum the most recent idle duration values less than - * the target residency of the state selected so far, find the - * max. + * the current expected idle duration value. */ for (i = 0; i < INTERVALS; i++) { unsigned int val = cpu_data->intervals[i]; - if (val >= drv->states[idx].target_residency) + if (val >= duration_us) continue; count++; @@ -356,8 +354,10 @@ refine: * would be too shallow. */ if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) { - idx = teo_find_shallower_state(drv, dev, idx, avg_us); duration_us = avg_us; + if (drv->states[idx].target_residency > avg_us) + idx = teo_find_shallower_state(drv, dev, + idx, avg_us); } } } From 2c8db5bef9fb442a5a0d2fee28aed20100362ce3 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 31 Jul 2019 14:29:33 -0700 Subject: [PATCH 033/126] PM/sleep: Expose suspend stats in sysfs Userspace can get suspend stats from the suspend stats debugfs node. Since debugfs doesn't have stable ABI, expose suspend stats in sysfs under /sys/power/suspend_stats. Signed-off-by: Kalesh Singh Reviewed-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 106 ++++++++++++++++++++++++++ kernel/power/main.c | 97 ++++++++++++++++++++++- 2 files changed, 201 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 3c5130355011..6f87b9dd384b 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -301,3 +301,109 @@ Description: Using this sysfs file will override any values that were set using the kernel command line for disk offset. + +What: /sys/power/suspend_stats +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats directory contains suspend related + statistics. + +What: /sys/power/suspend_stats/success +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/success file contains the number + of times entering system sleep state succeeded. + +What: /sys/power/suspend_stats/fail +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/fail file contains the number + of times entering system sleep state failed. + +What: /sys/power/suspend_stats/failed_freeze +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_freeze file contains the + number of times freezing processes failed. + +What: /sys/power/suspend_stats/failed_prepare +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_prepare file contains the + number of times preparing all non-sysdev devices for + a system PM transition failed. + +What: /sys/power/suspend_stats/failed_resume +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_resume file contains the + number of times executing "resume" callbacks of + non-sysdev devices failed. + +What: /sys/power/suspend_stats/failed_resume_early +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_resume_early file contains + the number of times executing "early resume" callbacks + of devices failed. + +What: /sys/power/suspend_stats/failed_resume_noirq +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_resume_noirq file contains + the number of times executing "noirq resume" callbacks + of devices failed. + +What: /sys/power/suspend_stats/failed_suspend +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_suspend file contains + the number of times executing "suspend" callbacks + of all non-sysdev devices failed. + +What: /sys/power/suspend_stats/failed_suspend_late +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_suspend_late file contains + the number of times executing "late suspend" callbacks + of all devices failed. + +What: /sys/power/suspend_stats/failed_suspend_noirq +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/failed_suspend_noirq file contains + the number of times executing "noirq suspend" callbacks + of all devices failed. + +What: /sys/power/suspend_stats/last_failed_dev +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/last_failed_dev file contains + the last device for which a suspend/resume callback failed. + +What: /sys/power/suspend_stats/last_failed_errno +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/last_failed_errno file contains + the errno of the last failed attempt at entering + system sleep state. + +What: /sys/power/suspend_stats/last_failed_step +Date: July 2019 +Contact: Kalesh Singh +Description: + The /sys/power/suspend_stats/last_failed_step file contains + the last failed step in the suspend/resume path. diff --git a/kernel/power/main.c b/kernel/power/main.c index bdbd605c4215..938dc53a8b94 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -254,7 +254,6 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ -#ifdef CONFIG_DEBUG_FS static char *suspend_step_name(enum suspend_stat_step step) { switch (step) { @@ -275,6 +274,92 @@ static char *suspend_step_name(enum suspend_stat_step step) } } +#define suspend_attr(_name) \ +static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%d\n", suspend_stats._name); \ +} \ +static struct kobj_attribute _name = __ATTR_RO(_name) + +suspend_attr(success); +suspend_attr(fail); +suspend_attr(failed_freeze); +suspend_attr(failed_prepare); +suspend_attr(failed_suspend); +suspend_attr(failed_suspend_late); +suspend_attr(failed_suspend_noirq); +suspend_attr(failed_resume); +suspend_attr(failed_resume_early); +suspend_attr(failed_resume_noirq); + +static ssize_t last_failed_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int index; + char *last_failed_dev = NULL; + + index = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + index %= REC_FAILED_NUM; + last_failed_dev = suspend_stats.failed_devs[index]; + + return sprintf(buf, "%s\n", last_failed_dev); +} +static struct kobj_attribute last_failed_dev = __ATTR_RO(last_failed_dev); + +static ssize_t last_failed_errno_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int index; + int last_failed_errno; + + index = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; + index %= REC_FAILED_NUM; + last_failed_errno = suspend_stats.errno[index]; + + return sprintf(buf, "%d\n", last_failed_errno); +} +static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno); + +static ssize_t last_failed_step_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int index; + enum suspend_stat_step step; + char *last_failed_step = NULL; + + index = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; + index %= REC_FAILED_NUM; + step = suspend_stats.failed_steps[index]; + last_failed_step = suspend_step_name(step); + + return sprintf(buf, "%s\n", last_failed_step); +} +static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step); + +static struct attribute *suspend_attrs[] = { + &success.attr, + &fail.attr, + &failed_freeze.attr, + &failed_prepare.attr, + &failed_suspend.attr, + &failed_suspend_late.attr, + &failed_suspend_noirq.attr, + &failed_resume.attr, + &failed_resume_early.attr, + &failed_resume_noirq.attr, + &last_failed_dev.attr, + &last_failed_errno.attr, + &last_failed_step.attr, + NULL, +}; + +static struct attribute_group suspend_attr_group = { + .name = "suspend_stats", + .attrs = suspend_attrs, +}; + +#ifdef CONFIG_DEBUG_FS static int suspend_stats_show(struct seq_file *s, void *unused) { int i, index, last_dev, last_errno, last_step; @@ -794,6 +879,14 @@ static const struct attribute_group attr_group = { .attrs = g, }; +static const struct attribute_group *attr_groups[] = { + &attr_group, +#ifdef CONFIG_PM_SLEEP + &suspend_attr_group, +#endif + NULL, +}; + struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); @@ -815,7 +908,7 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - error = sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_groups(power_kobj, attr_groups); if (error) return error; pm_print_times_init(); From 683df830e6bd12e9eb7da1fbdfccc6ac444508ae Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 6 Aug 2019 11:50:29 +0200 Subject: [PATCH 034/126] cpufreq: mediatek-cpufreq: Add compatible for MT8516 Add the compatible for MT8516 in order to take advantage of the MediaTek CPUFreq driver for Mediatek's MT8516 SoC. Signed-off-by: Fabien Parent Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index f14f3a85f2f7..10bc06f5dd45 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -535,6 +535,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt817x", }, { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, + { .compatible = "mediatek,mt8516", }, { } }; From 2e2c2fdc53437beffd2cf26aaf6187e602d565bc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:05:15 +0200 Subject: [PATCH 035/126] ACPI: PM: s2idle: Rearrange lps0_device_attach() To allow a subsequent change to be simpler, rearrange the code in lps0_device_attach() to reduce the indentation level and (while at it) make it avoid calling lpi_device_get_constraints() when lps0_device_handle is not going to be set. Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/sleep.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 9cb0532f7471..3d706938980a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -916,28 +916,30 @@ static int lps0_device_attach(struct acpi_device *adev, guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid); /* Check if the _DSM is present and as expected. */ out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL); - if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) { - char bitmask = *(char *)out_obj->buffer.pointer; - - lps0_dsm_func_mask = bitmask; - lps0_device_handle = adev->handle; - /* - * Use suspend-to-idle by default if the default - * suspend mode was not set from the command line. - */ - if (mem_sleep_default > PM_SUSPEND_MEM) - mem_sleep_current = PM_SUSPEND_TO_IDLE; - - acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", - bitmask); - } else { + if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) { acpi_handle_debug(adev->handle, "_DSM function 0 evaluation failed\n"); + return 0; } + + lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer; + ACPI_FREE(out_obj); + acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", + lps0_dsm_func_mask); + + lps0_device_handle = adev->handle; + lpi_device_get_constraints(); + /* + * Use suspend-to-idle by default if the default suspend mode was not + * set from the command line. + */ + if (mem_sleep_default > PM_SUSPEND_MEM) + mem_sleep_current = PM_SUSPEND_TO_IDLE; + return 0; } From 068b47d0984b8756ae71702a1a87aa226cb72fe8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:05:25 +0200 Subject: [PATCH 036/126] ACPI: PM: s2idle: Add acpi.sleep_no_lps0 module parameter Add a module parameter to prevent the ACPI LPS0 _DSM functions from being invoked (if need be) and rework the suspend-to-idle blacklist entries in acpisleep_dmi_table[] to make them simply prevent suspend-to-idle from being used by default on the systems in question (which really is the original purpose of those entries). Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/sleep.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 3d706938980a..4a94600fea39 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -89,6 +89,10 @@ bool acpi_sleep_state_supported(u8 sleep_state) } #ifdef CONFIG_ACPI_SLEEP +static bool sleep_no_lps0 __read_mostly; +module_param(sleep_no_lps0, bool, 0644); +MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface"); + static u32 acpi_target_sleep_state = ACPI_STATE_S0; u32 acpi_target_system_state(void) @@ -158,11 +162,11 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d) return 0; } -static bool acpi_sleep_no_lps0; +static bool acpi_sleep_default_s3; -static int __init init_no_lps0(const struct dmi_system_id *d) +static int __init init_default_s3(const struct dmi_system_id *d) { - acpi_sleep_no_lps0 = true; + acpi_sleep_default_s3 = true; return 0; } @@ -363,7 +367,7 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { * S0 Idle firmware interface. */ { - .callback = init_no_lps0, + .callback = init_default_s3, .ident = "Dell XPS13 9360", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), @@ -376,7 +380,7 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { * https://bugzilla.kernel.org/show_bug.cgi?id=199057). */ { - .callback = init_no_lps0, + .callback = init_default_s3, .ident = "ThinkPad X1 Tablet(2016)", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), @@ -524,8 +528,9 @@ static void acpi_pm_end(void) acpi_sleep_tts_switch(acpi_target_sleep_state); } #else /* !CONFIG_ACPI_SLEEP */ +#define sleep_no_lps0 (1) #define acpi_target_sleep_state ACPI_STATE_S0 -#define acpi_sleep_no_lps0 (false) +#define acpi_sleep_default_s3 (1) static inline void acpi_sleep_dmi_check(void) {} #endif /* CONFIG_ACPI_SLEEP */ @@ -904,12 +909,6 @@ static int lps0_device_attach(struct acpi_device *adev, if (lps0_device_handle) return 0; - if (acpi_sleep_no_lps0) { - acpi_handle_info(adev->handle, - "Low Power S0 Idle interface disabled\n"); - return 0; - } - if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) return 0; @@ -937,7 +936,7 @@ static int lps0_device_attach(struct acpi_device *adev, * Use suspend-to-idle by default if the default suspend mode was not * set from the command line. */ - if (mem_sleep_default > PM_SUSPEND_MEM) + if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3) mem_sleep_current = PM_SUSPEND_TO_IDLE; return 0; @@ -957,7 +956,7 @@ static int acpi_s2idle_begin(void) static int acpi_s2idle_prepare(void) { - if (lps0_device_handle) { + if (lps0_device_handle && !sleep_no_lps0) { acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); } @@ -977,7 +976,7 @@ static int acpi_s2idle_prepare(void) static void acpi_s2idle_wake(void) { - if (lps0_device_handle && pm_debug_messages_on) + if (lps0_device_handle && !sleep_no_lps0 && pm_debug_messages_on) lpi_check_constraints(); /* @@ -1025,7 +1024,7 @@ static void acpi_s2idle_restore(void) if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); - if (lps0_device_handle) { + if (lps0_device_handle && !sleep_no_lps0) { acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); } From fcd0a04267ac7c5d5f9a27d2af824270f2091760 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:05:33 +0200 Subject: [PATCH 037/126] ACPI: PM: s2idle: Switch EC over to polling during "noirq" suspend Since the ACPI SCI is set up for system wakeup before the "noirq" suspend of devices, it is better to make suspend-to-idle follow suspend-to-RAM (S3) and switch over the EC to polling during "noirq" suspend (and back to interrupt-based flow during "noirq" resume). The frequency of spurious wakeup interrupts from the EC may be reduced this way. Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/ec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index b996ca5f253f..5a38409114d8 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1958,8 +1958,7 @@ static int acpi_ec_suspend_noirq(struct device *dev) ec->reference_count >= 1) acpi_set_gpe(NULL, ec->gpe, ACPI_GPE_DISABLE); - if (acpi_sleep_no_ec_events()) - acpi_ec_enter_noirq(ec); + acpi_ec_enter_noirq(ec); return 0; } @@ -1968,8 +1967,7 @@ static int acpi_ec_resume_noirq(struct device *dev) { struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev)); - if (acpi_sleep_no_ec_events()) - acpi_ec_leave_noirq(ec); + acpi_ec_leave_noirq(ec); if (ec_no_wakeup && test_bit(EC_FLAGS_STARTED, &ec->flags) && ec->reference_count >= 1) From 6e86633a791fdf631617ef3a9af3263141d34bc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:05:42 +0200 Subject: [PATCH 038/126] ACPI: PM: s2idle: Eliminate acpi_sleep_no_ec_events() Change acpi_ec_suspend() to use pm_suspend_no_platform() instead of acpi_sleep_no_ec_events(), which allows the latter to be eliminated along with the s2idle_in_progress variable which is only used by it. Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/ec.c | 2 +- drivers/acpi/internal.h | 2 -- drivers/acpi/sleep.c | 9 --------- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5a38409114d8..be29b9919e2d 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1941,7 +1941,7 @@ static int acpi_ec_suspend(struct device *dev) struct acpi_ec *ec = acpi_driver_data(to_acpi_device(dev)); - if (acpi_sleep_no_ec_events() && ec_freeze_events) + if (!pm_suspend_no_platform() && ec_freeze_events) acpi_ec_disable_event(ec); return 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index bcc080511197..8c9cd3733f07 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -210,11 +210,9 @@ void acpi_ec_flush_work(void); -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT extern bool acpi_s2idle_wakeup(void); -extern bool acpi_sleep_no_ec_events(void); extern int acpi_sleep_init(void); #else static inline bool acpi_s2idle_wakeup(void) { return false; } -static inline bool acpi_sleep_no_ec_events(void) { return true; } static inline int acpi_sleep_init(void) { return -ENXIO; } #endif diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 4a94600fea39..864bb18d3a5d 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -696,7 +696,6 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = { .recover = acpi_pm_finish, }; -static bool s2idle_in_progress; static bool s2idle_wakeup; /* @@ -950,7 +949,6 @@ static struct acpi_scan_handler lps0_handler = { static int acpi_s2idle_begin(void) { acpi_scan_lock_acquire(); - s2idle_in_progress = true; return 0; } @@ -1032,7 +1030,6 @@ static void acpi_s2idle_restore(void) static void acpi_s2idle_end(void) { - s2idle_in_progress = false; acpi_scan_lock_release(); } @@ -1060,7 +1057,6 @@ static void acpi_sleep_suspend_setup(void) } #else /* !CONFIG_SUSPEND */ -#define s2idle_in_progress (false) #define s2idle_wakeup (false) #define lps0_device_handle (NULL) static inline void acpi_sleep_suspend_setup(void) {} @@ -1071,11 +1067,6 @@ bool acpi_s2idle_wakeup(void) return s2idle_wakeup; } -bool acpi_sleep_no_ec_events(void) -{ - return !s2idle_in_progress; -} - #ifdef CONFIG_PM_SLEEP static u32 saved_bm_rld; From d7589404932be148fabe696b56b7c391bad6bdb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:05:52 +0200 Subject: [PATCH 039/126] ACPI: EC: PM: Consolidate some code depending on PM_SLEEP Move some routines, including acpi_ec_dispatch_gpe(), that are only used if CONFIG_PM_SLEEP is set to the #ifdef block containing the EC suspend and resume callbacks, to make the "full EC PM picture" easier to follow. While at it, move the header of acpi_ec_dispatch_gpe() in the header file to a CONFIG_PM_SLEEP #ifdef block. Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/ec.c | 54 ++++++++++++++++++++--------------------- drivers/acpi/internal.h | 2 +- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index be29b9919e2d..8d7247b4441f 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1049,33 +1049,6 @@ void acpi_ec_unblock_transactions(void) acpi_ec_start(first_ec, true); } -#ifdef CONFIG_PM_SLEEP -void acpi_ec_mark_gpe_for_wake(void) -{ - if (first_ec && !ec_no_wakeup) - acpi_mark_gpe_for_wake(NULL, first_ec->gpe); -} -EXPORT_SYMBOL_GPL(acpi_ec_mark_gpe_for_wake); - -void acpi_ec_set_gpe_wake_mask(u8 action) -{ - if (pm_suspend_no_platform() && first_ec && !ec_no_wakeup) - acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); -} -EXPORT_SYMBOL_GPL(acpi_ec_set_gpe_wake_mask); -#endif - -bool acpi_ec_dispatch_gpe(void) -{ - u32 ret; - - if (!first_ec) - return false; - - ret = acpi_dispatch_gpe(NULL, first_ec->gpe); - return ret == ACPI_INTERRUPT_HANDLED; -} - /* -------------------------------------------------------------------------- Event Management -------------------------------------------------------------------------- */ @@ -1984,7 +1957,32 @@ static int acpi_ec_resume(struct device *dev) acpi_ec_enable_event(ec); return 0; } -#endif + +void acpi_ec_mark_gpe_for_wake(void) +{ + if (first_ec && !ec_no_wakeup) + acpi_mark_gpe_for_wake(NULL, first_ec->gpe); +} +EXPORT_SYMBOL_GPL(acpi_ec_mark_gpe_for_wake); + +void acpi_ec_set_gpe_wake_mask(u8 action) +{ + if (pm_suspend_no_platform() && first_ec && !ec_no_wakeup) + acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); +} +EXPORT_SYMBOL_GPL(acpi_ec_set_gpe_wake_mask); + +bool acpi_ec_dispatch_gpe(void) +{ + u32 ret; + + if (!first_ec) + return false; + + ret = acpi_dispatch_gpe(NULL, first_ec->gpe); + return ret == ACPI_INTERRUPT_HANDLED; +} +#endif /* CONFIG_PM_SLEEP */ static const struct dev_pm_ops acpi_ec_pm = { SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend_noirq, acpi_ec_resume_noirq) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 8c9cd3733f07..afe6636f9ad3 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -194,7 +194,6 @@ void acpi_ec_ecdt_probe(void); void acpi_ec_dsdt_probe(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); -bool acpi_ec_dispatch_gpe(void); int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); @@ -202,6 +201,7 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); +bool acpi_ec_dispatch_gpe(void); #endif From 29113f2f0a7d8d5332bfdfdfca995c06d0896e83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 31 Jul 2019 11:06:00 +0200 Subject: [PATCH 040/126] ACPI: EC: PM: Make acpi_ec_dispatch_gpe() print debug message Add a pm_pr_dbg() debug statement to acpi_ec_dispatch_gpe() to print a message when the EC GPE has been dispatched (because its status was set). Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/ec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 8d7247b4441f..58597ec813eb 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1980,7 +1980,11 @@ bool acpi_ec_dispatch_gpe(void) return false; ret = acpi_dispatch_gpe(NULL, first_ec->gpe); - return ret == ACPI_INTERRUPT_HANDLED; + if (ret == ACPI_INTERRUPT_HANDLED) { + pm_pr_dbg("EC GPE dispatched\n"); + return true; + } + return false; } #endif /* CONFIG_PM_SLEEP */ From ac9eafbe930abb589e9289842a99cc575cadb854 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Aug 2019 19:31:10 +0200 Subject: [PATCH 041/126] ACPI: PM: s2idle: Execute LPS0 _DSM functions with suspended devices According to Section 3.5 of the "Intel Low Power S0 Idle" document [1], Function 5 of the LPS0 _DSM is expected to be invoked when the system configuration matches the criteria for entering the target low-power state of the platform. In particular, this means that all devices should be suspended and in low-power states already when that function is invoked. This is not the case currently, however, because Function 5 of the LPS0 _DSM is invoked by it before the "noirq" phase of device suspend, which means that some devices may not have been put into low-power states yet at that point. That is a consequence of the previous design of the suspend-to-idle flow that allowed the "noirq" phase of device suspend and the "noirq" phase of device resume to be carried out for multiple times while "suspended" (if any spurious wakeup events were detected) and the point of the LPS0 _DSM Function 5 invocation was chosen so as to call it (and LPS0 _DSM Function 6 analogously) once per suspend-resume cycle (regardless of how many times the "noirq" phases of device suspend and resume were carried out while "suspended"). Now that the suspend-to-idle flow has been redesigned to carry out the "noirq" phases of device suspend and resume once in each cycle, the code can be reordered to follow the specification that it is based on more closely. For this purpose, add ->prepare_late and ->restore_early platform callbacks for suspend-to-idle, to be executed, respectively, after the "noirq" phase of suspending devices and before the "noirq" phase of resuming them and make ACPI use them for the invocation of LPS0 _DSM functions as appropriate. While at it, move the LPS0 entry requirements check to be made before invoking Functions 3 and 5 of the LPS0 _DSM (also once per cycle) as follows from the specification [1]. Link: https://uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf # [1] Signed-off-by: Rafael J. Wysocki Tested-by: Kai-Heng Feng --- drivers/acpi/sleep.c | 36 ++++++++++++++++++++++++------------ include/linux/suspend.h | 2 ++ kernel/power/suspend.c | 12 +++++++++--- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 864bb18d3a5d..8f7e95f97e1f 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -954,11 +954,6 @@ static int acpi_s2idle_begin(void) static int acpi_s2idle_prepare(void) { - if (lps0_device_handle && !sleep_no_lps0) { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); - } - if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); @@ -972,11 +967,22 @@ static int acpi_s2idle_prepare(void) return 0; } -static void acpi_s2idle_wake(void) +static int acpi_s2idle_prepare_late(void) { - if (lps0_device_handle && !sleep_no_lps0 && pm_debug_messages_on) + if (!lps0_device_handle || sleep_no_lps0) + return 0; + + if (pm_debug_messages_on) lpi_check_constraints(); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY); + + return 0; +} + +static void acpi_s2idle_wake(void) +{ /* * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has * not triggered while suspended, so bail out. @@ -1011,6 +1017,15 @@ static void acpi_s2idle_wake(void) rearm_wake_irq(acpi_sci_irq); } +static void acpi_s2idle_restore_early(void) +{ + if (!lps0_device_handle || sleep_no_lps0) + return; + + acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); + acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); +} + static void acpi_s2idle_restore(void) { s2idle_wakeup = false; @@ -1021,11 +1036,6 @@ static void acpi_s2idle_restore(void) if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); - - if (lps0_device_handle && !sleep_no_lps0) { - acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON); - } } static void acpi_s2idle_end(void) @@ -1036,7 +1046,9 @@ static void acpi_s2idle_end(void) static const struct platform_s2idle_ops acpi_s2idle_ops = { .begin = acpi_s2idle_begin, .prepare = acpi_s2idle_prepare, + .prepare_late = acpi_s2idle_prepare_late, .wake = acpi_s2idle_wake, + .restore_early = acpi_s2idle_restore_early, .restore = acpi_s2idle_restore, .end = acpi_s2idle_end, }; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f0c4a8445140..6fc8843f1c9e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -190,7 +190,9 @@ struct platform_suspend_ops { struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); + int (*prepare_late)(void); void (*wake)(void); + void (*restore_early)(void); void (*restore)(void); void (*end)(void); }; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 2b6057853b33..ed9ddef12b13 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -253,13 +253,19 @@ static int platform_suspend_prepare_late(suspend_state_t state) static int platform_suspend_prepare_noirq(suspend_state_t state) { - return state != PM_SUSPEND_TO_IDLE && suspend_ops->prepare_late ? - suspend_ops->prepare_late() : 0; + if (state == PM_SUSPEND_TO_IDLE) { + if (s2idle_ops && s2idle_ops->prepare_late) + return s2idle_ops->prepare_late(); + } + return suspend_ops->prepare_late ? suspend_ops->prepare_late() : 0; } static void platform_resume_noirq(suspend_state_t state) { - if (state != PM_SUSPEND_TO_IDLE && suspend_ops->wake) + if (state == PM_SUSPEND_TO_IDLE) { + if (s2idle_ops && s2idle_ops->restore_early) + s2idle_ops->restore_early(); + } else if (suspend_ops->wake) suspend_ops->wake(); } From ee8193ee96c7802b5ba766243c4c950059a43323 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 2 Aug 2019 12:26:02 +0200 Subject: [PATCH 042/126] ACPI: PM: Print debug messages on device power state changes Add an acpi_handle_debug() statement to acpi_device_set_power() to allow ACPI device power state changes to be tracked. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f616b16c1f0b..4cb93d4f2ab6 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -166,6 +166,10 @@ int acpi_device_set_power(struct acpi_device *device, int state) || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD)) return -EINVAL; + acpi_handle_debug(device->handle, "Power state change: %s -> %s\n", + acpi_power_state_string(device->power.state), + acpi_power_state_string(state)); + /* Make sure this is a valid target state */ /* There is a special case for D0 addressed below. */ From 31eb845718398f9bc9f6fbe1aca675f4e6284392 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 8 Aug 2019 11:39:17 +0200 Subject: [PATCH 043/126] intel-hid: intel-vbtn: Avoid leaking wakeup_mode set Both intel-hid and intel-vbtn set a wakeup_mode flag causing them to behave in a special way during system suspend and while suspended in their "prepare" PM callbacks and clear it in their "resume" PM callbacks. That may cause the wakeup_mode flag to remain set after a system suspend failure (if some other driver's "suspend" callback returns an error before the "suspend" callback of either intel-hid or intel-vbtn is invoked). After commit 10a08fd65ec1 ("ACPI: PM: Set up EC GPE for system wakeup from drivers that need it") that also affects the "wakeup mask" bit of the EC GPE, which may not be cleared after a failing system suspend. Fix this issue by adding "complete" PM callbacks to intel-hid and intel-vbtn to clear the wakeup_mode flag and the "wakeup mask" bit of the EC GPE if they have not been cleared earlier. Fixes: 10a08fd65ec1 ("ACPI: PM: Set up EC GPE for system wakeup from drivers that need it") Signed-off-by: Rafael J. Wysocki Acked-by: Andy Shevchenko --- drivers/platform/x86/intel-hid.c | 17 ++++++++++++----- drivers/platform/x86/intel-vbtn.c | 12 +++++++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index e51c72b92cbd..6feda887df9d 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -262,6 +262,16 @@ static int intel_hid_pm_prepare(struct device *device) return 0; } +static void intel_hid_pm_complete(struct device *device) +{ + struct intel_hid_priv *priv = dev_get_drvdata(device); + + if (priv->wakeup_mode) { + acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); + priv->wakeup_mode = false; + } +} + static int intel_hid_pl_suspend_handler(struct device *device) { if (pm_suspend_via_firmware()) { @@ -273,12 +283,8 @@ static int intel_hid_pl_suspend_handler(struct device *device) static int intel_hid_pl_resume_handler(struct device *device) { - if (device_may_wakeup(device)) { - struct intel_hid_priv *priv = dev_get_drvdata(device); + intel_hid_pm_complete(device); - acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); - priv->wakeup_mode = false; - } if (pm_resume_via_firmware()) { intel_hid_set_enable(device, true); intel_button_array_enable(device, true); @@ -288,6 +294,7 @@ static int intel_hid_pl_resume_handler(struct device *device) static const struct dev_pm_ops intel_hid_pl_pm_ops = { .prepare = intel_hid_pm_prepare, + .complete = intel_hid_pm_complete, .freeze = intel_hid_pl_suspend_handler, .thaw = intel_hid_pl_resume_handler, .restore = intel_hid_pl_resume_handler, diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index ab84e1bbdedd..b28e5519337e 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -210,19 +210,25 @@ static int intel_vbtn_pm_prepare(struct device *dev) return 0; } -static int intel_vbtn_pm_resume(struct device *dev) +static void intel_vbtn_pm_complete(struct device *dev) { - if (device_may_wakeup(dev)) { - struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + if (priv->wakeup_mode) { acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); priv->wakeup_mode = false; } +} + +static int intel_vbtn_pm_resume(struct device *dev) +{ + intel_vbtn_pm_complete(dev); return 0; } static const struct dev_pm_ops intel_vbtn_pm_ops = { .prepare = intel_vbtn_pm_prepare, + .complete = intel_vbtn_pm_complete, .resume = intel_vbtn_pm_resume, .restore = intel_vbtn_pm_resume, .thaw = intel_vbtn_pm_resume, From d19bdb876bece27187d4ffbc272672e1239cd313 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 8 Aug 2019 11:39:23 +0200 Subject: [PATCH 044/126] intel-hid: Disable button array during suspend-to-idle Notice that intel_button_array_enable() never disables the power button which is the only one needed to wake up the system from suspend-to-idle, so it can be safely called during suspend-to-idle as well as during "regular" system suspend, and rearrange the code in the driver's "suspend" and "resume" callbacks accordingly. While at it, use pm_suspend_no_platform() to check if the current suspend-resume cycle is suspend-to-idle, as that is the only case when the device should be enabled while suspended. Signed-off-by: Rafael J. Wysocki Acked-by: Andy Shevchenko --- drivers/platform/x86/intel-hid.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 6feda887df9d..18ac237114ff 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -274,10 +274,11 @@ static void intel_hid_pm_complete(struct device *device) static int intel_hid_pl_suspend_handler(struct device *device) { - if (pm_suspend_via_firmware()) { + intel_button_array_enable(device, false); + + if (!pm_suspend_no_platform()) intel_hid_set_enable(device, false); - intel_button_array_enable(device, false); - } + return 0; } @@ -285,10 +286,10 @@ static int intel_hid_pl_resume_handler(struct device *device) { intel_hid_pm_complete(device); - if (pm_resume_via_firmware()) { + if (!pm_suspend_no_platform()) intel_hid_set_enable(device, true); - intel_button_array_enable(device, true); - } + + intel_button_array_enable(device, true); return 0; } From 11f26633cccb7243217370837cbb066a73f678a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 10 Aug 2019 13:18:06 +0200 Subject: [PATCH 045/126] PM: suspend: Fix platform_suspend_prepare_noirq() After commit ac9eafbe930a ("ACPI: PM: s2idle: Execute LPS0 _DSM functions with suspended devices"), a NULL pointer may be dereferenced if suspend-to-idle is attempted on a platform without "traditional" suspend support due to invalid fall-through in platform_suspend_prepare_noirq(). Fix that and while at it add missing braces in platform_resume_noirq(). Fixes: ac9eafbe930a ("ACPI: PM: s2idle: Execute LPS0 _DSM functions with suspended devices") Reported-by: Marek Szyprowski Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index ed9ddef12b13..f3b7239f1892 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -253,10 +253,10 @@ static int platform_suspend_prepare_late(suspend_state_t state) static int platform_suspend_prepare_noirq(suspend_state_t state) { - if (state == PM_SUSPEND_TO_IDLE) { - if (s2idle_ops && s2idle_ops->prepare_late) - return s2idle_ops->prepare_late(); - } + if (state == PM_SUSPEND_TO_IDLE) + return s2idle_ops && s2idle_ops->prepare_late ? + s2idle_ops->prepare_late() : 0; + return suspend_ops->prepare_late ? suspend_ops->prepare_late() : 0; } @@ -265,8 +265,9 @@ static void platform_resume_noirq(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) { if (s2idle_ops && s2idle_ops->restore_early) s2idle_ops->restore_early(); - } else if (suspend_ops->wake) + } else if (suspend_ops->wake) { suspend_ops->wake(); + } } static void platform_resume_early(suspend_state_t state) From 6a1490367c44f94614e39e8b98ff7114ff8a6449 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:01 +0530 Subject: [PATCH 046/126] cpufreq: Add policy create/remove notifiers back This effectively reverts some changes made by commit f9f41e3ef99 ("cpufreq: Remove policy create/remove notifiers"). We have a new use case for policy create/remove notifiers (for allocating/freeing QoS requests per policy), so add them back. Signed-off-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 ++++++++++++++- include/linux/cpufreq.h | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8dda62367816..c13dcb59b30c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1266,7 +1266,17 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) DEV_PM_QOS_MAX_FREQUENCY); dev_pm_qos_remove_notifier(dev, &policy->nb_min, DEV_PM_QOS_MIN_FREQUENCY); - dev_pm_qos_remove_request(policy->max_freq_req); + + if (policy->max_freq_req) { + /* + * CPUFREQ_CREATE_POLICY notification is sent only after + * successfully adding max_freq_req request. + */ + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_REMOVE_POLICY, policy); + dev_pm_qos_remove_request(policy->max_freq_req); + } + dev_pm_qos_remove_request(policy->min_freq_req); kfree(policy->min_freq_req); @@ -1391,6 +1401,9 @@ static int cpufreq_online(unsigned int cpu) ret); goto out_destroy_policy; } + + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_CREATE_POLICY, policy); } if (cpufreq_driver->get && has_target()) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 536a049d7ecc..afc10384a681 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -458,6 +458,8 @@ static inline void cpufreq_resume(void) {} /* Policy Notifiers */ #define CPUFREQ_ADJUST (0) #define CPUFREQ_NOTIFY (1) +#define CPUFREQ_CREATE_POLICY (2) +#define CPUFREQ_REMOVE_POLICY (3) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); From 5130802ddbb10a73cf17d1117e2044c8d5ba7d65 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:02 +0530 Subject: [PATCH 047/126] thermal: cpu_cooling: Switch to QoS requests for freq limits The cpufreq core now takes the min/max frequency constraints via QoS requests and the CPUFREQ_ADJUST notifier shall get removed later on. Switch over to using the QoS request for maximum frequency constraint for cpu_cooling driver. Signed-off-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/cpu_cooling.c | 110 ++++++++-------------------------- 1 file changed, 26 insertions(+), 84 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 4c5db59a619b..391f39776c6a 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -66,8 +67,6 @@ struct time_in_idle { * @last_load: load measured by the latest call to cpufreq_get_requested_power() * @cpufreq_state: integer value representing the current state of cpufreq * cooling devices. - * @clipped_freq: integer value representing the absolute value of the clipped - * frequency. * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. * @freq_table: Freq table in descending order of frequencies @@ -84,12 +83,12 @@ struct cpufreq_cooling_device { int id; u32 last_load; unsigned int cpufreq_state; - unsigned int clipped_freq; unsigned int max_level; struct freq_table *freq_table; /* In descending order */ struct cpufreq_policy *policy; struct list_head node; struct time_in_idle *idle_time; + struct dev_pm_qos_request qos_req; }; static DEFINE_IDA(cpufreq_ida); @@ -118,59 +117,6 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, return level - 1; } -/** - * cpufreq_thermal_notifier - notifier callback for cpufreq policy change. - * @nb: struct notifier_block * with callback info. - * @event: value showing cpufreq event for which this function invoked. - * @data: callback-specific data - * - * Callback to hijack the notification on cpufreq policy transition. - * Every time there is a change in policy, we will intercept and - * update the cpufreq policy with thermal constraints. - * - * Return: 0 (success) - */ -static int cpufreq_thermal_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - unsigned long clipped_freq; - struct cpufreq_cooling_device *cpufreq_cdev; - - if (event != CPUFREQ_ADJUST) - return NOTIFY_DONE; - - mutex_lock(&cooling_list_lock); - list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) { - /* - * A new copy of the policy is sent to the notifier and can't - * compare that directly. - */ - if (policy->cpu != cpufreq_cdev->policy->cpu) - continue; - - /* - * policy->max is the maximum allowed frequency defined by user - * and clipped_freq is the maximum that thermal constraints - * allow. - * - * If clipped_freq is lower than policy->max, then we need to - * readjust policy->max. - * - * But, if clipped_freq is greater than policy->max, we don't - * need to do anything. - */ - clipped_freq = cpufreq_cdev->clipped_freq; - - if (policy->max > clipped_freq) - cpufreq_verify_within_limits(policy, 0, clipped_freq); - break; - } - mutex_unlock(&cooling_list_lock); - - return NOTIFY_OK; -} - /** * update_freq_table() - Update the freq table with power numbers * @cpufreq_cdev: the cpufreq cooling device in which to update the table @@ -374,7 +320,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) { struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; - unsigned int clip_freq; /* Request state should be less than max_level */ if (WARN_ON(state > cpufreq_cdev->max_level)) @@ -384,13 +329,10 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, if (cpufreq_cdev->cpufreq_state == state) return 0; - clip_freq = cpufreq_cdev->freq_table[state].frequency; cpufreq_cdev->cpufreq_state = state; - cpufreq_cdev->clipped_freq = clip_freq; - cpufreq_update_policy(cpufreq_cdev->policy->cpu); - - return 0; + return dev_pm_qos_update_request(&cpufreq_cdev->qos_req, + cpufreq_cdev->freq_table[state].frequency); } /** @@ -554,11 +496,6 @@ static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = { .power2state = cpufreq_power2state, }; -/* Notifier for cpufreq policy change */ -static struct notifier_block thermal_cpufreq_notifier_block = { - .notifier_call = cpufreq_thermal_notifier, -}; - static unsigned int find_next_max(struct cpufreq_frequency_table *table, unsigned int prev_max) { @@ -596,9 +533,16 @@ __cpufreq_cooling_register(struct device_node *np, struct cpufreq_cooling_device *cpufreq_cdev; char dev_name[THERMAL_NAME_LENGTH]; unsigned int freq, i, num_cpus; + struct device *dev; int ret; struct thermal_cooling_device_ops *cooling_ops; - bool first; + + dev = get_cpu_device(policy->cpu); + if (unlikely(!dev)) { + pr_warn("No cpu device for cpu %d\n", policy->cpu); + return ERR_PTR(-ENODEV); + } + if (IS_ERR_OR_NULL(policy)) { pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy); @@ -671,25 +615,29 @@ __cpufreq_cooling_register(struct device_node *np, cooling_ops = &cpufreq_cooling_ops; } + ret = dev_pm_qos_add_request(dev, &cpufreq_cdev->qos_req, + DEV_PM_QOS_MAX_FREQUENCY, + cpufreq_cdev->freq_table[0].frequency); + if (ret < 0) { + pr_err("%s: Failed to add freq constraint (%d)\n", __func__, + ret); + cdev = ERR_PTR(ret); + goto remove_ida; + } + cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev, cooling_ops); if (IS_ERR(cdev)) - goto remove_ida; - - cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency; + goto remove_qos_req; mutex_lock(&cooling_list_lock); - /* Register the notifier for first cpufreq cooling device */ - first = list_empty(&cpufreq_cdev_list); list_add(&cpufreq_cdev->node, &cpufreq_cdev_list); mutex_unlock(&cooling_list_lock); - if (first) - cpufreq_register_notifier(&thermal_cpufreq_notifier_block, - CPUFREQ_POLICY_NOTIFIER); - return cdev; +remove_qos_req: + dev_pm_qos_remove_request(&cpufreq_cdev->qos_req); remove_ida: ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); free_table: @@ -777,7 +725,6 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { struct cpufreq_cooling_device *cpufreq_cdev; - bool last; if (!cdev) return; @@ -786,15 +733,10 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) mutex_lock(&cooling_list_lock); list_del(&cpufreq_cdev->node); - /* Unregister the notifier for the last cpufreq cooling device */ - last = list_empty(&cpufreq_cdev_list); mutex_unlock(&cooling_list_lock); - if (last) - cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, - CPUFREQ_POLICY_NOTIFIER); - thermal_cooling_device_unregister(cdev); + dev_pm_qos_remove_request(&cpufreq_cdev->qos_req); ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id); kfree(cpufreq_cdev->idle_time); kfree(cpufreq_cdev->freq_table); From b7e7fffd3e8c81aa0654ed4c9f7a142b4c4dab1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Jul 2019 12:11:08 +0200 Subject: [PATCH 048/126] cpuidle: teo: Get rid of redundant check in teo_update() Notice that setting measured_us to UINT_MAX in teo_update() earlier doesn't change the behavior of the following code, so do that and eliminate a redundant check used for setting measured_us to UINT_MAX. This change is not expected to alter functionality. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 4d7a60c5b24a..b5a0e498f798 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -123,10 +123,11 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { /* - * One of the safety nets has triggered or this was a timer - * wakeup (or equivalent). + * One of the safety nets has triggered or the wakeup was close + * enough to the closest timer event expected at the idle state + * selection time to be discarded. */ - measured_us = sleep_length_us; + measured_us = UINT_MAX; } else { unsigned int lat; @@ -188,15 +189,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->states[idx_timer].hits = hits; } - /* - * If the total time span between idle state selection and the "reflect" - * callback is greater than or equal to the sleep length determined at - * the idle state selection time, the wakeup is likely to be due to a - * timer event. - */ - if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) - measured_us = UINT_MAX; - /* * Save idle duration values corresponding to non-timer wakeups for * pattern detection. From d30bdfc0ecf885ca0495bdee522cb9d7ee005cf8 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 9 Aug 2019 15:10:23 +0800 Subject: [PATCH 049/126] PM: sleep: Replace strncmp() with str_has_prefix() Use str_has_prefix() instead of strncmp() which is less straightforward in decode_state(). Signed-off-by: Chuhong Yuan [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 938dc53a8b94..e8710d179b35 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -580,7 +580,7 @@ static suspend_state_t decode_state(const char *buf, size_t n) len = p ? p - buf : n; /* Check hibernation first. */ - if (len == 4 && !strncmp(buf, "disk", len)) + if (len == 4 && str_has_prefix(buf, "disk")) return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND From 0d105d0f25386ee5b74603db6d249ebed7590cbc Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Tue, 6 Aug 2019 18:48:44 -0700 Subject: [PATCH 050/126] PM / wakeup: Drop wakeup_source_init(), wakeup_source_prepare() wakeup_source_init() has no users. Remove it. As a result, wakeup_source_prepare() is only called from wakeup_source_create(). Merge wakeup_source_prepare() into wakeup_source_create() and remove it. Change wakeup_source_create() behavior so that assigning NULL to wakeup source's name throws an error. Signed-off-by: Tri Vo Reviewed-by: Greg Kroah-Hartman Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 33 +++++++++++++-------------------- include/linux/pm_wakeup.h | 11 ----------- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index ee31d4f8d856..3938892c8903 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -72,23 +72,6 @@ static struct wakeup_source deleted_ws = { .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), }; -/** - * wakeup_source_prepare - Prepare a new wakeup source for initialization. - * @ws: Wakeup source to prepare. - * @name: Pointer to the name of the new wakeup source. - * - * Callers must ensure that the @name string won't be freed when @ws is still in - * use. - */ -void wakeup_source_prepare(struct wakeup_source *ws, const char *name) -{ - if (ws) { - memset(ws, 0, sizeof(*ws)); - ws->name = name; - } -} -EXPORT_SYMBOL_GPL(wakeup_source_prepare); - /** * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. @@ -96,13 +79,23 @@ EXPORT_SYMBOL_GPL(wakeup_source_prepare); struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; + const char *ws_name; - ws = kmalloc(sizeof(*ws), GFP_KERNEL); + ws = kzalloc(sizeof(*ws), GFP_KERNEL); if (!ws) - return NULL; + goto err_ws; + + ws_name = kstrdup_const(name, GFP_KERNEL); + if (!ws_name) + goto err_name; + ws->name = ws_name; - wakeup_source_prepare(ws, name ? kstrdup_const(name, GFP_KERNEL) : NULL); return ws; + +err_name: + kfree(ws); +err_ws: + return NULL; } EXPORT_SYMBOL_GPL(wakeup_source_create); diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 91027602d137..c0cad2d8f800 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -81,7 +81,6 @@ static inline void device_set_wakeup_path(struct device *dev) } /* drivers/base/power/wakeup.c */ -extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name); extern struct wakeup_source *wakeup_source_create(const char *name); extern void wakeup_source_destroy(struct wakeup_source *ws); extern void wakeup_source_add(struct wakeup_source *ws); @@ -112,9 +111,6 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline void wakeup_source_prepare(struct wakeup_source *ws, - const char *name) {} - static inline struct wakeup_source *wakeup_source_create(const char *name) { return NULL; @@ -181,13 +177,6 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, #endif /* !CONFIG_PM_SLEEP */ -static inline void wakeup_source_init(struct wakeup_source *ws, - const char *name) -{ - wakeup_source_prepare(ws, name); - wakeup_source_add(ws); -} - static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); From 2434aea58e652a9fe114181ac90aa60e2f2e1b25 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Tue, 6 Aug 2019 18:48:45 -0700 Subject: [PATCH 051/126] PM / wakeup: Use wakeup_source_register() in wakelock.c kernel/power/wakelock.c duplicates wakeup source creation and registration code from drivers/base/power/wakeup.c. Change struct wakelock's wakeup source to a pointer and use wakeup_source_register() function to create and register said wakeup source. Use wakeup_source_unregister() on cleanup path. Signed-off-by: Tri Vo Reviewed-by: Stephen Boyd Reviewed-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- kernel/power/wakelock.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 4210152e56f0..d1eb7fd98b64 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(wakelocks_lock); struct wakelock { char *name; struct rb_node node; - struct wakeup_source ws; + struct wakeup_source *ws; #ifdef CONFIG_PM_WAKELOCKS_GC struct list_head lru; #endif @@ -46,7 +46,7 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); - if (wl->ws.active == show_active) + if (wl->ws->active == show_active) str += scnprintf(str, end - str, "%s ", wl->name); } if (str > buf) @@ -112,16 +112,16 @@ static void __wakelocks_gc(struct work_struct *work) u64 idle_time_ns; bool active; - spin_lock_irq(&wl->ws.lock); - idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); - active = wl->ws.active; - spin_unlock_irq(&wl->ws.lock); + spin_lock_irq(&wl->ws->lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws->last_time)); + active = wl->ws->active; + spin_unlock_irq(&wl->ws->lock); if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) break; if (!active) { - wakeup_source_remove(&wl->ws); + wakeup_source_unregister(wl->ws); rb_erase(&wl->node, &wakelocks_tree); list_del(&wl->lru); kfree(wl->name); @@ -187,9 +187,15 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, kfree(wl); return ERR_PTR(-ENOMEM); } - wl->ws.name = wl->name; - wl->ws.last_time = ktime_get(); - wakeup_source_add(&wl->ws); + + wl->ws = wakeup_source_register(wl->name); + if (!wl->ws) { + kfree(wl->name); + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws->last_time = ktime_get(); + rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); wakelocks_lru_add(wl); @@ -233,9 +239,9 @@ int pm_wake_lock(const char *buf) u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; do_div(timeout_ms, NSEC_PER_MSEC); - __pm_wakeup_event(&wl->ws, timeout_ms); + __pm_wakeup_event(wl->ws, timeout_ms); } else { - __pm_stay_awake(&wl->ws); + __pm_stay_awake(wl->ws); } wakelocks_lru_most_recent(wl); @@ -271,7 +277,7 @@ int pm_wake_unlock(const char *buf) ret = PTR_ERR(wl); goto out; } - __pm_relax(&wl->ws); + __pm_relax(wl->ws); wakelocks_lru_most_recent(wl); wakelocks_gc(); From c8377adfa78103be5380200eb9dab764d7ca890e Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Tue, 6 Aug 2019 18:48:46 -0700 Subject: [PATCH 052/126] PM / wakeup: Show wakeup sources stats in sysfs Add an ID and a device pointer to 'struct wakeup_source'. Use them to to expose wakeup sources statistics in sysfs under /sys/class/wakeup/wakeup/*. Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Co-developed-by: Stephen Boyd Signed-off-by: Stephen Boyd Signed-off-by: Tri Vo Tested-by: Kalesh Singh Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-class-wakeup | 76 +++++++ drivers/acpi/device_pm.c | 3 +- drivers/base/power/Makefile | 2 +- drivers/base/power/power.h | 9 + drivers/base/power/wakeup.c | 28 ++- drivers/base/power/wakeup_stats.c | 203 +++++++++++++++++++ fs/eventpoll.c | 4 +- include/linux/pm_wakeup.h | 10 +- kernel/power/autosleep.c | 2 +- kernel/power/wakelock.c | 2 +- kernel/time/alarmtimer.c | 2 +- 11 files changed, 328 insertions(+), 13 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-wakeup create mode 100644 drivers/base/power/wakeup_stats.c diff --git a/Documentation/ABI/testing/sysfs-class-wakeup b/Documentation/ABI/testing/sysfs-class-wakeup new file mode 100644 index 000000000000..754aab8b6dcd --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-wakeup @@ -0,0 +1,76 @@ +What: /sys/class/wakeup/ +Date: June 2019 +Contact: Tri Vo +Description: + The /sys/class/wakeup/ directory contains pointers to all + wakeup sources in the kernel at that moment in time. + +What: /sys/class/wakeup/.../name +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the name of the wakeup source. + +What: /sys/class/wakeup/.../active_count +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the number of times the wakeup source was + activated. + +What: /sys/class/wakeup/.../event_count +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the number of signaled wakeup events + associated with the wakeup source. + +What: /sys/class/wakeup/.../wakeup_count +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the number of times the wakeup source might + abort suspend. + +What: /sys/class/wakeup/.../expire_count +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the number of times the wakeup source's + timeout has expired. + +What: /sys/class/wakeup/.../active_time_ms +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the amount of time the wakeup source has + been continuously active, in milliseconds. If the wakeup + source is not active, this file contains '0'. + +What: /sys/class/wakeup/.../total_time_ms +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the total amount of time this wakeup source + has been active, in milliseconds. + +What: /sys/class/wakeup/.../max_time_ms +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the maximum amount of time this wakeup + source has been continuously active, in milliseconds. + +What: /sys/class/wakeup/.../last_change_ms +Date: June 2019 +Contact: Tri Vo +Description: + This file contains the monotonic clock time when the wakeup + source was touched last time, in milliseconds. + +What: /sys/class/wakeup/.../prevent_suspend_time_ms +Date: June 2019 +Contact: Tri Vo +Description: + The file contains the total amount of time this wakeup source + has been preventing autosleep, in milliseconds. diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f616b16c1f0b..3f8958007a93 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -497,7 +497,8 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, goto out; mutex_lock(&acpi_pm_notifier_lock); - adev->wakeup.ws = wakeup_source_register(dev_name(&adev->dev)); + adev->wakeup.ws = wakeup_source_register(&adev->dev, + dev_name(&adev->dev)); adev->wakeup.context.dev = dev; adev->wakeup.context.func = func; adev->wakeup.flags.notifier_present = true; diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index e1bb691cf8f1..ec5bb190b9d0 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_PM) += sysfs.o generic_ops.o common.o qos.o runtime.o wakeirq.o -obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o +obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o wakeup_stats.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index ec33fbdb919b..57b1d1d88c8e 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -149,3 +149,12 @@ static inline void device_pm_init(struct device *dev) device_pm_sleep_init(dev); pm_runtime_init(dev); } + +#ifdef CONFIG_PM_SLEEP + +/* drivers/base/power/wakeup_stats.c */ +extern int wakeup_source_sysfs_add(struct device *parent, + struct wakeup_source *ws); +extern void wakeup_source_sysfs_remove(struct wakeup_source *ws); + +#endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 3938892c8903..973675f24314 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -72,6 +72,8 @@ static struct wakeup_source deleted_ws = { .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), }; +static DEFINE_IDA(wakeup_ida); + /** * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. @@ -80,6 +82,7 @@ struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; const char *ws_name; + int id; ws = kzalloc(sizeof(*ws), GFP_KERNEL); if (!ws) @@ -90,8 +93,15 @@ struct wakeup_source *wakeup_source_create(const char *name) goto err_name; ws->name = ws_name; + id = ida_alloc(&wakeup_ida, GFP_KERNEL); + if (id < 0) + goto err_id; + ws->id = id; + return ws; +err_id: + kfree_const(ws->name); err_name: kfree(ws); err_ws: @@ -140,6 +150,7 @@ void wakeup_source_destroy(struct wakeup_source *ws) __pm_relax(ws); wakeup_source_record(ws); + ida_free(&wakeup_ida, ws->id); kfree_const(ws->name); kfree(ws); } @@ -193,16 +204,24 @@ EXPORT_SYMBOL_GPL(wakeup_source_remove); /** * wakeup_source_register - Create wakeup source and add it to the list. + * @dev: Device this wakeup source is associated with (or NULL if virtual). * @name: Name of the wakeup source to register. */ -struct wakeup_source *wakeup_source_register(const char *name) +struct wakeup_source *wakeup_source_register(struct device *dev, + const char *name) { struct wakeup_source *ws; + int ret; ws = wakeup_source_create(name); - if (ws) + if (ws) { + ret = wakeup_source_sysfs_add(dev, ws); + if (ret) { + wakeup_source_destroy(ws); + return NULL; + } wakeup_source_add(ws); - + } return ws; } EXPORT_SYMBOL_GPL(wakeup_source_register); @@ -215,6 +234,7 @@ void wakeup_source_unregister(struct wakeup_source *ws) { if (ws) { wakeup_source_remove(ws); + wakeup_source_sysfs_remove(ws); wakeup_source_destroy(ws); } } @@ -258,7 +278,7 @@ int device_wakeup_enable(struct device *dev) if (pm_suspend_target_state != PM_SUSPEND_ON) dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); - ws = wakeup_source_register(dev_name(dev)); + ws = wakeup_source_register(dev, dev_name(dev)); if (!ws) return -ENOMEM; diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c new file mode 100644 index 000000000000..220a20ff8918 --- /dev/null +++ b/drivers/base/power/wakeup_stats.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Wakeup statistics in sysfs + * + * Copyright (c) 2019 Linux Foundation + * Copyright (c) 2019 Greg Kroah-Hartman + * Copyright (c) 2019 Google Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "power.h" + +static struct class *wakeup_class; + +#define wakeup_attr(_name) \ +static ssize_t _name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct wakeup_source *ws = dev_get_drvdata(dev); \ + \ + return sprintf(buf, "%lu\n", ws->_name); \ +} \ +static DEVICE_ATTR_RO(_name) + +wakeup_attr(active_count); +wakeup_attr(event_count); +wakeup_attr(wakeup_count); +wakeup_attr(expire_count); + +static ssize_t active_time_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + ktime_t active_time = + ws->active ? ktime_sub(ktime_get(), ws->last_time) : 0; + + return sprintf(buf, "%lld\n", ktime_to_ms(active_time)); +} +static DEVICE_ATTR_RO(active_time_ms); + +static ssize_t total_time_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + ktime_t active_time; + ktime_t total_time = ws->total_time; + + if (ws->active) { + active_time = ktime_sub(ktime_get(), ws->last_time); + total_time = ktime_add(total_time, active_time); + } + return sprintf(buf, "%lld\n", ktime_to_ms(total_time)); +} +static DEVICE_ATTR_RO(total_time_ms); + +static ssize_t max_time_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + ktime_t active_time; + ktime_t max_time = ws->max_time; + + if (ws->active) { + active_time = ktime_sub(ktime_get(), ws->last_time); + if (active_time > max_time) + max_time = active_time; + } + return sprintf(buf, "%lld\n", ktime_to_ms(max_time)); +} +static DEVICE_ATTR_RO(max_time_ms); + +static ssize_t last_change_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + + return sprintf(buf, "%lld\n", ktime_to_ms(ws->last_time)); +} +static DEVICE_ATTR_RO(last_change_ms); + +static ssize_t name_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", ws->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t prevent_suspend_time_ms_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct wakeup_source *ws = dev_get_drvdata(dev); + ktime_t prevent_sleep_time = ws->prevent_sleep_time; + + if (ws->active && ws->autosleep_enabled) { + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(ktime_get(), ws->start_prevent_time)); + } + return sprintf(buf, "%lld\n", ktime_to_ms(prevent_sleep_time)); +} +static DEVICE_ATTR_RO(prevent_suspend_time_ms); + +static struct attribute *wakeup_source_attrs[] = { + &dev_attr_name.attr, + &dev_attr_active_count.attr, + &dev_attr_event_count.attr, + &dev_attr_wakeup_count.attr, + &dev_attr_expire_count.attr, + &dev_attr_active_time_ms.attr, + &dev_attr_total_time_ms.attr, + &dev_attr_max_time_ms.attr, + &dev_attr_last_change_ms.attr, + &dev_attr_prevent_suspend_time_ms.attr, + NULL, +}; +ATTRIBUTE_GROUPS(wakeup_source); + +static void device_create_release(struct device *dev) +{ + kfree(dev); +} + +static struct device *wakeup_source_device_create(struct device *parent, + struct wakeup_source *ws) +{ + struct device *dev = NULL; + int retval = -ENODEV; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + retval = -ENOMEM; + goto error; + } + + device_initialize(dev); + dev->devt = MKDEV(0, 0); + dev->class = wakeup_class; + dev->parent = parent; + dev->groups = wakeup_source_groups; + dev->release = device_create_release; + dev_set_drvdata(dev, ws); + device_set_pm_not_required(dev); + + retval = kobject_set_name(&dev->kobj, "wakeup%d", ws->id); + if (retval) + goto error; + + retval = device_add(dev); + if (retval) + goto error; + + return dev; + +error: + put_device(dev); + return ERR_PTR(retval); +} + +/** + * wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs. + * @parent: Device given wakeup source is associated with (or NULL if virtual). + * @ws: Wakeup source to be added in sysfs. + */ +int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws) +{ + struct device *dev; + + dev = wakeup_source_device_create(parent, ws); + if (IS_ERR(dev)) + return PTR_ERR(dev); + ws->dev = dev; + + return 0; +} +EXPORT_SYMBOL_GPL(wakeup_source_sysfs_add); + +/** + * wakeup_source_sysfs_remove - Remove wakeup_source attributes from sysfs. + * @ws: Wakeup source to be removed from sysfs. + */ +void wakeup_source_sysfs_remove(struct wakeup_source *ws) +{ + device_unregister(ws->dev); +} +EXPORT_SYMBOL_GPL(wakeup_source_sysfs_remove); + +static int __init wakeup_sources_sysfs_init(void) +{ + wakeup_class = class_create(THIS_MODULE, "wakeup"); + + return PTR_ERR_OR_ZERO(wakeup_class); +} +postcore_initcall(wakeup_sources_sysfs_init); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d7f1f5011fac..c4159bcc05d9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1459,13 +1459,13 @@ static int ep_create_wakeup_source(struct epitem *epi) struct wakeup_source *ws; if (!epi->ep->ws) { - epi->ep->ws = wakeup_source_register("eventpoll"); + epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); if (!epi->ep->ws) return -ENOMEM; } name = epi->ffd.file->f_path.dentry->d_name.name; - ws = wakeup_source_register(name); + ws = wakeup_source_register(NULL, name); if (!ws) return -ENOMEM; diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index c0cad2d8f800..661efa029c96 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -21,6 +21,7 @@ struct wake_irq; * struct wakeup_source - Representation of wakeup sources * * @name: Name of the wakeup source + * @id: Wakeup source id * @entry: Wakeup source list entry * @lock: Wakeup source lock * @wakeirq: Optional device specific wakeirq @@ -35,11 +36,13 @@ struct wake_irq; * @relax_count: Number of times the wakeup source was deactivated. * @expire_count: Number of times the wakeup source's timeout has expired. * @wakeup_count: Number of times the wakeup source might abort suspend. + * @dev: Struct device for sysfs statistics about the wakeup source. * @active: Status of the wakeup source. * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time. */ struct wakeup_source { const char *name; + int id; struct list_head entry; spinlock_t lock; struct wake_irq *wakeirq; @@ -55,6 +58,7 @@ struct wakeup_source { unsigned long relax_count; unsigned long expire_count; unsigned long wakeup_count; + struct device *dev; bool active:1; bool autosleep_enabled:1; }; @@ -85,7 +89,8 @@ extern struct wakeup_source *wakeup_source_create(const char *name); extern void wakeup_source_destroy(struct wakeup_source *ws); extern void wakeup_source_add(struct wakeup_source *ws); extern void wakeup_source_remove(struct wakeup_source *ws); -extern struct wakeup_source *wakeup_source_register(const char *name); +extern struct wakeup_source *wakeup_source_register(struct device *dev, + const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); @@ -122,7 +127,8 @@ static inline void wakeup_source_add(struct wakeup_source *ws) {} static inline void wakeup_source_remove(struct wakeup_source *ws) {} -static inline struct wakeup_source *wakeup_source_register(const char *name) +static inline struct wakeup_source *wakeup_source_register(struct device *dev, + const char *name) { return NULL; } diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 41e83a779e19..9af5a50d3489 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -116,7 +116,7 @@ int pm_autosleep_set_state(suspend_state_t state) int __init pm_autosleep_init(void) { - autosleep_ws = wakeup_source_register("autosleep"); + autosleep_ws = wakeup_source_register(NULL, "autosleep"); if (!autosleep_ws) return -ENOMEM; diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index d1eb7fd98b64..105df4dfc783 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -188,7 +188,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, return ERR_PTR(-ENOMEM); } - wl->ws = wakeup_source_register(wl->name); + wl->ws = wakeup_source_register(NULL, wl->name); if (!wl->ws) { kfree(wl->name); kfree(wl); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 57518efc3810..93b382d9701c 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -97,7 +97,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, if (!device_may_wakeup(rtc->dev.parent)) return -1; - __ws = wakeup_source_register("alarmtimer"); + __ws = wakeup_source_register(dev, "alarmtimer"); spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { From ae367b7936408444afc76a8a3e141abede9ccbe4 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 13 Aug 2019 17:40:53 -0700 Subject: [PATCH 053/126] PM / wakeup: Fix sysfs registration error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't call wakeup_source_destroy() from the error path in wakeup_source_register() because that calls __pm_relax() which takes a lock that isn't initialized until wakeup_source_add() is called. Add a new function, wakeup_source_free(), that just does the bare minimum to free a wakeup source that was created but hasn't been added yet and use it from the two places it's needed. This fixes the following problem seen on various x86 server boxes: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 12 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc4- Hardware name: HP ProLiant XL420 Gen9/ProLiant XL420 Gen9, BIOS U19 12/27/2015 Call Trace:   dump_stack+0x62/0x9a   register_lock_class+0x95a/0x960   ? __platform_driver_probe+0xcd/0x230   ? __platform_create_bundle+0xc0/0xe0   ? i8042_init+0x4ec/0x578   ? do_one_initcall+0xfe/0x45a   ? kernel_init_freeable+0x614/0x6a7   ? kernel_init+0x11/0x138   ? ret_from_fork+0x35/0x40   ? is_dynamic_key+0xf0/0xf0   ? rwlock_bug.part.0+0x60/0x60   ? __debug_check_no_obj_freed+0x8e/0x250   __lock_acquire.isra.13+0x5f/0x830   ? __debug_check_no_obj_freed+0x152/0x250   lock_acquire+0x107/0x220   ? __pm_relax.part.2+0x21/0xa0   _raw_spin_lock_irqsave+0x35/0x50   ? __pm_relax.part.2+0x21/0xa0   __pm_relax.part.2+0x21/0xa0   wakeup_source_destroy.part.3+0x18/0x190   wakeup_source_register+0x43/0x50 Fixes: c8377adfa781 ("PM / wakeup: Show wakeup sources stats in sysfs") Reported-by: Qian Cai Signed-off-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 973675f24314..036469528f88 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -137,6 +137,13 @@ static void wakeup_source_record(struct wakeup_source *ws) spin_unlock_irqrestore(&deleted_ws.lock, flags); } +static void wakeup_source_free(struct wakeup_source *ws) +{ + ida_free(&wakeup_ida, ws->id); + kfree_const(ws->name); + kfree(ws); +} + /** * wakeup_source_destroy - Destroy a struct wakeup_source object. * @ws: Wakeup source to destroy. @@ -150,9 +157,7 @@ void wakeup_source_destroy(struct wakeup_source *ws) __pm_relax(ws); wakeup_source_record(ws); - ida_free(&wakeup_ida, ws->id); - kfree_const(ws->name); - kfree(ws); + wakeup_source_free(ws); } EXPORT_SYMBOL_GPL(wakeup_source_destroy); @@ -217,7 +222,7 @@ struct wakeup_source *wakeup_source_register(struct device *dev, if (ws) { ret = wakeup_source_sysfs_add(dev, ws); if (ret) { - wakeup_source_destroy(ws); + wakeup_source_free(ws); return NULL; } wakeup_source_add(ws); From 2ca3d1ecb8c432ee212d80fa7615cdd5d1df62e3 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 19 Aug 2019 15:41:57 -0700 Subject: [PATCH 054/126] PM / wakeup: Register wakeup class kobj after device is added The device_set_wakeup_enable() function can be called on a device that hasn't been registered with device_add() yet. This allows the device to be in a state where wakeup is enabled for it but the device isn't published to userspace in sysfs yet. After commit c8377adfa781 ("PM / wakeup: Show wakeup sources stats in sysfs"), calling device_set_wakeup_enable() will fail for a device that hasn't been registered with the driver core via device_add(). This is because we try to create sysfs entries for the device and associate a wakeup class kobject with it before the device has been registered. Let's follow a similar approach that device_set_wakeup_capable() takes here and register the wakeup class either from device_set_wakeup_enable() when the device is already registered, or from dpm_sysfs_add() when the device is being registered with the driver core via device_add(). Fixes: c8377adfa781 ("PM / wakeup: Show wakeup sources stats in sysfs") Reported-by: Qian Cai Reviewed-by: Tri Vo Signed-off-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/power.h | 9 +++++++++ drivers/base/power/sysfs.c | 6 ++++++ drivers/base/power/wakeup.c | 10 ++++++---- drivers/base/power/wakeup_stats.c | 13 +++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 57b1d1d88c8e..39a06a0cfdaa 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -157,4 +157,13 @@ extern int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws); extern void wakeup_source_sysfs_remove(struct wakeup_source *ws); +extern int pm_wakeup_source_sysfs_add(struct device *parent); + +#else /* !CONFIG_PM_SLEEP */ + +static inline int pm_wakeup_source_sysfs_add(struct device *parent) +{ + return 0; +} + #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 1b9c281cbe41..d7d82db2e4bc 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include "power.h" @@ -667,8 +668,13 @@ int dpm_sysfs_add(struct device *dev) if (rc) goto err_wakeup; } + rc = pm_wakeup_source_sysfs_add(dev); + if (rc) + goto err_latency; return 0; + err_latency: + sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); err_wakeup: sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); err_runtime: diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 036469528f88..e58b070985b1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -220,10 +220,12 @@ struct wakeup_source *wakeup_source_register(struct device *dev, ws = wakeup_source_create(name); if (ws) { - ret = wakeup_source_sysfs_add(dev, ws); - if (ret) { - wakeup_source_free(ws); - return NULL; + if (!dev || device_is_registered(dev)) { + ret = wakeup_source_sysfs_add(dev, ws); + if (ret) { + wakeup_source_free(ws); + return NULL; + } } wakeup_source_add(ws); } diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index 220a20ff8918..bc5e3945f7a8 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -184,6 +184,19 @@ int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws) } EXPORT_SYMBOL_GPL(wakeup_source_sysfs_add); +/** + * pm_wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs + * for a device if they're missing. + * @parent: Device given wakeup source is associated with + */ +int pm_wakeup_source_sysfs_add(struct device *parent) +{ + if (!parent->power.wakeup || parent->power.wakeup->dev) + return 0; + + return wakeup_source_sysfs_add(parent, parent->power.wakeup); +} + /** * wakeup_source_sysfs_remove - Remove wakeup_source attributes from sysfs. * @ws: Wakeup source to be removed from sysfs. From 78c0f050847cac4c43dbfc1916a63d1557c74ac0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 19 Aug 2019 15:41:58 -0700 Subject: [PATCH 055/126] PM / wakeup: Unexport wakeup_source_sysfs_{add,remove}() These functions are just used by the PM core, and that isn't modular so these functions don't need to be exported. Drop the exports. Fixes: c8377adfa781 ("PM / wakeup: Show wakeup sources stats in sysfs") Reviewed-by: Tri Vo Signed-off-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup_stats.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index bc5e3945f7a8..c7734914d914 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -182,7 +182,6 @@ int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws) return 0; } -EXPORT_SYMBOL_GPL(wakeup_source_sysfs_add); /** * pm_wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs @@ -205,7 +204,6 @@ void wakeup_source_sysfs_remove(struct wakeup_source *ws) { device_unregister(ws->dev); } -EXPORT_SYMBOL_GPL(wakeup_source_sysfs_remove); static int __init wakeup_sources_sysfs_init(void) { From 62c23a89fd269f0308226b63ac4a427339b6f945 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 13 Aug 2019 13:21:21 +0100 Subject: [PATCH 056/126] cpufreq: remove redundant assignment to ret Variable ret is initialized to a value that is never read and it is re-assigned later. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c28ebf2810f1..26d82e0a2de5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2140,7 +2140,7 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - int ret = -EINVAL; + int ret; down_write(&policy->rwsem); From 1446794a89c13816bd526dcbff4051cac9743db7 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Mon, 12 Aug 2019 14:08:44 -0700 Subject: [PATCH 057/126] pm-graph v5.5 Upgrade bootgraph/sleepgraph to be able to run on python2 and python3. Both now simply require python, the system can choose which to use. bootgraph python3 update: - add floor function to handle integer arithmetic - change argument loop to use next() instead of args.next() - open dmesg log and popen in binary, use decode(ascii, ignore) - sort all html data to allow diff between python versions - change exception handler to use python3 as instead of comma sleepgraph python3 update: - import configparser not ConfigParser (p2 needs python-configparser) - add floor function to handle integer arithmetic - change argument loop to use next() instead of args.next() - handle popen output in binary, use decode(ascii, ignore) - sort all html/output data to allow diff between python versions - force gzip open to use text mode, same for file open - ensure no binary data is written to logs (ascii convert devprops info) - use codecs library to handle zlib encoding for mcelog data - remove all uses of python3.7 keyword "async" as members or vars - assume all FPDT and DMI data is in binary string form sleepgraph: - turbostat will be used by default if it's found & the mode is freeze - a new option "-noturbostat" will disable its use - fix bug where two callgraphs with the same start time overwrite. - fix s2idle processing where two suspend/resume_machines occur back2back - update getexec function to use which first (assuming PATH exists) - new platforminfo data in log with: lspci, gpe counts, /proc/interrupts - new data is zipped, b64 encoded, and tacked on the end of ftrace Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 6 +- tools/power/pm-graph/bootgraph.py | 59 ++- tools/power/pm-graph/sleepgraph.8 | 8 +- tools/power/pm-graph/sleepgraph.py | 610 ++++++++++++++++------------- 4 files changed, 387 insertions(+), 296 deletions(-) diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 58a5591e3951..96259f6e5715 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -1,7 +1,7 @@ p m - g r a p h pm-graph: suspend/resume/boot timing analysis tools - Version: 5.4 + Version: 5.5 Author: Todd Brandt Home Page: https://01.org/pm-graph @@ -18,6 +18,10 @@ - upstream version in git: https://github.com/intel/pm-graph/ + Requirements: + - runs with python2 or python3, choice is made by /usr/bin/python link + - python2 now requires python-configparser be installed + Table of Contents - Overview - Setup diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index 666bcbda648d..d3b99a1e92d6 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -1,9 +1,18 @@ -#!/usr/bin/python2 +#!/usr/bin/python # SPDX-License-Identifier: GPL-2.0-only # # Tool for analyzing boot timing # Copyright (c) 2013, Intel Corporation. # +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# # Authors: # Todd Brandt # @@ -81,7 +90,7 @@ class SystemValues(aslib.SystemValues): cmdline = 'initcall_debug log_buf_len=32M' if self.useftrace: if self.cpucount > 0: - bs = min(self.memtotal / 2, 2*1024*1024) / self.cpucount + bs = min(self.memtotal // 2, 2*1024*1024) // self.cpucount else: bs = 131072 cmdline += ' trace_buf_size=%dK trace_clock=global '\ @@ -137,13 +146,13 @@ class SystemValues(aslib.SystemValues): if arg in ['-h', '-v', '-cronjob', '-reboot', '-verbose']: continue elif arg in ['-o', '-dmesg', '-ftrace', '-func']: - args.next() + next(args) continue elif arg == '-result': - cmdline += ' %s "%s"' % (arg, os.path.abspath(args.next())) + cmdline += ' %s "%s"' % (arg, os.path.abspath(next(args))) continue elif arg == '-cgskip': - file = self.configFile(args.next()) + file = self.configFile(next(args)) cmdline += ' %s "%s"' % (arg, os.path.abspath(file)) continue cmdline += ' '+arg @@ -292,11 +301,11 @@ def parseKernelLog(): tp = aslib.TestProps() devtemp = dict() if(sysvals.dmesgfile): - lf = open(sysvals.dmesgfile, 'r') + lf = open(sysvals.dmesgfile, 'rb') else: lf = Popen('dmesg', stdout=PIPE).stdout for line in lf: - line = line.replace('\r\n', '') + line = aslib.ascii(line).replace('\r\n', '') # grab the stamp and sysinfo if re.match(tp.stampfmt, line): tp.stamp = line @@ -649,7 +658,7 @@ def createBootGraph(data): statinfo += '\t"%s": [\n\t\t"%s",\n' % (n, devstats[n]['info']) if 'fstat' in devstats[n]: funcs = devstats[n]['fstat'] - for f in sorted(funcs, key=funcs.get, reverse=True): + for f in sorted(funcs, key=lambda k:(funcs[k], k), reverse=True): if funcs[f][0] < 0.01 and len(funcs) > 10: break statinfo += '\t\t"%f|%s|%d",\n' % (funcs[f][0], f, funcs[f][1]) @@ -729,7 +738,7 @@ def updateCron(restore=False): op.write('@reboot python %s\n' % sysvals.cronjobCmdString()) op.close() res = call([cmd, cronfile]) - except Exception, e: + except Exception as e: pprint('Exception: %s' % str(e)) shutil.move(backfile, cronfile) res = -1 @@ -745,7 +754,7 @@ def updateGrub(restore=False): try: call(sysvals.blexec, stderr=PIPE, stdout=PIPE, env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'}) - except Exception, e: + except Exception as e: pprint('Exception: %s\n' % str(e)) return # extract the option and create a grub config without it @@ -792,7 +801,7 @@ def updateGrub(restore=False): op.close() res = call(sysvals.blexec) os.remove(grubfile) - except Exception, e: + except Exception as e: pprint('Exception: %s' % str(e)) res = -1 # cleanup @@ -866,6 +875,7 @@ def printHelp(): 'Other commands:\n'\ ' -flistall Print all functions capable of being captured in ftrace\n'\ ' -sysinfo Print out system info extracted from BIOS\n'\ + ' -which exec Print an executable path, should function even without PATH\n'\ ' [redo]\n'\ ' -dmesg file Create HTML output using dmesg input (used with -ftrace)\n'\ ' -ftrace file Create HTML output using ftrace input (used with -dmesg)\n'\ @@ -907,13 +917,13 @@ if __name__ == '__main__': sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0) elif(arg == '-cgfilter'): try: - val = args.next() + val = next(args) except: doError('No callgraph functions supplied', True) sysvals.setCallgraphFilter(val) elif(arg == '-cgskip'): try: - val = args.next() + val = next(args) except: doError('No file supplied', True) if val.lower() in switchoff: @@ -924,7 +934,7 @@ if __name__ == '__main__': doError('%s does not exist' % cgskip) elif(arg == '-bl'): try: - val = args.next() + val = next(args) except: doError('No boot loader name supplied', True) if val.lower() not in ['grub']: @@ -937,7 +947,7 @@ if __name__ == '__main__': sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000) elif(arg == '-func'): try: - val = args.next() + val = next(args) except: doError('No filter functions supplied', True) sysvals.useftrace = True @@ -946,7 +956,7 @@ if __name__ == '__main__': sysvals.setGraphFilter(val) elif(arg == '-ftrace'): try: - val = args.next() + val = next(args) except: doError('No ftrace file supplied', True) if(os.path.exists(val) == False): @@ -959,7 +969,7 @@ if __name__ == '__main__': sysvals.cgexp = True elif(arg == '-dmesg'): try: - val = args.next() + val = next(args) except: doError('No dmesg file supplied', True) if(os.path.exists(val) == False): @@ -968,13 +978,13 @@ if __name__ == '__main__': sysvals.dmesgfile = val elif(arg == '-o'): try: - val = args.next() + val = next(args) except: doError('No subdirectory name supplied', True) sysvals.testdir = sysvals.setOutputFolder(val) elif(arg == '-result'): try: - val = args.next() + val = next(args) except: doError('No result file supplied', True) sysvals.result = val @@ -986,6 +996,17 @@ if __name__ == '__main__': # remaining options are only for cron job use elif(arg == '-cronjob'): sysvals.iscronjob = True + elif(arg == '-which'): + try: + val = next(args) + except: + doError('No executable supplied', True) + out = sysvals.getExec(val) + if not out: + print('%s not found' % val) + sys.exit(1) + print(out) + sys.exit(0) else: doError('Invalid argument: '+arg, True) diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 9648be644d5f..43aee64316df 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -53,10 +53,10 @@ disable rtcwake and require a user keypress to resume. Add the dmesg and ftrace logs to the html output. They will be viewable by clicking buttons in the timeline. .TP -\fB-turbostat\fR -Use turbostat to execute the command in freeze mode (default: disabled). This -will provide turbostat output in the log which will tell you which actual -power modes were entered. +\fB-noturbostat\fR +By default, if turbostat is found and the requested mode is freeze, sleepgraph +will execute the suspend via turbostat and collect data in the timeline log. +This option disables the use of turbostat. .TP \fB-result \fIfile\fR Export a results table to a text file for parsing. diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 4f46a7a1feb6..1794c79a7d1b 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1,9 +1,18 @@ -#!/usr/bin/python2 +#!/usr/bin/python # SPDX-License-Identifier: GPL-2.0-only # # Tool for analyzing suspend/resume timing # Copyright (c) 2013, Intel Corporation. # +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# # Authors: # Todd Brandt # @@ -48,9 +57,10 @@ import string import re import platform import signal +import codecs from datetime import datetime import struct -import ConfigParser +import configparser import gzip from threading import Thread from subprocess import call, Popen, PIPE @@ -60,6 +70,9 @@ def pprint(msg): print(msg) sys.stdout.flush() +def ascii(text): + return text.decode('ascii', 'ignore') + # ----------------- CLASSES -------------------- # Class: SystemValues @@ -68,7 +81,7 @@ def pprint(msg): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.4' + version = '5.5' ansi = False rs = 0 display = '' @@ -78,7 +91,7 @@ class SystemValues: testlog = True dmesglog = True ftracelog = False - tstat = False + tstat = True mindevlen = 0.0 mincglen = 0.0 cgphase = '' @@ -147,6 +160,7 @@ class SystemValues: devdump = False mixedphaseheight = True devprops = dict() + platinfo = [] predelay = 0 postdelay = 0 pmdebug = '' @@ -323,13 +337,20 @@ class SystemValues: sys.exit(1) return False def getExec(self, cmd): - dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin', - '/usr/local/sbin', '/usr/local/bin'] - for path in dirlist: + try: + fp = Popen(['which', cmd], stdout=PIPE, stderr=PIPE).stdout + out = ascii(fp.read()).strip() + fp.close() + except: + out = '' + if out: + return out + for path in ['/sbin', '/bin', '/usr/sbin', '/usr/bin', + '/usr/local/sbin', '/usr/local/bin']: cmdfull = os.path.join(path, cmd) if os.path.exists(cmdfull): return cmdfull - return '' + return out def setPrecision(self, num): if num < 0 or num > 6: return @@ -455,7 +476,7 @@ class SystemValues: fp = Popen('dmesg', stdout=PIPE).stdout ktime = '0' for line in fp: - line = line.replace('\r\n', '') + line = ascii(line).replace('\r\n', '') idx = line.find('[') if idx > 1: line = line[idx:] @@ -469,7 +490,7 @@ class SystemValues: # store all new dmesg lines since initdmesg was called fp = Popen('dmesg', stdout=PIPE).stdout for line in fp: - line = line.replace('\r\n', '') + line = ascii(line).replace('\r\n', '') idx = line.find('[') if idx > 1: line = line[idx:] @@ -501,7 +522,7 @@ class SystemValues: call('cat '+self.tpath+'available_filter_functions', shell=True) return master = self.listFromFile(self.tpath+'available_filter_functions') - for i in self.tracefuncs: + for i in sorted(self.tracefuncs): if 'func' in self.tracefuncs[i]: i = self.tracefuncs[i]['func'] if i in master: @@ -628,7 +649,7 @@ class SystemValues: self.fsetVal(kprobeevents, 'kprobe_events') if output: check = self.fgetVal('kprobe_events') - linesack = (len(check.split('\n')) - 1) / 2 + linesack = (len(check.split('\n')) - 1) // 2 pprint(' kprobe functions enabled: %d/%d' % (linesack, linesout)) self.fsetVal('1', 'events/kprobes/enable') def testKprobe(self, kname, kprobe): @@ -650,7 +671,7 @@ class SystemValues: if not os.path.exists(file): return False try: - fp = open(file, mode, 0) + fp = open(file, mode) fp.write(val) fp.flush() fp.close() @@ -719,7 +740,7 @@ class SystemValues: tgtsize = min(self.memfree, bmax) else: tgtsize = 65536 - while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'): + while not self.fsetVal('%d' % (tgtsize // cpus), 'buffer_size_kb'): # if the size failed to set, lower it and keep trying tgtsize -= 65536 if tgtsize < 65536: @@ -863,14 +884,23 @@ class SystemValues: isgz = self.gzip if mode == 'r': try: - with gzip.open(filename, mode+'b') as fp: + with gzip.open(filename, mode+'t') as fp: test = fp.read(64) isgz = True except: isgz = False if isgz: - return gzip.open(filename, mode+'b') + return gzip.open(filename, mode+'t') return open(filename, mode) + def b64unzip(self, data): + try: + out = codecs.decode(base64.b64decode(data), 'zlib').decode() + except: + out = data + return out + def b64zip(self, data): + out = base64.b64encode(codecs.encode(data.encode(), 'zlib')).decode() + return out def mcelog(self, clear=False): cmd = self.getExec('mcelog') if not cmd: @@ -878,12 +908,124 @@ class SystemValues: if clear: call(cmd+' > /dev/null 2>&1', shell=True) return '' - fp = Popen([cmd], stdout=PIPE, stderr=PIPE).stdout - out = fp.read().strip() - fp.close() + try: + fp = Popen([cmd], stdout=PIPE, stderr=PIPE).stdout + out = ascii(fp.read()).strip() + fp.close() + except: + return '' if not out: return '' - return base64.b64encode(out.encode('zlib')) + return self.b64zip(out) + def platforminfo(self): + # add platform info on to a completed ftrace file + if not os.path.exists(self.ftracefile): + return False + footer = '#\n' + + # add test command string line if need be + if self.suspendmode == 'command' and self.testcommand: + footer += '# platform-testcmd: %s\n' % (self.testcommand) + + # get a list of target devices from the ftrace file + props = dict() + tp = TestProps() + tf = self.openlog(self.ftracefile, 'r') + for line in tf: + # determine the trace data type (required for further parsing) + m = re.match(tp.tracertypefmt, line) + if(m): + tp.setTracerType(m.group('t')) + continue + # parse only valid lines, if this is not one move on + m = re.match(tp.ftrace_line_fmt, line) + if(not m or 'device_pm_callback_start' not in line): + continue + m = re.match('.*: (?P.*) (?P.*), parent: *(?P

.*), .*', m.group('msg')); + if(not m): + continue + dev = m.group('d') + if dev not in props: + props[dev] = DevProps() + tf.close() + + # now get the syspath for each target device + for dirname, dirnames, filenames in os.walk('/sys/devices'): + if(re.match('.*/power', dirname) and 'async' in filenames): + dev = dirname.split('/')[-2] + if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): + props[dev].syspath = dirname[:-6] + + # now fill in the properties for our target devices + for dev in sorted(props): + dirname = props[dev].syspath + if not dirname or not os.path.exists(dirname): + continue + with open(dirname+'/power/async') as fp: + text = fp.read() + props[dev].isasync = False + if 'enabled' in text: + props[dev].isasync = True + fields = os.listdir(dirname) + if 'product' in fields: + with open(dirname+'/product', 'rb') as fp: + props[dev].altname = ascii(fp.read()) + elif 'name' in fields: + with open(dirname+'/name', 'rb') as fp: + props[dev].altname = ascii(fp.read()) + elif 'model' in fields: + with open(dirname+'/model', 'rb') as fp: + props[dev].altname = ascii(fp.read()) + elif 'description' in fields: + with open(dirname+'/description', 'rb') as fp: + props[dev].altname = ascii(fp.read()) + elif 'id' in fields: + with open(dirname+'/id', 'rb') as fp: + props[dev].altname = ascii(fp.read()) + elif 'idVendor' in fields and 'idProduct' in fields: + idv, idp = '', '' + with open(dirname+'/idVendor', 'rb') as fp: + idv = ascii(fp.read()).strip() + with open(dirname+'/idProduct', 'rb') as fp: + idp = ascii(fp.read()).strip() + props[dev].altname = '%s:%s' % (idv, idp) + if props[dev].altname: + out = props[dev].altname.strip().replace('\n', ' ')\ + .replace(',', ' ').replace(';', ' ') + props[dev].altname = out + + # add a devinfo line to the bottom of ftrace + out = '' + for dev in sorted(props): + out += props[dev].out(dev) + footer += '# platform-devinfo: %s\n' % self.b64zip(out) + + # add a line for each of these commands with their outputs + cmds = [ + ['pcidevices', 'lspci', '-tv'], + ['interrupts', 'cat', '/proc/interrupts'], + ['gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/gpe*'], + ] + for cargs in cmds: + name = cargs[0] + cmdline = ' '.join(cargs[1:]) + cmdpath = self.getExec(cargs[1]) + if not cmdpath: + continue + cmd = [cmdpath] + cargs[2:] + try: + fp = Popen(cmd, stdout=PIPE, stderr=PIPE).stdout + info = ascii(fp.read()).strip() + fp.close() + except: + continue + if not info: + continue + footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info)) + + with self.openlog(self.ftracefile, 'a') as fp: + fp.write(footer) + return True def haveTurbostat(self): if not self.tstat: return False @@ -891,31 +1033,40 @@ class SystemValues: if not cmd: return False fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr - out = fp.read().strip() + out = ascii(fp.read()).strip() fp.close() - return re.match('turbostat version [0-9\.]* .*', out) + if re.match('turbostat version [0-9\.]* .*', out): + sysvals.vprint(out) + return True + return False def turbostat(self): cmd = self.getExec('turbostat') - if not cmd: - return 'missing turbostat executable' - text = [] + rawout = keyline = valline = '' fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile) fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE).stderr for line in fp: - if re.match('[0-9.]* sec', line): + line = ascii(line) + rawout += line + if keyline and valline: continue - text.append(line.split()) + if re.match('(?i)Avg_MHz.*', line): + keyline = line.strip().split() + elif keyline: + valline = line.strip().split() fp.close() - if len(text) < 2: - return 'turbostat output format error' + if not keyline or not valline or len(keyline) != len(valline): + errmsg = 'unrecognized turbostat output:\n'+rawout.strip() + sysvals.vprint(errmsg) + if not sysvals.verbose: + pprint(errmsg) + return '' + if sysvals.verbose: + pprint(rawout.strip()) out = [] - for key in text[0]: - values = [] - idx = text[0].index(key) - for line in text[1:]: - if len(line) > idx: - values.append(line[idx]) - out.append('%s=%s' % (key, ','.join(values))) + for key in keyline: + idx = keyline.index(key) + val = valline[idx] + out.append('%s=%s' % (key, val)) return '|'.join(out) def checkWifi(self): out = dict() @@ -924,7 +1075,7 @@ class SystemValues: return out fp = Popen(iwcmd, stdout=PIPE, stderr=PIPE).stdout for line in fp: - m = re.match('(?P\S*) .* ESSID:(?P\S*)', line) + m = re.match('(?P\S*) .* ESSID:(?P\S*)', ascii(line)) if not m: continue out['device'] = m.group('dev') @@ -935,7 +1086,7 @@ class SystemValues: if 'device' in out: fp = Popen([ifcmd, out['device']], stdout=PIPE, stderr=PIPE).stdout for line in fp: - m = re.match('.* inet (?P[0-9\.]*)', line) + m = re.match('.* inet (?P[0-9\.]*)', ascii(line)) if m: out['ip'] = m.group('ip') break @@ -990,13 +1141,13 @@ class DevProps: def __init__(self): self.syspath = '' self.altname = '' - self.async = True + self.isasync = True self.xtraclass = '' self.xtrainfo = '' def out(self, dev): - return '%s,%s,%d;' % (dev, self.altname, self.async) + return '%s,%s,%d;' % (dev, self.altname, self.isasync) def debug(self, dev): - pprint('%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async)) + pprint('%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.isasync)) def altName(self, dev): if not self.altname or self.altname == dev: return dev @@ -1004,13 +1155,13 @@ class DevProps: def xtraClass(self): if self.xtraclass: return ' '+self.xtraclass - if not self.async: + if not self.isasync: return ' sync' return '' def xtraInfo(self): if self.xtraclass: return ' '+self.xtraclass - if self.async: + if self.isasync: return ' async_device' return ' sync_device' @@ -1108,7 +1259,7 @@ class Data: return sorted(self.dmesg, key=lambda k:self.dmesg[k]['order']) def initDevicegroups(self): # called when phases are all finished being added - for phase in self.dmesg.keys(): + for phase in sorted(self.dmesg.keys()): if '*' in phase: p = phase.split('*') pnew = '%s%d' % (p[0], len(p)) @@ -1430,16 +1581,7 @@ class Data: return phase def sortedDevices(self, phase): list = self.dmesg[phase]['list'] - slist = [] - tmp = dict() - for devname in list: - dev = list[devname] - if dev['length'] == 0: - continue - tmp[dev['start']] = devname - for t in sorted(tmp): - slist.append(tmp[t]) - return slist + return sorted(list, key=lambda k:list[k]['start']) def fixupInitcalls(self, phase): # if any calls never returned, clip them at system resume end phaselist = self.dmesg[phase]['list'] @@ -1576,7 +1718,7 @@ class Data: maxname = '%d' % self.maxDeviceNameSize(phase) fmt = '%3d) %'+maxname+'s - %f - %f' c = 1 - for name in devlist: + for name in sorted(devlist): s = devlist[name]['start'] e = devlist[name]['end'] sysvals.vprint(fmt % (c, name, s, e)) @@ -1588,7 +1730,7 @@ class Data: devlist = [] for phase in self.sortedPhases(): list = self.deviceChildren(devname, phase) - for dev in list: + for dev in sorted(list): if dev not in devlist: devlist.append(dev) return devlist @@ -1628,16 +1770,16 @@ class Data: def rootDeviceList(self): # list of devices graphed real = [] - for phase in self.dmesg: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] - for dev in list: + for dev in sorted(list): if list[dev]['pid'] >= 0 and dev not in real: real.append(dev) # list of top-most root devices rootlist = [] - for phase in self.dmesg: + for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] - for dev in list: + for dev in sorted(list): pdev = list[dev]['par'] pid = list[dev]['pid'] if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): @@ -1718,9 +1860,9 @@ class Data: def createProcessUsageEvents(self): # get an array of process names proclist = [] - for t in self.pstl: + for t in sorted(self.pstl): pslist = self.pstl[t] - for ps in pslist: + for ps in sorted(pslist): if ps not in proclist: proclist.append(ps) # get a list of data points for suspend and resume @@ -1765,7 +1907,7 @@ class Data: def debugPrint(self): for p in self.sortedPhases(): list = self.dmesg[p]['list'] - for devname in list: + for devname in sorted(list): dev = list[devname] if 'ftrace' in dev: dev['ftrace'].debugPrint(' [%s]' % devname) @@ -2466,7 +2608,7 @@ class Timeline: # if there is 1 line per row, draw them the standard way for t, p in standardphases: for i in sorted(self.rowheight[t][p]): - self.rowheight[t][p][i] = self.bodyH/len(self.rowlines[t][p]) + self.rowheight[t][p][i] = float(self.bodyH)/len(self.rowlines[t][p]) def createZoomBox(self, mode='command', testcount=1): # Create bounding box, add buttons html_zoombox = '

\n' @@ -2537,6 +2679,7 @@ class TestProps: cmdlinefmt = '^# command \| (?P.*)' kparamsfmt = '^# kparams \| (?P.*)' devpropfmt = '# Device Properties: .*' + pinfofmt = '# platform-(?P[a-z,A-Z,0-9]*): (?P.*)' tracertypefmt = '# tracer: (?P.*)' firmwarefmt = '# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' procexecfmt = 'ps - (?P.*)$' @@ -2571,12 +2714,6 @@ class TestProps: self.ftrace_line_fmt = self.ftrace_line_fmt_nop else: doError('Invalid tracer format: [%s]' % tracer) - def decode(self, data): - try: - out = base64.b64decode(data).decode('zlib') - except: - out = data - return out def stampInfo(self, line): if re.match(self.stampfmt, line): self.stamp = line @@ -2660,7 +2797,7 @@ class TestProps: if len(self.mcelog) > data.testnumber: m = re.match(self.mcelogfmt, self.mcelog[data.testnumber]) if m: - data.mcelog = self.decode(m.group('m')) + data.mcelog = sv.b64unzip(m.group('m')) # turbostat data if len(self.turbostat) > data.testnumber: m = re.match(self.tstatfmt, self.turbostat[data.testnumber]) @@ -2681,6 +2818,46 @@ class TestProps: m = re.match(self.testerrfmt, self.testerror[data.testnumber]) if m: data.enterfail = m.group('e') + def devprops(self, data): + props = dict() + devlist = data.split(';') + for dev in devlist: + f = dev.split(',') + if len(f) < 3: + continue + dev = f[0] + props[dev] = DevProps() + props[dev].altname = f[1] + if int(f[2]): + props[dev].isasync = True + else: + props[dev].isasync = False + return props + def parseDevprops(self, line, sv): + idx = line.index(': ') + 2 + if idx >= len(line): + return + props = self.devprops(line[idx:]) + if sv.suspendmode == 'command' and 'testcommandstring' in props: + sv.testcommand = props['testcommandstring'].altname + sv.devprops = props + def parsePlatformInfo(self, line, sv): + m = re.match(self.pinfofmt, line) + if not m: + return + name, info = m.group('val'), m.group('info') + if name == 'devinfo': + sv.devprops = self.devprops(sv.b64unzip(info)) + return + elif name == 'testcmd': + sv.testcommand = info + return + field = info.split('|') + if len(field) < 2: + return + cmdline = field[0].strip() + output = sv.b64unzip(field[1].strip()) + sv.platinfo.append([name, cmdline, output]) # Class: TestRun # Description: @@ -2701,7 +2878,7 @@ class ProcessMonitor: process = Popen(c, shell=True, stdout=PIPE) running = dict() for line in process.stdout: - data = line.split() + data = ascii(line).split() pid = data[0] name = re.sub('[()]', '', data[1]) user = int(data[13]) @@ -2805,7 +2982,11 @@ def appendIncompleteTraceLog(testruns): continue # device properties line if(re.match(tp.devpropfmt, line)): - devProps(line) + tp.parseDevprops(line, sysvals) + continue + # platform info line + if(re.match(tp.pinfofmt, line)): + tp.parsePlatformInfo(line, sysvals) continue # parse only valid lines, if this is not one move on m = re.match(tp.ftrace_line_fmt, line) @@ -2902,7 +3083,7 @@ def parseTraceLog(live=False): sysvals.setupAllKprobes() ksuscalls = ['pm_prepare_console'] krescalls = ['pm_restore_console'] - tracewatch = [] + tracewatch = ['irq_wakeup'] if sysvals.usekprobes: tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend', 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', @@ -2928,7 +3109,11 @@ def parseTraceLog(live=False): continue # device properties line if(re.match(tp.devpropfmt, line)): - devProps(line) + tp.parseDevprops(line, sysvals) + continue + # platform info line + if(re.match(tp.pinfofmt, line)): + tp.parsePlatformInfo(line, sysvals) continue # ignore all other commented lines if line[0] == '#': @@ -3001,16 +3186,11 @@ def parseTraceLog(live=False): isbegin = False else: continue - m = re.match('(?P.*)\[(?P[0-9]*)\] .*', t.name) - if(m): - val = m.group('val') - if val == '0': - name = m.group('name') - else: - name = m.group('name')+'['+val+']' + if '[' in t.name: + m = re.match('(?P.*)\[.*', t.name) else: m = re.match('(?P.*) .*', t.name) - name = m.group('name') + name = m.group('name') # ignore these events if(name.split('[')[0] in tracewatch): continue @@ -3045,6 +3225,8 @@ def parseTraceLog(live=False): elif(re.match('machine_suspend\[.*', t.name)): if(isbegin): lp = data.lastPhase() + if lp == 'resume_machine': + data.dmesg[lp]['end'] = t.time phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True) data.setPhase(phase, t.time, False) if data.tSuspended == 0: @@ -3213,11 +3395,11 @@ def parseTraceLog(live=False): # add the traceevent data to the device hierarchy if(sysvals.usetraceevents): # add actual trace funcs - for name in test.ttemp: + for name in sorted(test.ttemp): for event in test.ttemp[name]: data.newActionGlobal(name, event['begin'], event['end'], event['pid']) # add the kprobe based virtual tracefuncs as actual devices - for key in tp.ktemp: + for key in sorted(tp.ktemp): name, pid = key if name not in sysvals.tracefuncs: continue @@ -3231,7 +3413,7 @@ def parseTraceLog(live=False): data.newActionGlobal(e['name'], kb, ke, pid, color) # add config base kprobes and dev kprobes if sysvals.usedevsrc: - for key in tp.ktemp: + for key in sorted(tp.ktemp): name, pid = key if name in sysvals.tracefuncs or name not in sysvals.dev_tracefuncs: continue @@ -3244,7 +3426,7 @@ def parseTraceLog(live=False): if sysvals.usecallgraph: # add the callgraph data to the device hierarchy sortlist = dict() - for key in test.ftemp: + for key in sorted(test.ftemp): proc, pid = key for cg in test.ftemp[key]: if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0): @@ -3582,7 +3764,7 @@ def parseKernelLog(data): # if trace events are not available, these are better than nothing if(not sysvals.usetraceevents): # look for known actions - for a in at: + for a in sorted(at): if(re.match(at[a]['smsg'], msg)): if(a not in actions): actions[a] = [] @@ -3641,7 +3823,7 @@ def parseKernelLog(data): data.tResumed = data.tSuspended # fill in any actions we've found - for name in actions: + for name in sorted(actions): for event in actions[name]: data.newActionGlobal(name, event['begin'], event['end']) @@ -3761,7 +3943,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): if lastmode and lastmode != mode and num > 0: for i in range(2): s = sorted(tMed[i]) - list[lastmode]['med'][i] = s[int(len(s)/2)] + list[lastmode]['med'][i] = s[int(len(s)//2)] iMed[i] = tMed[i][list[lastmode]['med'][i]] list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] list[lastmode]['min'] = tMin @@ -3803,7 +3985,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): if lastmode and num > 0: for i in range(2): s = sorted(tMed[i]) - list[lastmode]['med'][i] = s[int(len(s)/2)] + list[lastmode]['med'][i] = s[int(len(s)//2)] iMed[i] = tMed[i][list[lastmode]['med'][i]] list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] list[lastmode]['min'] = tMin @@ -3845,7 +4027,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): '\n' headnone = '{0}{1}\n' - for mode in list: + for mode in sorted(list): # header line for each suspend mode num = 0 tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\ @@ -3944,7 +4126,8 @@ def createHTMLDeviceSummary(testruns, htmlfile, title): th.format('Average Time') + th.format('Count') +\ th.format('Worst Time') + th.format('Host (worst time)') +\ th.format('Link (worst time)') + '\n' - for name in sorted(devlist, key=lambda k:devlist[k]['worst'], reverse=True): + for name in sorted(devlist, key=lambda k:(devlist[k]['worst'], \ + devlist[k]['total'], devlist[k]['name']), reverse=True): data = devall[type][name] data['average'] = data['total'] / data['count'] if data['average'] < limit: @@ -4085,7 +4268,7 @@ def createHTML(testruns, testfail): if(tTotal == 0): doError('No timeline data') if(len(data.tLow) > 0): - low_time = '|'.join(data.tLow) + low_time = '+'.join(data.tLow) if sysvals.suspendmode == 'command': run_time = '%.0f'%((data.end-data.start)*1000) if sysvals.testcommand: @@ -4151,7 +4334,7 @@ def createHTML(testruns, testfail): for group in data.devicegroups: devlist = [] for phase in group: - for devname in data.tdevlist[phase]: + for devname in sorted(data.tdevlist[phase]): d = DevItem(data.testnumber, phase, data.dmesg[phase]['list'][devname]) devlist.append(d) if d.isa('kth'): @@ -4230,7 +4413,7 @@ def createHTML(testruns, testfail): for b in phases[dir]: # draw the devices for this phase phaselist = data.dmesg[b]['list'] - for d in data.tdevlist[b]: + for d in sorted(data.tdevlist[b]): name = d drv = '' dev = phaselist[d] @@ -4971,13 +5154,9 @@ def executeSuspend(): if mode == 'freeze' and sysvals.haveTurbostat(): # execution will pause here turbo = sysvals.turbostat() - if '|' in turbo: + if turbo: tdata['turbo'] = turbo - else: - tdata['error'] = turbo else: - if sysvals.haveTurbostat(): - sysvals.vprint('WARNING: ignoring turbostat in mode "%s"' % mode) pf = open(sysvals.powerfile, 'w') pf.write(mode) # execution will pause here @@ -5024,7 +5203,7 @@ def executeSuspend(): op.write(line) op.close() sysvals.fsetVal('', 'trace') - devProps() + sysvals.platforminfo() return testdata def readFile(file): @@ -5040,9 +5219,9 @@ def readFile(file): # The time string, e.g. "1901m16s" def ms2nice(val): val = int(val) - h = val / 3600000 - m = (val / 60000) % 60 - s = (val / 1000) % 60 + h = val // 3600000 + m = (val // 60000) % 60 + s = (val // 1000) % 60 if h > 0: return '%d:%02d:%02d' % (h, m, s) if m > 0: @@ -5115,127 +5294,6 @@ def deviceInfo(output=''): print(lines[i]) return res -# Function: devProps -# Description: -# Retrieve a list of properties for all devices in the trace log -def devProps(data=0): - props = dict() - - if data: - idx = data.index(': ') + 2 - if idx >= len(data): - return - devlist = data[idx:].split(';') - for dev in devlist: - f = dev.split(',') - if len(f) < 3: - continue - dev = f[0] - props[dev] = DevProps() - props[dev].altname = f[1] - if int(f[2]): - props[dev].async = True - else: - props[dev].async = False - sysvals.devprops = props - if sysvals.suspendmode == 'command' and 'testcommandstring' in props: - sysvals.testcommand = props['testcommandstring'].altname - return - - if(os.path.exists(sysvals.ftracefile) == False): - doError('%s does not exist' % sysvals.ftracefile) - - # first get the list of devices we need properties for - msghead = 'Additional data added by AnalyzeSuspend' - alreadystamped = False - tp = TestProps() - tf = sysvals.openlog(sysvals.ftracefile, 'r') - for line in tf: - if msghead in line: - alreadystamped = True - continue - # determine the trace data type (required for further parsing) - m = re.match(tp.tracertypefmt, line) - if(m): - tp.setTracerType(m.group('t')) - continue - # parse only valid lines, if this is not one move on - m = re.match(tp.ftrace_line_fmt, line) - if(not m or 'device_pm_callback_start' not in line): - continue - m = re.match('.*: (?P.*) (?P.*), parent: *(?P

.*), .*', m.group('msg')); - if(not m): - continue - dev = m.group('d') - if dev not in props: - props[dev] = DevProps() - tf.close() - - if not alreadystamped and sysvals.suspendmode == 'command': - out = '#\n# '+msghead+'\n# Device Properties: ' - out += 'testcommandstring,%s,0;' % (sysvals.testcommand) - with sysvals.openlog(sysvals.ftracefile, 'a') as fp: - fp.write(out+'\n') - sysvals.devprops = props - return - - # now get the syspath for each of our target devices - for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(re.match('.*/power', dirname) and 'async' in filenames): - dev = dirname.split('/')[-2] - if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): - props[dev].syspath = dirname[:-6] - - # now fill in the properties for our target devices - for dev in props: - dirname = props[dev].syspath - if not dirname or not os.path.exists(dirname): - continue - with open(dirname+'/power/async') as fp: - text = fp.read() - props[dev].async = False - if 'enabled' in text: - props[dev].async = True - fields = os.listdir(dirname) - if 'product' in fields: - with open(dirname+'/product') as fp: - props[dev].altname = fp.read() - elif 'name' in fields: - with open(dirname+'/name') as fp: - props[dev].altname = fp.read() - elif 'model' in fields: - with open(dirname+'/model') as fp: - props[dev].altname = fp.read() - elif 'description' in fields: - with open(dirname+'/description') as fp: - props[dev].altname = fp.read() - elif 'id' in fields: - with open(dirname+'/id') as fp: - props[dev].altname = fp.read() - elif 'idVendor' in fields and 'idProduct' in fields: - idv, idp = '', '' - with open(dirname+'/idVendor') as fp: - idv = fp.read().strip() - with open(dirname+'/idProduct') as fp: - idp = fp.read().strip() - props[dev].altname = '%s:%s' % (idv, idp) - - if props[dev].altname: - out = props[dev].altname.strip().replace('\n', ' ') - out = out.replace(',', ' ') - out = out.replace(';', ' ') - props[dev].altname = out - - # and now write the data to the ftrace file - if not alreadystamped: - out = '#\n# '+msghead+'\n# Device Properties: ' - for dev in sorted(props): - out += props[dev].out(dev) - with sysvals.openlog(sysvals.ftracefile, 'a') as fp: - fp.write(out+'\n') - - sysvals.devprops = props - # Function: getModes # Description: # Determine the supported power modes on this system @@ -5339,11 +5397,11 @@ def dmidecode(mempath, fatal=False): # search for either an SM table or DMI table i = base = length = num = 0 while(i < memsize): - if buf[i:i+4] == '_SM_' and i < memsize - 16: + if buf[i:i+4] == b'_SM_' and i < memsize - 16: length = struct.unpack('H', buf[i+22:i+24])[0] base, num = struct.unpack('IH', buf[i+24:i+30]) break - elif buf[i:i+5] == '_DMI_': + elif buf[i:i+5] == b'_DMI_': length = struct.unpack('H', buf[i+6:i+8])[0] base, num = struct.unpack('IH', buf[i+8:i+14]) break @@ -5376,15 +5434,15 @@ def dmidecode(mempath, fatal=False): if 0 == struct.unpack('H', buf[n:n+2])[0]: break n += 1 - data = buf[i+size:n+2].split('\0') + data = buf[i+size:n+2].split(b'\0') for name in info: itype, idxadr = info[name] if itype == type: - idx = struct.unpack('B', buf[i+idxadr])[0] + idx = struct.unpack('B', buf[i+idxadr:i+idxadr+1])[0] if idx > 0 and idx < len(data) - 1: - s = data[idx-1].strip() - if s and s.lower() != 'to be filled by o.e.m.': - out[name] = data[idx-1] + s = data[idx-1].decode('utf-8') + if s.strip() and s.strip().lower() != 'to be filled by o.e.m.': + out[name] = s i = n + 2 count += 1 return out @@ -5409,7 +5467,7 @@ def getBattery(): return (ac, charge) def displayControl(cmd): - xset, ret = 'xset -d :0.0 {0}', 0 + xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 if sysvals.sudouser: xset = 'sudo -u %s %s' % (sysvals.sudouser, xset) if cmd == 'init': @@ -5433,7 +5491,7 @@ def displayControl(cmd): fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout ret = 'unknown' for line in fp: - m = re.match('[\s]*Monitor is (?P.*)', line) + m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) if(m and len(m.group('m')) >= 2): out = m.group('m').lower() ret = out[3:] if out[0:2] == 'in' else out @@ -5495,10 +5553,11 @@ def getFPDT(output): ' OEM Revision : %u\n'\ ' Creator ID : %s\n'\ ' Creator Revision : 0x%x\n'\ - '' % (table[0], table[0], table[1], table[2], table[3], - table[4], table[5], table[6], table[7], table[8])) + '' % (ascii(table[0]), ascii(table[0]), table[1], table[2], + table[3], ascii(table[4]), ascii(table[5]), table[6], + ascii(table[7]), table[8])) - if(table[0] != 'FPDT'): + if(table[0] != b'FPDT'): if(output): doError('Invalid FPDT table') return False @@ -5530,8 +5589,8 @@ def getFPDT(output): return [0, 0] rechead = struct.unpack('4sI', first) recdata = fp.read(rechead[1]-8) - if(rechead[0] == 'FBPT'): - record = struct.unpack('HBBIQQQQQ', recdata) + if(rechead[0] == b'FBPT'): + record = struct.unpack('HBBIQQQQQ', recdata[:48]) if(output): pprint('%s (%s)\n'\ ' Reset END : %u ns\n'\ @@ -5539,11 +5598,11 @@ def getFPDT(output): ' OS Loader StartImage Start : %u ns\n'\ ' ExitBootServices Entry : %u ns\n'\ ' ExitBootServices Exit : %u ns'\ - '' % (rectype[header[0]], rechead[0], record[4], record[5], + '' % (rectype[header[0]], ascii(rechead[0]), record[4], record[5], record[6], record[7], record[8])) - elif(rechead[0] == 'S3PT'): + elif(rechead[0] == b'S3PT'): if(output): - pprint('%s (%s)' % (rectype[header[0]], rechead[0])) + pprint('%s (%s)' % (rectype[header[0]], ascii(rechead[0]))) j = 0 while(j < len(recdata)): prechead = struct.unpack('HBB', recdata[j:j+4]) @@ -5689,7 +5748,7 @@ def doError(msg, help=False): def getArgInt(name, args, min, max, main=True): if main: try: - arg = args.next() + arg = next(args) except: doError(name+': no argument supplied', True) else: @@ -5708,7 +5767,7 @@ def getArgInt(name, args, min, max, main=True): def getArgFloat(name, args, min, max, main=True): if main: try: - arg = args.next() + arg = next(args) except: doError(name+': no argument supplied', True) else: @@ -5737,9 +5796,12 @@ def processData(live=False): parseKernelLog(data) if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)): appendIncompleteTraceLog(testruns) + shown = ['bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr', + 'memsz', 'mode', 'numcpu', 'plat', 'time'] sysvals.vprint('System Info:') for key in sorted(sysvals.stamp): - sysvals.vprint(' %-8s : %s' % (key.upper(), sysvals.stamp[key])) + if key in shown: + sysvals.vprint(' %-8s : %s' % (key.upper(), sysvals.stamp[key])) if sysvals.kparams: sysvals.vprint('Kparams:\n %s' % sysvals.kparams) sysvals.vprint('Command:\n %s' % sysvals.cmdline) @@ -5768,6 +5830,12 @@ def processData(live=False): (w[0], w[1]) sysvals.vprint(s) data.printDetails() + if len(sysvals.platinfo) > 0: + sysvals.vprint('\nPlatform Info:') + for info in sysvals.platinfo: + sysvals.vprint(info[0]+' - '+info[1]) + sysvals.vprint(info[2]) + sysvals.vprint('') if sysvals.cgdump: for data in testruns: data.debugPrint() @@ -5951,7 +6019,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): worst[d] = {'name':'', 'time': 0.0} dev = devices[d] if d in devices else 0 if dev and len(dev.keys()) > 0: - n = sorted(dev, key=dev.get, reverse=True)[0] + n = sorted(dev, key=lambda k:(dev[k], k), reverse=True)[0] worst[d]['name'], worst[d]['time'] = n, dev[n] data = { 'mode': stmp[2], @@ -5976,7 +6044,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): data['funclist'] = find_in_html(html, '

Date: Fri, 9 Aug 2019 07:52:49 +0530 Subject: [PATCH 058/126] cpufreq: intel_pstate: Implement QoS supported freq constraints Intel pstate driver exposes min_perf_pct and max_perf_pct sysfs files, which can be used to force a limit on the min/max P state of the driver. Though these files eventually control the min/max frequencies that the CPUs will run at, they don't make a change to policy->min/max values. When the values of these files are changed (in passive mode of the driver), it leads to calling ->limits() callback of the cpufreq governors, like schedutil. On a call to it the governors shall forcefully update the frequency to come within the limits. Since the limits, i.e. policy->min/max, aren't updated by the driver, the governors fails to get the target freq within limit and sometimes aborts the update believing that the frequency is already set to the target value. This patch implements the QoS supported frequency constraints to update policy->min/max values whenever min_perf_pct or max_perf_pct files are updated. This is only done for the passive mode as of now, as the driver is already working fine in active mode. Fixes: ecd288429126 ("cpufreq: schedutil: Don't set next_freq to UINT_MAX") Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 120 +++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cc27d4c59dca..32f27563613b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1085,6 +1086,47 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, return count; } +static struct cpufreq_driver intel_pstate; + +static void update_qos_request(enum dev_pm_qos_req_type type) +{ + int max_state, turbo_max, freq, i, perf_pct; + struct dev_pm_qos_request *req; + struct cpufreq_policy *policy; + + for_each_possible_cpu(i) { + struct cpudata *cpu = all_cpu_data[i]; + + policy = cpufreq_cpu_get(i); + if (!policy) + continue; + + req = policy->driver_data; + cpufreq_cpu_put(policy); + + if (!req) + continue; + + if (hwp_active) + intel_pstate_get_hwp_max(i, &turbo_max, &max_state); + else + turbo_max = cpu->pstate.turbo_pstate; + + if (type == DEV_PM_QOS_MIN_FREQUENCY) { + perf_pct = global.min_perf_pct; + } else { + req++; + perf_pct = global.max_perf_pct; + } + + freq = DIV_ROUND_UP(turbo_max * perf_pct, 100); + freq *= cpu->pstate.scaling; + + if (dev_pm_qos_update_request(req, freq) < 0) + pr_warn("Failed to update freq constraint: CPU%d\n", i); + } +} + static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { @@ -1108,7 +1150,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + if (intel_pstate_driver == &intel_pstate) + intel_pstate_update_policies(); + else + update_qos_request(DEV_PM_QOS_MAX_FREQUENCY); mutex_unlock(&intel_pstate_driver_lock); @@ -1139,7 +1184,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + if (intel_pstate_driver == &intel_pstate) + intel_pstate_update_policies(); + else + update_qos_request(DEV_PM_QOS_MIN_FREQUENCY); mutex_unlock(&intel_pstate_driver_lock); @@ -2332,8 +2380,16 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) { - int ret = __intel_pstate_cpu_init(policy); + int max_state, turbo_max, min_freq, max_freq, ret; + struct dev_pm_qos_request *req; + struct cpudata *cpu; + struct device *dev; + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + ret = __intel_pstate_cpu_init(policy); if (ret) return ret; @@ -2342,7 +2398,63 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) /* This reflects the intel_pstate_get_cpu_pstates() setting. */ policy->cur = policy->cpuinfo.min_freq; + req = kcalloc(2, sizeof(*req), GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto pstate_exit; + } + + cpu = all_cpu_data[policy->cpu]; + + if (hwp_active) + intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state); + else + turbo_max = cpu->pstate.turbo_pstate; + + min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); + min_freq *= cpu->pstate.scaling; + max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); + max_freq *= cpu->pstate.scaling; + + ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_MIN_FREQUENCY, + min_freq); + if (ret < 0) { + dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); + goto free_req; + } + + ret = dev_pm_qos_add_request(dev, req + 1, DEV_PM_QOS_MAX_FREQUENCY, + max_freq); + if (ret < 0) { + dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); + goto remove_min_req; + } + + policy->driver_data = req; + return 0; + +remove_min_req: + dev_pm_qos_remove_request(req); +free_req: + kfree(req); +pstate_exit: + intel_pstate_exit_perf_limits(policy); + + return ret; +} + +static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct dev_pm_qos_request *req; + + req = policy->driver_data; + + dev_pm_qos_remove_request(req + 1); + dev_pm_qos_remove_request(req); + kfree(req); + + return intel_pstate_cpu_exit(policy); } static struct cpufreq_driver intel_cpufreq = { @@ -2351,7 +2463,7 @@ static struct cpufreq_driver intel_cpufreq = { .target = intel_cpufreq_target, .fast_switch = intel_cpufreq_fast_switch, .init = intel_cpufreq_cpu_init, - .exit = intel_pstate_cpu_exit, + .exit = intel_cpufreq_cpu_exit, .stop_cpu = intel_cpufreq_stop_cpu, .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", From c3082a674f46fe49383b157882c41dfabaa37113 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Thu, 11 Jul 2019 19:51:25 +0530 Subject: [PATCH 059/126] PM: QoS: Get rid of unused flags The network_latency and network_throughput flags for PM-QoS have not found much use in drivers or in userspace since they were introduced. Commit 4a733ef1bea7 ("mac80211: remove PM-QoS listener") removed the only user PM_QOS_NETWORK_LATENCY in the kernel a while ago and there don't seem to be any userspace tools using the character device files either. PM_QOS_MEMORY_BANDWIDTH was never even added to the trace events. Remove all the flags except cpu_dma_latency. Signed-off-by: Amit Kucheria Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.rst | 5 +-- include/linux/pm_qos.h | 6 --- include/trace/events/power.h | 8 +--- kernel/power/qos.c | 48 ------------------------ 4 files changed, 4 insertions(+), 63 deletions(-) diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst index 69921f072ce1..3097694fba69 100644 --- a/Documentation/power/pm_qos_interface.rst +++ b/Documentation/power/pm_qos_interface.rst @@ -7,8 +7,7 @@ performance expectations by drivers, subsystems and user space applications on one of the parameters. Two different PM QoS frameworks are available: -1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput, -memory_bandwidth. +1. PM QoS classes for cpu_dma_latency 2. the per-device PM QoS framework provides the API to manage the per-device latency constraints and PM QoS flags. @@ -79,7 +78,7 @@ cleanup of a process, the interface requires the process to register its parameter requests in the following way: To register the default pm_qos target for the specific parameter, the process -must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] +must open /dev/cpu_dma_latency As long as the device node is held open that process has a registered request on the parameter. diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 2aebbc5b9950..222c3e01397c 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -13,9 +13,6 @@ enum { PM_QOS_RESERVED = 0, PM_QOS_CPU_DMA_LATENCY, - PM_QOS_NETWORK_LATENCY, - PM_QOS_NETWORK_THROUGHPUT, - PM_QOS_MEMORY_BANDWIDTH, /* insert new class ID */ PM_QOS_NUM_CLASSES, @@ -33,9 +30,6 @@ enum pm_qos_flags_status { #define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) -#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) -#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 -#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS diff --git a/include/trace/events/power.h b/include/trace/events/power.h index f7aece721aed..7457e238e1b7 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -379,9 +379,7 @@ DECLARE_EVENT_CLASS(pm_qos_request, TP_printk("pm_qos_class=%s value=%d", __print_symbolic(__entry->pm_qos_class, - { PM_QOS_CPU_DMA_LATENCY, "CPU_DMA_LATENCY" }, - { PM_QOS_NETWORK_LATENCY, "NETWORK_LATENCY" }, - { PM_QOS_NETWORK_THROUGHPUT, "NETWORK_THROUGHPUT" }), + { PM_QOS_CPU_DMA_LATENCY, "CPU_DMA_LATENCY" }), __entry->value) ); @@ -426,9 +424,7 @@ TRACE_EVENT(pm_qos_update_request_timeout, TP_printk("pm_qos_class=%s value=%d, timeout_us=%ld", __print_symbolic(__entry->pm_qos_class, - { PM_QOS_CPU_DMA_LATENCY, "CPU_DMA_LATENCY" }, - { PM_QOS_NETWORK_LATENCY, "NETWORK_LATENCY" }, - { PM_QOS_NETWORK_THROUGHPUT, "NETWORK_THROUGHPUT" }), + { PM_QOS_CPU_DMA_LATENCY, "CPU_DMA_LATENCY" }), __entry->value, __entry->timeout_us) ); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 33e3febaba53..9568a2fe7c11 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -78,57 +78,9 @@ static struct pm_qos_object cpu_dma_pm_qos = { .name = "cpu_dma_latency", }; -static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); -static struct pm_qos_constraints network_lat_constraints = { - .list = PLIST_HEAD_INIT(network_lat_constraints.list), - .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .no_constraint_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN, - .notifiers = &network_lat_notifier, -}; -static struct pm_qos_object network_lat_pm_qos = { - .constraints = &network_lat_constraints, - .name = "network_latency", -}; - - -static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); -static struct pm_qos_constraints network_tput_constraints = { - .list = PLIST_HEAD_INIT(network_tput_constraints.list), - .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, - .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, - .no_constraint_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, - .type = PM_QOS_MAX, - .notifiers = &network_throughput_notifier, -}; -static struct pm_qos_object network_throughput_pm_qos = { - .constraints = &network_tput_constraints, - .name = "network_throughput", -}; - - -static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier); -static struct pm_qos_constraints memory_bw_constraints = { - .list = PLIST_HEAD_INIT(memory_bw_constraints.list), - .target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE, - .default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE, - .no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE, - .type = PM_QOS_SUM, - .notifiers = &memory_bandwidth_notifier, -}; -static struct pm_qos_object memory_bandwidth_pm_qos = { - .constraints = &memory_bw_constraints, - .name = "memory_bandwidth", -}; - - static struct pm_qos_object *pm_qos_array[] = { &null_pm_qos, &cpu_dma_pm_qos, - &network_lat_pm_qos, - &network_throughput_pm_qos, - &memory_bandwidth_pm_qos, }; static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, From 45dc1576e4575ba621cb6d017faf41531d8c1073 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Aug 2019 12:35:03 +0200 Subject: [PATCH 060/126] ACPI: PM: s2idle: Avoid rearming SCI for wakeup unnecessarily It is only necessary to rearm the ACPI SCI for wakeup if pm_system_cancel_wakeup() has been called, so invoke rearm_wake_irq() only in that case. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 8f7e95f97e1f..c52ecbda863f 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1012,9 +1012,9 @@ static void acpi_s2idle_wake(void) acpi_os_wait_events_complete(); /* synchronize EC GPE processing */ acpi_ec_flush_work(); acpi_os_wait_events_complete(); /* synchronize Notify handling */ - } - rearm_wake_irq(acpi_sci_irq); + rearm_wake_irq(acpi_sci_irq); + } } static void acpi_s2idle_restore_early(void) From b90ff3554aa3e123bb7e6d08789f6fd92d86ddde Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Aug 2019 11:40:19 +0200 Subject: [PATCH 061/126] ACPI: PM: s2idle: Always set up EC GPE for system wakeup Commit 10a08fd65ec1 ("ACPI: PM: Set up EC GPE for system wakeup from drivers that need it") assumed that the EC GPE would only need to be set up for system wakeup if either the intel-hid or the intel-vbtn driver was in use, but that turns out to be incorrect. In particular, on ASUS Zenbook UX430UNR/i7-8550U, if the EC GPE is not enabled while suspended, the system cannot be woken up by opening the lid or pressing a key, and that machine doesn't use any of the drivers mentioned above. For this reason, always set up the EC GPE for system wakeup from suspend-to-idle by setting and clearing its wake mask in the ACPI suspend-to-idle callbacks. Fixes: 10a08fd65ec1 ("ACPI: PM: Set up EC GPE for system wakeup from drivers that need it") Reported-by: Kristian Klausen Tested-by: Kristian Klausen Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 1 - drivers/acpi/sleep.c | 15 +++++++++++++-- drivers/platform/x86/intel-hid.c | 6 +----- drivers/platform/x86/intel-vbtn.c | 6 +----- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 58597ec813eb..da1e5c5ce150 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1970,7 +1970,6 @@ void acpi_ec_set_gpe_wake_mask(u8 action) if (pm_suspend_no_platform() && first_ec && !ec_no_wakeup) acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); } -EXPORT_SYMBOL_GPL(acpi_ec_set_gpe_wake_mask); bool acpi_ec_dispatch_gpe(void) { diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index c52ecbda863f..9fa77d72ef27 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -938,6 +938,13 @@ static int lps0_device_attach(struct acpi_device *adev, if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3) mem_sleep_current = PM_SUSPEND_TO_IDLE; + /* + * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the + * EC GPE to be enabled while suspended for certain wakeup devices to + * work, so mark it as wakeup-capable. + */ + acpi_ec_mark_gpe_for_wake(); + return 0; } @@ -954,8 +961,10 @@ static int acpi_s2idle_begin(void) static int acpi_s2idle_prepare(void) { - if (acpi_sci_irq_valid()) + if (acpi_sci_irq_valid()) { enable_irq_wake(acpi_sci_irq); + acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); + } acpi_enable_wakeup_devices(ACPI_STATE_S0); @@ -1034,8 +1043,10 @@ static void acpi_s2idle_restore(void) acpi_disable_wakeup_devices(ACPI_STATE_S0); - if (acpi_sci_irq_valid()) + if (acpi_sci_irq_valid()) { + acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); disable_irq_wake(acpi_sci_irq); + } } static void acpi_s2idle_end(void) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 18ac237114ff..ef6d4bd77b1a 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -257,7 +257,6 @@ static int intel_hid_pm_prepare(struct device *device) struct intel_hid_priv *priv = dev_get_drvdata(device); priv->wakeup_mode = true; - acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); } return 0; } @@ -266,10 +265,7 @@ static void intel_hid_pm_complete(struct device *device) { struct intel_hid_priv *priv = dev_get_drvdata(device); - if (priv->wakeup_mode) { - acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); - priv->wakeup_mode = false; - } + priv->wakeup_mode = false; } static int intel_hid_pl_suspend_handler(struct device *device) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index b28e5519337e..b74932307d69 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -205,7 +205,6 @@ static int intel_vbtn_pm_prepare(struct device *dev) struct intel_vbtn_priv *priv = dev_get_drvdata(dev); priv->wakeup_mode = true; - acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE); } return 0; } @@ -214,10 +213,7 @@ static void intel_vbtn_pm_complete(struct device *dev) { struct intel_vbtn_priv *priv = dev_get_drvdata(dev); - if (priv->wakeup_mode) { - acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE); - priv->wakeup_mode = false; - } + priv->wakeup_mode = false; } static int intel_vbtn_pm_resume(struct device *dev) From 0eae1e37db8acef1e511cdbc63634cb700188644 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Wed, 7 Aug 2019 17:15:43 +0530 Subject: [PATCH 062/126] cpufreq: qcom-hw: Update logic to detect turbo frequency The core count read back from the each domain's look up table serves as an indicator for the onset of the turbo frequency and not accurate representation of number of cores in a paticular domain. Update turbo detection logic accordingly to add support for SM8150 SoCs. Signed-off-by: Sibi Sankar Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 4b0b50403901..c9a9da592f10 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -20,6 +20,7 @@ #define LUT_VOLT GENMASK(11, 0) #define LUT_ROW_SIZE 32 #define CLK_HW_DIV 2 +#define LUT_TURBO_IND 1 /* Register offsets */ #define REG_ENABLE 0x0 @@ -77,9 +78,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, struct cpufreq_policy *policy, void __iomem *base) { - u32 data, src, lval, i, core_count, prev_cc = 0, prev_freq = 0, freq; + u32 data, src, lval, i, core_count, prev_freq = 0, freq; u32 volt; - unsigned int max_cores = cpumask_weight(policy->cpus); struct cpufreq_frequency_table *table; table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL); @@ -102,12 +102,12 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, else freq = cpu_hw_rate / 1000; - if (freq != prev_freq && core_count == max_cores) { + if (freq != prev_freq && core_count != LUT_TURBO_IND) { table[i].frequency = freq; dev_pm_opp_add(cpu_dev, freq * 1000, volt); dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i, freq, core_count); - } else { + } else if (core_count == LUT_TURBO_IND) { table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -115,14 +115,14 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, * Two of the same frequencies with the same core counts means * end of table */ - if (i > 0 && prev_freq == freq && prev_cc == core_count) { + if (i > 0 && prev_freq == freq) { struct cpufreq_frequency_table *prev = &table[i - 1]; /* * Only treat the last frequency that might be a boost * as the boost frequency */ - if (prev_cc != max_cores) { + if (prev->frequency == CPUFREQ_ENTRY_INVALID) { prev->frequency = prev_freq; prev->flags = CPUFREQ_BOOST_FREQ; dev_pm_opp_add(cpu_dev, prev_freq * 1000, volt); @@ -131,7 +131,6 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, break; } - prev_cc = core_count; prev_freq = freq; } From ada54f35b2270942f45250fa1c553a426b579f9e Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Thu, 8 Aug 2019 14:18:57 +0100 Subject: [PATCH 063/126] cpufreq: qcom-hw: invoke frequency-invariance setter function Add calls to arch_set_freq_scale() in qcom-cpufreq-hw driver to enable frequency invariance. Signed-off-by: Douglas RAILLARD Reviewed-by: Quentin Perret Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index c9a9da592f10..a9ae2f84a4ef 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -35,9 +35,12 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, unsigned int index) { void __iomem *perf_state_reg = policy->driver_data; + unsigned long freq = policy->freq_table[index].frequency; writel_relaxed(index, perf_state_reg); + arch_set_freq_scale(policy->related_cpus, freq, + policy->cpuinfo.max_freq); return 0; } @@ -64,6 +67,7 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, { void __iomem *perf_state_reg = policy->driver_data; int index; + unsigned long freq; index = policy->cached_resolved_idx; if (index < 0) @@ -71,7 +75,11 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, writel_relaxed(index, perf_state_reg); - return policy->freq_table[index].frequency; + freq = policy->freq_table[index].frequency; + arch_set_freq_scale(policy->related_cpus, freq, + policy->cpuinfo.max_freq); + + return freq; } static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, From 8ec5035039c743888c5ecd04a680676ef1537fe8 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sun, 18 Aug 2019 02:32:21 -0400 Subject: [PATCH 064/126] cpufreq: Use imx-cpufreq-dt for i.MX8MN's speed grading Add i.MX8MN to blacklist, so that imx-cpufreq-dt driver can handle speed grading bits just like other i.MX8M SoCs. Signed-off-by: Anson Huang Reviewed-by: Leonard Crestez Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index f9444ddd35ab..6fce11f63403 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -109,6 +109,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "fsl,imx7d", }, { .compatible = "fsl,imx8mq", }, { .compatible = "fsl,imx8mm", }, + { .compatible = "fsl,imx8mn", }, { .compatible = "marvell,armadaxp", }, From 75c000c4bcbe2b0eb82baf90c7dd75c7380cc3fd Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sun, 18 Aug 2019 02:32:22 -0400 Subject: [PATCH 065/126] cpufreq: imx-cpufreq-dt: Add i.MX8MN support i.MX8MN has different speed grading definition as below, it has 4 bits to define speed grading, add support for it. SPEED_GRADE[3:0] MHz 0000 2300 0001 2200 0010 2100 0011 2000 0100 1900 0101 1800 0110 1700 0111 1600 1000 1500 1001 1400 1010 1300 1011 1200 1100 1100 1101 1000 1110 900 1111 800 Signed-off-by: Anson Huang Reviewed-by: Leonard Crestez Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx-cpufreq-dt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 4f85f3112784..35db14cf3102 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -16,6 +16,7 @@ #define OCOTP_CFG3_SPEED_GRADE_SHIFT 8 #define OCOTP_CFG3_SPEED_GRADE_MASK (0x3 << 8) +#define IMX8MN_OCOTP_CFG3_SPEED_GRADE_MASK (0xf << 8) #define OCOTP_CFG3_MKT_SEGMENT_SHIFT 6 #define OCOTP_CFG3_MKT_SEGMENT_MASK (0x3 << 6) @@ -34,7 +35,12 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) if (ret) return ret; - speed_grade = (cell_value & OCOTP_CFG3_SPEED_GRADE_MASK) >> OCOTP_CFG3_SPEED_GRADE_SHIFT; + if (of_machine_is_compatible("fsl,imx8mn")) + speed_grade = (cell_value & IMX8MN_OCOTP_CFG3_SPEED_GRADE_MASK) + >> OCOTP_CFG3_SPEED_GRADE_SHIFT; + else + speed_grade = (cell_value & OCOTP_CFG3_SPEED_GRADE_MASK) + >> OCOTP_CFG3_SPEED_GRADE_SHIFT; mkt_segment = (cell_value & OCOTP_CFG3_MKT_SEGMENT_MASK) >> OCOTP_CFG3_MKT_SEGMENT_SHIFT; /* From dce0bb84a73bac8a5bf3ad54e922215b61350481 Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Tue, 13 Aug 2019 21:31:46 +0800 Subject: [PATCH 066/126] cpufreq: mediatek: change to regulator_get_optional For new mediatek chip mt8183, cci and little cluster share the same buck, so need to modify the attribute of regulator from exclusive to optional Signed-off-by: Andrew-sh.Cheng Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 10bc06f5dd45..42bc4c9044f6 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -338,7 +338,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) goto out_free_resources; } - proc_reg = regulator_get_exclusive(cpu_dev, "proc"); + proc_reg = regulator_get_optional(cpu_dev, "proc"); if (IS_ERR(proc_reg)) { if (PTR_ERR(proc_reg) == -EPROBE_DEFER) pr_warn("proc regulator for cpu%d not ready, retry.\n", From 9176b425bf8ff98fd12bffeea8e39373345cd745 Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Tue, 13 Aug 2019 21:31:48 +0800 Subject: [PATCH 067/126] cpufreq: mediatek: Add support for mt8183 Add compatible string for mediatek mt8183 Signed-off-by: Andrew-sh.Cheng [ VK: Fixed rebase conflict ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + drivers/cpufreq/mediatek-cpufreq.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 6fce11f63403..d51bfef286e3 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -120,6 +120,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "mediatek,mt817x", }, { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, + { .compatible = "mediatek,mt8183", }, { .compatible = "nvidia,tegra124", }, { .compatible = "nvidia,tegra210", }, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 42bc4c9044f6..0c98dd08273d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -535,6 +535,7 @@ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt817x", }, { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, + { .compatible = "mediatek,mt8183", }, { .compatible = "mediatek,mt8516", }, { } From e9a7cc1d97f13f11970f7c484bdfc0502efc4223 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 21 Aug 2019 16:16:32 -0700 Subject: [PATCH 068/126] cpufreq: Print driver name if cpufreq_suspend() fails Instead of printing the policy, which is incidentally a kernel pointer, so with limited interest, print the cpufreq driver name that failed to be suspend, which is more useful for debugging. Signed-off-by: Florian Fainelli Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26d82e0a2de5..5bfda2bfdf4f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1807,8 +1807,8 @@ void cpufreq_suspend(void) } if (cpufreq_driver->suspend && cpufreq_driver->suspend(policy)) - pr_err("%s: Failed to suspend driver: %p\n", __func__, - policy); + pr_err("%s: Failed to suspend driver: %s\n", __func__, + cpufreq_driver->name); } suspend: From 62bacb06b9f08965c4ef10e17875450490c948c0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:00 +0300 Subject: [PATCH 069/126] PM / devfreq: tegra: Fix kHz to Hz conversion The kHz to Hz is incorrectly converted in a few places in the code, this results in a wrong frequency being calculated because devfreq core uses OPP frequencies that are given in Hz to clamp the rate, while tegra-devfreq gives to the core value in kHz and then it also expects to receive value in kHz from the core. In a result memory freq is always set to a value which is close to ULONG_MAX because of the bug. Hence the EMC frequency is always capped to the maximum and the driver doesn't do anything useful. This patch was tested on Tegra30 and Tegra124 SoC's, EMC frequency scaling works properly now. Cc: # 4.14+ Tested-by: Steev Klimaszewski Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 35c38aad8b4f..cd15c96dd27f 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -474,11 +474,11 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq, { struct tegra_devfreq *tegra = dev_get_drvdata(dev); struct dev_pm_opp *opp; - unsigned long rate = *freq * KHZ; + unsigned long rate; - opp = devfreq_recommended_opp(dev, &rate, flags); + opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) { - dev_err(dev, "Failed to find opp for %lu KHz\n", *freq); + dev_err(dev, "Failed to find opp for %lu Hz\n", *freq); return PTR_ERR(opp); } rate = dev_pm_opp_get_freq(opp); @@ -487,8 +487,6 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq, clk_set_min_rate(tegra->emc_clock, rate); clk_set_rate(tegra->emc_clock, 0); - *freq = rate; - return 0; } @@ -498,7 +496,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev, struct tegra_devfreq *tegra = dev_get_drvdata(dev); struct tegra_devfreq_device *actmon_dev; - stat->current_frequency = tegra->cur_freq; + stat->current_frequency = tegra->cur_freq * KHZ; /* To be used by the tegra governor */ stat->private_data = tegra; @@ -553,7 +551,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq, target_freq = max(target_freq, dev->target_freq); } - *freq = target_freq; + *freq = target_freq * KHZ; return 0; } From efe9043db49649c1f6c21df8a3860689c2035911 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:01 +0300 Subject: [PATCH 070/126] PM / devfreq: tegra: Replace readl-writel with relaxed versions There is no need to insert memory barrier on each readl/writel invocation, hence use the relaxed versions. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index cd15c96dd27f..9d9bfef1465d 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -179,23 +179,23 @@ static struct tegra_actmon_emc_ratio actmon_emc_ratios[] = { static u32 actmon_readl(struct tegra_devfreq *tegra, u32 offset) { - return readl(tegra->regs + offset); + return readl_relaxed(tegra->regs + offset); } static void actmon_writel(struct tegra_devfreq *tegra, u32 val, u32 offset) { - writel(val, tegra->regs + offset); + writel_relaxed(val, tegra->regs + offset); } static u32 device_readl(struct tegra_devfreq_device *dev, u32 offset) { - return readl(dev->regs + offset); + return readl_relaxed(dev->regs + offset); } static void device_writel(struct tegra_devfreq_device *dev, u32 val, u32 offset) { - writel(val, dev->regs + offset); + writel_relaxed(val, dev->regs + offset); } static unsigned long do_percent(unsigned long val, unsigned int pct) From ed2a8dd22a752af60282af38a47d26b227f772a7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:02 +0300 Subject: [PATCH 071/126] PM / devfreq: tegra: Replace write memory barrier with the read barrier The write memory barrier isn't needed because the BUS buffer is flushed by read after write that happens after the removed wmb(), we will also use readl() instead of the relaxed version to ensure that read is indeed completed. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 9d9bfef1465d..986f3dfbcab9 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -231,8 +231,7 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra, static void actmon_write_barrier(struct tegra_devfreq *tegra) { /* ensure the update has reached the ACTMON */ - wmb(); - actmon_readl(tegra, ACTMON_GLB_STATUS); + readl(tegra->regs + ACTMON_GLB_STATUS); } static void actmon_isr_device(struct tegra_devfreq *tegra, From 30af44fae8bdccfb21a45dfb4029f954f618d2e3 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:03 +0300 Subject: [PATCH 072/126] PM / devfreq: tegra: Don't ignore clk errors The clk_set_min_rate() could fail and in this case clk_set_rate() sets rate to 0, which may drop EMC rate to minimum and make machine very difficult to use. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 986f3dfbcab9..325443f67725 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -472,8 +472,10 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { struct tegra_devfreq *tegra = dev_get_drvdata(dev); + struct devfreq *devfreq = tegra->devfreq; struct dev_pm_opp *opp; unsigned long rate; + int err; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) { @@ -483,10 +485,20 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq, rate = dev_pm_opp_get_freq(opp); dev_pm_opp_put(opp); - clk_set_min_rate(tegra->emc_clock, rate); - clk_set_rate(tegra->emc_clock, 0); + err = clk_set_min_rate(tegra->emc_clock, rate); + if (err) + return err; + + err = clk_set_rate(tegra->emc_clock, 0); + if (err) + goto restore_min_rate; return 0; + +restore_min_rate: + clk_set_min_rate(tegra->emc_clock, devfreq->previous_freq); + + return err; } static int tegra_devfreq_get_dev_status(struct device *dev, From b061312f4bca20ed60246f7fe2e1ffa247ce5bf7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:04 +0300 Subject: [PATCH 073/126] PM / devfreq: tegra: Don't set EMC clock rate to maximum on probe There is no real benefit from doing so, hence let's drop that rate setting for consistency. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 325443f67725..6780fb41ed12 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -641,8 +641,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return PTR_ERR(tegra->emc_clock); } - clk_set_rate(tegra->emc_clock, ULONG_MAX); - tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb; err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb); if (err) { From dd3f2616bb99f1d2bfd0536074824722e953f6be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:05 +0300 Subject: [PATCH 074/126] PM / devfreq: tegra: Drop primary interrupt handler There is no real need in the primary interrupt handler, hence move everything to the secondary (threaded) handler. In a result locking is consistent now and there are no potential races with the interrupt handler because it is protected with the devfreq's mutex. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 55 +++++++++++---------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 6780fb41ed12..b923d216a4a9 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -132,7 +132,6 @@ static struct tegra_devfreq_device_config actmon_device_configs[] = { struct tegra_devfreq_device { const struct tegra_devfreq_device_config *config; void __iomem *regs; - spinlock_t lock; /* Average event count sampled in the last interrupt */ u32 avg_count; @@ -237,11 +236,8 @@ static void actmon_write_barrier(struct tegra_devfreq *tegra) static void actmon_isr_device(struct tegra_devfreq *tegra, struct tegra_devfreq_device *dev) { - unsigned long flags; u32 intr_status, dev_ctrl; - spin_lock_irqsave(&dev->lock, flags); - dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT); tegra_devfreq_update_avg_wmark(tegra, dev); @@ -290,26 +286,6 @@ static void actmon_isr_device(struct tegra_devfreq *tegra, device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS); actmon_write_barrier(tegra); - - spin_unlock_irqrestore(&dev->lock, flags); -} - -static irqreturn_t actmon_isr(int irq, void *data) -{ - struct tegra_devfreq *tegra = data; - bool handled = false; - unsigned int i; - u32 val; - - val = actmon_readl(tegra, ACTMON_GLB_STATUS); - for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { - if (val & tegra->devices[i].config->irq_mask) { - actmon_isr_device(tegra, tegra->devices + i); - handled = true; - } - } - - return handled ? IRQ_WAKE_THREAD : IRQ_NONE; } static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra, @@ -336,15 +312,12 @@ static void actmon_update_target(struct tegra_devfreq *tegra, unsigned long cpu_freq = 0; unsigned long static_cpu_emc_freq = 0; unsigned int avg_sustain_coef; - unsigned long flags; if (dev->config->avg_dependency_threshold) { cpu_freq = cpufreq_get(0); static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq); } - spin_lock_irqsave(&dev->lock, flags); - dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD; avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold; dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef); @@ -352,19 +325,31 @@ static void actmon_update_target(struct tegra_devfreq *tegra, if (dev->avg_count >= dev->config->avg_dependency_threshold) dev->target_freq = max(dev->target_freq, static_cpu_emc_freq); - - spin_unlock_irqrestore(&dev->lock, flags); } static irqreturn_t actmon_thread_isr(int irq, void *data) { struct tegra_devfreq *tegra = data; + bool handled = false; + unsigned int i; + u32 val; mutex_lock(&tegra->devfreq->lock); - update_devfreq(tegra->devfreq); + + val = actmon_readl(tegra, ACTMON_GLB_STATUS); + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { + if (val & tegra->devices[i].config->irq_mask) { + actmon_isr_device(tegra, tegra->devices + i); + handled = true; + } + } + + if (handled) + update_devfreq(tegra->devfreq); + mutex_unlock(&tegra->devfreq->lock); - return IRQ_HANDLED; + return handled ? IRQ_HANDLED : IRQ_NONE; } static int tegra_actmon_rate_notify_cb(struct notifier_block *nb, @@ -374,7 +359,6 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb, struct tegra_devfreq *tegra; struct tegra_devfreq_device *dev; unsigned int i; - unsigned long flags; if (action != POST_RATE_CHANGE) return NOTIFY_OK; @@ -386,9 +370,7 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb, for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { dev = &tegra->devices[i]; - spin_lock_irqsave(&dev->lock, flags); tegra_devfreq_update_wmark(tegra, dev); - spin_unlock_irqrestore(&dev->lock, flags); } actmon_write_barrier(tegra); @@ -670,7 +652,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) dev = tegra->devices + i; dev->config = actmon_device_configs + i; dev->regs = tegra->regs + dev->config->offset; - spin_lock_init(&dev->lock); tegra_actmon_configure_device(tegra, dev); } @@ -688,8 +669,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tegra); - err = devm_request_threaded_irq(&pdev->dev, irq, actmon_isr, - actmon_thread_isr, IRQF_SHARED, + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + actmon_thread_isr, IRQF_ONESHOT, "tegra-devfreq", tegra); if (err) { dev_err(&pdev->dev, "Interrupt request failed\n"); From 7514dd05eaae5b8cf0b2af34f75a048ee023fe2c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:06 +0300 Subject: [PATCH 075/126] PM / devfreq: tegra: Properly disable interrupts There is no guarantee that interrupt handling isn't running in parallel with tegra_actmon_disable_interrupts(), hence it is necessary to protect DEV_CTRL register accesses and clear IRQ status with ACTMON's IRQ being disabled in the Interrupt Controller in order to ensure that device interrupt is indeed being disabled. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index b923d216a4a9..d88f81fc99de 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -159,6 +159,8 @@ struct tegra_devfreq { struct notifier_block rate_change_nb; struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)]; + + int irq; }; struct tegra_actmon_emc_ratio { @@ -405,6 +407,8 @@ static void tegra_actmon_disable_interrupts(struct tegra_devfreq *tegra) u32 val; unsigned int i; + disable_irq(tegra->irq); + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { dev = &tegra->devices[i]; @@ -415,9 +419,14 @@ static void tegra_actmon_disable_interrupts(struct tegra_devfreq *tegra) val &= ~ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN; device_writel(dev, val, ACTMON_DEV_CTRL); + + device_writel(dev, ACTMON_INTR_STATUS_CLEAR, + ACTMON_DEV_INTR_STATUS); } actmon_write_barrier(tegra); + + enable_irq(tegra->irq); } static void tegra_actmon_configure_device(struct tegra_devfreq *tegra, @@ -592,7 +601,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) struct resource *res; unsigned int i; unsigned long rate; - int irq; int err; tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL); @@ -661,15 +669,16 @@ static int tegra_devfreq_probe(struct platform_device *pdev) dev_pm_opp_add(&pdev->dev, rate, 0); } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); - return irq; + tegra->irq = platform_get_irq(pdev, 0); + if (tegra->irq < 0) { + err = tegra->irq; + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err); + return err; } platform_set_drvdata(pdev, tegra); - err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL, actmon_thread_isr, IRQF_ONESHOT, "tegra-devfreq", tegra); if (err) { From 8fda5c1fab4f8df22bad43ba93a5ba83594ac256 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:07 +0300 Subject: [PATCH 076/126] PM / devfreq: tegra: Clean up driver's probe / remove Reset hardware, disable ACTMON clock, release OPP's and handle all possible error cases correctly, maintaining the correct tear down order. Also use devm_platform_ioremap_resource() which is now available in the kernel. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 83 +++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index d88f81fc99de..7e9145bd3686 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -598,7 +598,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev) { struct tegra_devfreq *tegra; struct tegra_devfreq_device *dev; - struct resource *res; unsigned int i; unsigned long rate; int err; @@ -607,9 +606,7 @@ static int tegra_devfreq_probe(struct platform_device *pdev) if (!tegra) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - tegra->regs = devm_ioremap_resource(&pdev->dev, res); + tegra->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(tegra->regs)) return PTR_ERR(tegra->regs); @@ -631,11 +628,10 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return PTR_ERR(tegra->emc_clock); } - tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb; - err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb); - if (err) { - dev_err(&pdev->dev, - "Failed to register rate change notifier\n"); + tegra->irq = platform_get_irq(pdev, 0); + if (tegra->irq < 0) { + err = tegra->irq; + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err); return err; } @@ -666,54 +662,69 @@ static int tegra_devfreq_probe(struct platform_device *pdev) for (rate = 0; rate <= tegra->max_freq * KHZ; rate++) { rate = clk_round_rate(tegra->emc_clock, rate); - dev_pm_opp_add(&pdev->dev, rate, 0); - } - tegra->irq = platform_get_irq(pdev, 0); - if (tegra->irq < 0) { - err = tegra->irq; - dev_err(&pdev->dev, "Failed to get IRQ: %d\n", err); - return err; + err = dev_pm_opp_add(&pdev->dev, rate, 0); + if (err) { + dev_err(&pdev->dev, "Failed to add OPP: %d\n", err); + goto remove_opps; + } } platform_set_drvdata(pdev, tegra); + tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb; + err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb); + if (err) { + dev_err(&pdev->dev, + "Failed to register rate change notifier\n"); + goto remove_opps; + } + + tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock); + tegra->devfreq = devfreq_add_device(&pdev->dev, + &tegra_devfreq_profile, + "tegra_actmon", + NULL); + if (IS_ERR(tegra->devfreq)) { + err = PTR_ERR(tegra->devfreq); + goto unreg_notifier; + } + err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL, actmon_thread_isr, IRQF_ONESHOT, "tegra-devfreq", tegra); if (err) { - dev_err(&pdev->dev, "Interrupt request failed\n"); - return err; + dev_err(&pdev->dev, "Interrupt request failed: %d\n", err); + goto remove_devfreq; } - tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock); - tegra->devfreq = devm_devfreq_add_device(&pdev->dev, - &tegra_devfreq_profile, - "tegra_actmon", - NULL); - return 0; + +remove_devfreq: + devfreq_remove_device(tegra->devfreq); + +unreg_notifier: + clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb); + +remove_opps: + dev_pm_opp_remove_all_dynamic(&pdev->dev); + + reset_control_reset(tegra->reset); + clk_disable_unprepare(tegra->clock); + + return err; } static int tegra_devfreq_remove(struct platform_device *pdev) { struct tegra_devfreq *tegra = platform_get_drvdata(pdev); - int irq = platform_get_irq(pdev, 0); - u32 val; - unsigned int i; - for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) { - val = device_readl(&tegra->devices[i], ACTMON_DEV_CTRL); - val &= ~ACTMON_DEV_CTRL_ENB; - device_writel(&tegra->devices[i], val, ACTMON_DEV_CTRL); - } - - actmon_write_barrier(tegra); - - devm_free_irq(&pdev->dev, irq, tegra); + devfreq_remove_device(tegra->devfreq); clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb); + dev_pm_opp_remove_all_dynamic(&pdev->dev); + reset_control_reset(tegra->reset); clk_disable_unprepare(tegra->clock); return 0; From 151531f79a2daa30141c9fdb23e0cff00195b17a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:08 +0300 Subject: [PATCH 077/126] PM / devfreq: tegra: Avoid inconsistency of current frequency value The frequency value potentially could change in-between. It doesn't cause any real problem at all right now, but that could change in the future. Hence let's avoid the inconsistency. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 7e9145bd3686..f23f7b35863b 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -497,13 +497,15 @@ static int tegra_devfreq_get_dev_status(struct device *dev, { struct tegra_devfreq *tegra = dev_get_drvdata(dev); struct tegra_devfreq_device *actmon_dev; + unsigned long cur_freq; - stat->current_frequency = tegra->cur_freq * KHZ; + cur_freq = READ_ONCE(tegra->cur_freq); /* To be used by the tegra governor */ stat->private_data = tegra; /* The below are to be used by the other governors */ + stat->current_frequency = cur_freq * KHZ; actmon_dev = &tegra->devices[MCALL]; @@ -514,7 +516,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev, stat->busy_time *= 100 / BUS_SATURATION_RATIO; /* Number of cycles in a sampling period */ - stat->total_time = ACTMON_SAMPLING_PERIOD * tegra->cur_freq; + stat->total_time = ACTMON_SAMPLING_PERIOD * cur_freq; stat->busy_time = min(stat->busy_time, stat->total_time); From 386789ebbdd3a204ebe16b7a6277f7a45823549d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:09 +0300 Subject: [PATCH 078/126] PM / devfreq: tegra: Mark ACTMON's governor as immutable The ACTMON's governor supports only the Tegra's devfreq device and there is no need to use any other governor, hence let's mark Tegra governor as immutable to permanently stick it with Tegra's devfreq device. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/Kconfig | 1 - drivers/devfreq/tegra-devfreq.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index ba98a4e3ad33..7dd46d44579d 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -95,7 +95,6 @@ config ARM_EXYNOS_BUS_DEVFREQ config ARM_TEGRA_DEVFREQ tristate "Tegra DEVFREQ Driver" depends on ARCH_TEGRA_124_SOC - select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_OPP help This adds the DEVFREQ driver for the Tegra family of SoCs. diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index f23f7b35863b..bf3fe2e81bc9 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -594,6 +594,7 @@ static struct devfreq_governor tegra_devfreq_governor = { .name = "tegra_actmon", .get_target_freq = tegra_governor_get_target, .event_handler = tegra_governor_event_handler, + .immutable = true, }; static int tegra_devfreq_probe(struct platform_device *pdev) From 5a7e10c89501ec69adc5c39b24d7d69a223bf275 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:10 +0300 Subject: [PATCH 079/126] PM / devfreq: tegra: Move governor registration to driver's probe There is no need to register the ACTMON's governor separately from the driver, hence let's move the registration into the driver's probe function for consistency and to make code cleaner a tad. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 43 +++++++++------------------------ 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index bf3fe2e81bc9..d78f29ac138b 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -683,6 +683,12 @@ static int tegra_devfreq_probe(struct platform_device *pdev) goto remove_opps; } + err = devfreq_add_governor(&tegra_devfreq_governor); + if (err) { + dev_err(&pdev->dev, "Failed to add governor: %d\n", err); + goto unreg_notifier; + } + tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock); tegra->devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, @@ -690,7 +696,7 @@ static int tegra_devfreq_probe(struct platform_device *pdev) NULL); if (IS_ERR(tegra->devfreq)) { err = PTR_ERR(tegra->devfreq); - goto unreg_notifier; + goto remove_governor; } err = devm_request_threaded_irq(&pdev->dev, tegra->irq, NULL, @@ -706,6 +712,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) remove_devfreq: devfreq_remove_device(tegra->devfreq); +remove_governor: + devfreq_remove_governor(&tegra_devfreq_governor); + unreg_notifier: clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb); @@ -723,6 +732,7 @@ static int tegra_devfreq_remove(struct platform_device *pdev) struct tegra_devfreq *tegra = platform_get_drvdata(pdev); devfreq_remove_device(tegra->devfreq); + devfreq_remove_governor(&tegra_devfreq_governor); clk_notifier_unregister(tegra->emc_clock, &tegra->rate_change_nb); dev_pm_opp_remove_all_dynamic(&pdev->dev); @@ -748,36 +758,7 @@ static struct platform_driver tegra_devfreq_driver = { .of_match_table = tegra_devfreq_of_match, }, }; - -static int __init tegra_devfreq_init(void) -{ - int ret = 0; - - ret = devfreq_add_governor(&tegra_devfreq_governor); - if (ret) { - pr_err("%s: failed to add governor: %d\n", __func__, ret); - return ret; - } - - ret = platform_driver_register(&tegra_devfreq_driver); - if (ret) - devfreq_remove_governor(&tegra_devfreq_governor); - - return ret; -} -module_init(tegra_devfreq_init) - -static void __exit tegra_devfreq_exit(void) -{ - int ret = 0; - - platform_driver_unregister(&tegra_devfreq_driver); - - ret = devfreq_remove_governor(&tegra_devfreq_governor); - if (ret) - pr_err("%s: failed to remove governor: %d\n", __func__, ret); -} -module_exit(tegra_devfreq_exit) +module_platform_driver(tegra_devfreq_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Tegra devfreq driver"); From 546ff09381370015ffaf4d139aae671a0bb66af2 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:11 +0300 Subject: [PATCH 080/126] PM / devfreq: tegra: Reconfigure hardware on governor's restart Move hardware configuration to governor's start/resume methods. This allows to re-initialize hardware counters and reconfigure cleanly if governor was stopped/paused. That is needed because we are not aware of all hardware changes that happened while governor was stopped and the paused state may get out of sync with reality, hence it's better to start with a clean slate after the pause. In a result there is no memory bandwidth starvation after resume from suspend-to-ram that results in display controller underflowing that happens on resume because of improper decision made by devfreq about the required memory frequency. This change also cleans up code a tad by moving hardware-configuration code into a single location. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 98 ++++++++++++++------------------- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index d78f29ac138b..5cddf2199c4e 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -380,55 +380,6 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb, return NOTIFY_OK; } -static void tegra_actmon_enable_interrupts(struct tegra_devfreq *tegra) -{ - struct tegra_devfreq_device *dev; - u32 val; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { - dev = &tegra->devices[i]; - - val = device_readl(dev, ACTMON_DEV_CTRL); - val |= ACTMON_DEV_CTRL_AVG_ABOVE_WMARK_EN; - val |= ACTMON_DEV_CTRL_AVG_BELOW_WMARK_EN; - val |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN; - val |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN; - - device_writel(dev, val, ACTMON_DEV_CTRL); - } - - actmon_write_barrier(tegra); -} - -static void tegra_actmon_disable_interrupts(struct tegra_devfreq *tegra) -{ - struct tegra_devfreq_device *dev; - u32 val; - unsigned int i; - - disable_irq(tegra->irq); - - for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { - dev = &tegra->devices[i]; - - val = device_readl(dev, ACTMON_DEV_CTRL); - val &= ~ACTMON_DEV_CTRL_AVG_ABOVE_WMARK_EN; - val &= ~ACTMON_DEV_CTRL_AVG_BELOW_WMARK_EN; - val &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN; - val &= ~ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN; - - device_writel(dev, val, ACTMON_DEV_CTRL); - - device_writel(dev, ACTMON_INTR_STATUS_CLEAR, - ACTMON_DEV_INTR_STATUS); - } - - actmon_write_barrier(tegra); - - enable_irq(tegra->irq); -} - static void tegra_actmon_configure_device(struct tegra_devfreq *tegra, struct tegra_devfreq_device *dev) { @@ -452,11 +403,47 @@ static void tegra_actmon_configure_device(struct tegra_devfreq *tegra, << ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_NUM_SHIFT; val |= (ACTMON_ABOVE_WMARK_WINDOW - 1) << ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_NUM_SHIFT; + val |= ACTMON_DEV_CTRL_AVG_ABOVE_WMARK_EN; + val |= ACTMON_DEV_CTRL_AVG_BELOW_WMARK_EN; + val |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN; + val |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN; val |= ACTMON_DEV_CTRL_ENB; device_writel(dev, val, ACTMON_DEV_CTRL); +} + +static void tegra_actmon_start(struct tegra_devfreq *tegra) +{ + unsigned int i; + + disable_irq(tegra->irq); + + actmon_writel(tegra, ACTMON_SAMPLING_PERIOD - 1, + ACTMON_GLB_PERIOD_CTRL); + + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) + tegra_actmon_configure_device(tegra, &tegra->devices[i]); actmon_write_barrier(tegra); + + enable_irq(tegra->irq); +} + +static void tegra_actmon_stop(struct tegra_devfreq *tegra) +{ + unsigned int i; + + disable_irq(tegra->irq); + + for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) { + device_writel(&tegra->devices[i], 0x00000000, ACTMON_DEV_CTRL); + device_writel(&tegra->devices[i], ACTMON_INTR_STATUS_CLEAR, + ACTMON_DEV_INTR_STATUS); + } + + actmon_write_barrier(tegra); + + enable_irq(tegra->irq); } static int tegra_devfreq_target(struct device *dev, unsigned long *freq, @@ -568,22 +555,22 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, switch (event) { case DEVFREQ_GOV_START: devfreq_monitor_start(devfreq); - tegra_actmon_enable_interrupts(tegra); + tegra_actmon_start(tegra); break; case DEVFREQ_GOV_STOP: - tegra_actmon_disable_interrupts(tegra); + tegra_actmon_stop(tegra); devfreq_monitor_stop(devfreq); break; case DEVFREQ_GOV_SUSPEND: - tegra_actmon_disable_interrupts(tegra); + tegra_actmon_stop(tegra); devfreq_monitor_suspend(devfreq); break; case DEVFREQ_GOV_RESUME: devfreq_monitor_resume(devfreq); - tegra_actmon_enable_interrupts(tegra); + tegra_actmon_start(tegra); break; } @@ -652,15 +639,10 @@ static int tegra_devfreq_probe(struct platform_device *pdev) tegra->max_freq = clk_round_rate(tegra->emc_clock, ULONG_MAX) / KHZ; tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ; - actmon_writel(tegra, ACTMON_SAMPLING_PERIOD - 1, - ACTMON_GLB_PERIOD_CTRL); - for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) { dev = tegra->devices + i; dev->config = actmon_device_configs + i; dev->regs = tegra->regs + dev->config->offset; - - tegra_actmon_configure_device(tegra, dev); } for (rate = 0; rate <= tegra->max_freq * KHZ; rate++) { From 1ac347488529939d74f5e2d17948825b5bc6f5e6 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:12 +0300 Subject: [PATCH 081/126] PM / devfreq: tegra: Support Tegra30 The devfreq driver can be used on Tegra30 without any code change and it works perfectly fine, the default Tegra124 parameters are good enough for Tegra30. Signed-off-by: Dmitry Osipenko Reviewed-by: Chanwoo Choi [Modified by MyungJoo to depends on Tegra30/114/124/210 only] Signed-off-by: MyungJoo Ham --- drivers/devfreq/Kconfig | 6 ++++-- drivers/devfreq/tegra-devfreq.c | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 7dd46d44579d..8f3f0ed17d85 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -93,8 +93,10 @@ config ARM_EXYNOS_BUS_DEVFREQ This does not yet operate with optimal voltages. config ARM_TEGRA_DEVFREQ - tristate "Tegra DEVFREQ Driver" - depends on ARCH_TEGRA_124_SOC + tristate "NVIDIA Tegra30/114/124/210 DEVFREQ Driver" + depends on ARCH_TEGRA_3x_SOC || ARCH_TEGRA_114_SOC || \ + ARCH_TEGRA_132_SOC || ARCH_TEGRA_124_SOC || \ + ARCH_TEGRA_210_SOC select PM_OPP help This adds the DEVFREQ driver for the Tegra family of SoCs. diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 5cddf2199c4e..a6ba75f4106d 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -726,6 +726,7 @@ static int tegra_devfreq_remove(struct platform_device *pdev) } static const struct of_device_id tegra_devfreq_of_match[] = { + { .compatible = "nvidia,tegra30-actmon" }, { .compatible = "nvidia,tegra124-actmon" }, { }, }; From 35f8dbc727212bc5a49b90961d98f9ef596e2072 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:13 +0300 Subject: [PATCH 082/126] PM / devfreq: tegra: Enable COMPILE_TEST for the driver The driver's compilation doesn't have any specific dependencies, hence the COMPILE_TEST option can be supported in Kconfig. Signed-off-by: Dmitry Osipenko Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 8f3f0ed17d85..1091119c1d0a 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -96,7 +96,8 @@ config ARM_TEGRA_DEVFREQ tristate "NVIDIA Tegra30/114/124/210 DEVFREQ Driver" depends on ARCH_TEGRA_3x_SOC || ARCH_TEGRA_114_SOC || \ ARCH_TEGRA_132_SOC || ARCH_TEGRA_124_SOC || \ - ARCH_TEGRA_210_SOC + ARCH_TEGRA_210_SOC || \ + COMPILE_TEST select PM_OPP help This adds the DEVFREQ driver for the Tegra family of SoCs. From 23601752911b5dac91207859e0ab77bd8c77545c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:14 +0300 Subject: [PATCH 083/126] PM / devfreq: tegra: Rename tegra-devfreq.c to tegra30-devfreq.c In order to reflect that driver serves NVIDIA Tegra30 and later SoC generations, let's rename the driver's source file to "tegra30-devfreq.c". This will make driver files to look more consistent after addition of a driver for Tegra20. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: MyungJoo Ham --- drivers/devfreq/Makefile | 2 +- drivers/devfreq/{tegra-devfreq.c => tegra30-devfreq.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/devfreq/{tegra-devfreq.c => tegra30-devfreq.c} (100%) diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 32b8d4d3f12c..47e5aeeebfd1 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o -obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra-devfreq.o +obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o # DEVFREQ Event Drivers obj-$(CONFIG_PM_DEVFREQ_EVENT) += event/ diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra30-devfreq.c similarity index 100% rename from drivers/devfreq/tegra-devfreq.c rename to drivers/devfreq/tegra30-devfreq.c From d196175ed8f45248b54bf5c2e7c05ac0e1e97d70 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 02:38:15 +0300 Subject: [PATCH 084/126] PM / devfreq: Introduce driver for NVIDIA Tegra20 Add devfreq driver for NVIDIA Tegra20 SoC's. The driver periodically reads out Memory Controller counters and adjusts memory frequency based on the memory clients activity. Reviewed-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko [Removed MAINTAINERS updates by MyungJoo so that it can be sent elsewhere.] Signed-off-by: MyungJoo Ham --- drivers/devfreq/Kconfig | 10 ++ drivers/devfreq/Makefile | 1 + drivers/devfreq/tegra20-devfreq.c | 212 ++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+) create mode 100644 drivers/devfreq/tegra20-devfreq.c diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 1091119c1d0a..f3b242987fd9 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -104,6 +104,16 @@ config ARM_TEGRA_DEVFREQ It reads ACTMON counters of memory controllers and adjusts the operating frequencies and voltages with OPP support. +config ARM_TEGRA20_DEVFREQ + tristate "NVIDIA Tegra20 DEVFREQ Driver" + depends on (TEGRA_MC && TEGRA20_EMC) || COMPILE_TEST + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select PM_OPP + help + This adds the DEVFREQ driver for the Tegra20 family of SoCs. + It reads Memory Controller counters and adjusts the operating + frequencies and voltages with OPP support. + config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" depends on ARCH_ROCKCHIP diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 47e5aeeebfd1..338ae8440db6 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o +obj-$(CONFIG_ARM_TEGRA20_DEVFREQ) += tegra20-devfreq.o # DEVFREQ Event Drivers obj-$(CONFIG_PM_DEVFREQ_EVENT) += event/ diff --git a/drivers/devfreq/tegra20-devfreq.c b/drivers/devfreq/tegra20-devfreq.c new file mode 100644 index 000000000000..ff82bac9ee4e --- /dev/null +++ b/drivers/devfreq/tegra20-devfreq.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVIDIA Tegra20 devfreq driver + * + * Copyright (C) 2019 GRATE-DRIVER project + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "governor.h" + +#define MC_STAT_CONTROL 0x90 +#define MC_STAT_EMC_CLOCK_LIMIT 0xa0 +#define MC_STAT_EMC_CLOCKS 0xa4 +#define MC_STAT_EMC_CONTROL 0xa8 +#define MC_STAT_EMC_COUNT 0xb8 + +#define EMC_GATHER_CLEAR (1 << 8) +#define EMC_GATHER_ENABLE (3 << 8) + +struct tegra_devfreq { + struct devfreq *devfreq; + struct clk *emc_clock; + void __iomem *regs; +}; + +static int tegra_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct tegra_devfreq *tegra = dev_get_drvdata(dev); + struct devfreq *devfreq = tegra->devfreq; + struct dev_pm_opp *opp; + unsigned long rate; + int err; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + rate = dev_pm_opp_get_freq(opp); + dev_pm_opp_put(opp); + + err = clk_set_min_rate(tegra->emc_clock, rate); + if (err) + return err; + + err = clk_set_rate(tegra->emc_clock, 0); + if (err) + goto restore_min_rate; + + return 0; + +restore_min_rate: + clk_set_min_rate(tegra->emc_clock, devfreq->previous_freq); + + return err; +} + +static int tegra_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct tegra_devfreq *tegra = dev_get_drvdata(dev); + + /* + * EMC_COUNT returns number of memory events, that number is lower + * than the number of clocks. Conversion ratio of 1/8 results in a + * bit higher bandwidth than actually needed, it is good enough for + * the time being because drivers don't support requesting minimum + * needed memory bandwidth yet. + * + * TODO: adjust the ratio value once relevant drivers will support + * memory bandwidth management. + */ + stat->busy_time = readl_relaxed(tegra->regs + MC_STAT_EMC_COUNT); + stat->total_time = readl_relaxed(tegra->regs + MC_STAT_EMC_CLOCKS) / 8; + stat->current_frequency = clk_get_rate(tegra->emc_clock); + + writel_relaxed(EMC_GATHER_CLEAR, tegra->regs + MC_STAT_CONTROL); + writel_relaxed(EMC_GATHER_ENABLE, tegra->regs + MC_STAT_CONTROL); + + return 0; +} + +static struct devfreq_dev_profile tegra_devfreq_profile = { + .polling_ms = 500, + .target = tegra_devfreq_target, + .get_dev_status = tegra_devfreq_get_dev_status, +}; + +static struct tegra_mc *tegra_get_memory_controller(void) +{ + struct platform_device *pdev; + struct device_node *np; + struct tegra_mc *mc; + + np = of_find_compatible_node(NULL, NULL, "nvidia,tegra20-mc-gart"); + if (!np) + return ERR_PTR(-ENOENT); + + pdev = of_find_device_by_node(np); + of_node_put(np); + if (!pdev) + return ERR_PTR(-ENODEV); + + mc = platform_get_drvdata(pdev); + if (!mc) + return ERR_PTR(-EPROBE_DEFER); + + return mc; +} + +static int tegra_devfreq_probe(struct platform_device *pdev) +{ + struct tegra_devfreq *tegra; + struct tegra_mc *mc; + unsigned long max_rate; + unsigned long rate; + int err; + + mc = tegra_get_memory_controller(); + if (IS_ERR(mc)) { + err = PTR_ERR(mc); + dev_err(&pdev->dev, "failed to get memory controller: %d\n", + err); + return err; + } + + tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL); + if (!tegra) + return -ENOMEM; + + /* EMC is a system-critical clock that is always enabled */ + tegra->emc_clock = devm_clk_get(&pdev->dev, "emc"); + if (IS_ERR(tegra->emc_clock)) { + err = PTR_ERR(tegra->emc_clock); + dev_err(&pdev->dev, "failed to get emc clock: %d\n", err); + return err; + } + + tegra->regs = mc->regs; + + max_rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); + + for (rate = 0; rate <= max_rate; rate++) { + rate = clk_round_rate(tegra->emc_clock, rate); + + err = dev_pm_opp_add(&pdev->dev, rate, 0); + if (err) { + dev_err(&pdev->dev, "failed to add opp: %d\n", err); + goto remove_opps; + } + } + + /* + * Reset statistic gathers state, select global bandwidth for the + * statistics collection mode and set clocks counter saturation + * limit to maximum. + */ + writel_relaxed(0x00000000, tegra->regs + MC_STAT_CONTROL); + writel_relaxed(0x00000000, tegra->regs + MC_STAT_EMC_CONTROL); + writel_relaxed(0xffffffff, tegra->regs + MC_STAT_EMC_CLOCK_LIMIT); + + platform_set_drvdata(pdev, tegra); + + tegra->devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL); + if (IS_ERR(tegra->devfreq)) { + err = PTR_ERR(tegra->devfreq); + goto remove_opps; + } + + return 0; + +remove_opps: + dev_pm_opp_remove_all_dynamic(&pdev->dev); + + return err; +} + +static int tegra_devfreq_remove(struct platform_device *pdev) +{ + struct tegra_devfreq *tegra = platform_get_drvdata(pdev); + + devfreq_remove_device(tegra->devfreq); + dev_pm_opp_remove_all_dynamic(&pdev->dev); + + return 0; +} + +static struct platform_driver tegra_devfreq_driver = { + .probe = tegra_devfreq_probe, + .remove = tegra_devfreq_remove, + .driver = { + .name = "tegra20-devfreq", + }, +}; +module_platform_driver(tegra_devfreq_driver); + +MODULE_ALIAS("platform:tegra20-devfreq"); +MODULE_AUTHOR("Dmitry Osipenko "); +MODULE_DESCRIPTION("NVIDIA Tegra20 devfreq driver"); +MODULE_LICENSE("GPL v2"); From 2c09083293f46eb1a17be8dd7e92efbdcab43fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20PORTAY?= Date: Thu, 2 May 2019 14:27:36 -0400 Subject: [PATCH 085/126] PM / devfreq: Fix spelling typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing 'r' in "monitoing". Signed-off-by: Gaël PORTAY Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index ab22bf8a12d6..bed647637a93 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -402,7 +402,7 @@ static void devfreq_monitor(struct work_struct *work) * devfreq_monitor_start() - Start load monitoring of devfreq instance * @devfreq: the devfreq instance. * - * Helper function for starting devfreq device load monitoing. By + * Helper function for starting devfreq device load monitoring. By * default delayed work based monitoring is supported. Function * to be called from governor in response to DEVFREQ_GOV_START * event when device is added to devfreq framework. @@ -420,7 +420,7 @@ EXPORT_SYMBOL(devfreq_monitor_start); * devfreq_monitor_stop() - Stop load monitoring of a devfreq instance * @devfreq: the devfreq instance. * - * Helper function to stop devfreq device load monitoing. Function + * Helper function to stop devfreq device load monitoring. Function * to be called from governor in response to DEVFREQ_GOV_STOP * event when device is removed from devfreq framework. */ @@ -434,7 +434,7 @@ EXPORT_SYMBOL(devfreq_monitor_stop); * devfreq_monitor_suspend() - Suspend load monitoring of a devfreq instance * @devfreq: the devfreq instance. * - * Helper function to suspend devfreq device load monitoing. Function + * Helper function to suspend devfreq device load monitoring. Function * to be called from governor in response to DEVFREQ_GOV_SUSPEND * event or when polling interval is set to zero. * @@ -461,7 +461,7 @@ EXPORT_SYMBOL(devfreq_monitor_suspend); * devfreq_monitor_resume() - Resume load monitoring of a devfreq instance * @devfreq: the devfreq instance. * - * Helper function to resume devfreq device load monitoing. Function + * Helper function to resume devfreq device load monitoring. Function * to be called from governor in response to DEVFREQ_GOV_RESUME * event or when polling interval is set to non-zero. */ From 0a453aca94bd7886bca86cc78ce2771f55152232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABl=20PORTAY?= Date: Thu, 2 May 2019 14:40:13 -0400 Subject: [PATCH 086/126] PM / devfreq: rk3399_dmc: Fix spelling typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorder 'i' and 'v' in "drvier". Signed-off-by: Gaël PORTAY Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 682465fa57e1..2e65d7279d79 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -351,7 +351,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) /* * Get dram timing and pass it to arm trust firmware, - * the dram drvier in arm trust firmware will get these + * the dram driver in arm trust firmware will get these * timing and to do dram initial. */ if (!of_get_ddr_timings(&data->timing, np)) { From 7544fd7f384591038646d3cd9efb311ab4509e24 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 21 Jun 2019 18:39:49 -0300 Subject: [PATCH 087/126] PM / devfreq: Fix kernel oops on governor module load A bit unexpectedly (but still documented), request_module may return a positive value, in case of a modprobe error. This is currently causing issues in the devfreq framework. When a request_module exits with a positive value, we currently return that via ERR_PTR. However, because the value is positive, it's not a ERR_VALUE proper, and is therefore treated as a valid struct devfreq_governor pointer, leading to a kernel oops. Fix this by returning -EINVAL if request_module returns a positive value. Fixes: b53b0128052ff ("PM / devfreq: Fix static checker warning in try_then_request_governor") Signed-off-by: Ezequiel Garcia Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index bed647637a93..784c08e4f931 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -254,7 +254,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name) /* Restore previous state before return */ mutex_lock(&devfreq_list_lock); if (err) - return ERR_PTR(err); + return (err < 0) ? ERR_PTR(err) : ERR_PTR(-EINVAL); governor = find_devfreq_governor(name); } From 5f8669639f3ec6723ebebd83aaccfbea4b5c362b Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Fri, 14 Jun 2019 11:53:05 +0200 Subject: [PATCH 088/126] PM / devfreq: events: add Exynos PPMU new events Define new performance events supported by Exynos5422 SoC counters. The counters are built-in in Dynamic Memory Controller and provide information regarding memory utilization. Signed-off-by: Lukasz Luba Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/exynos-ppmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 3ee3dd5653aa..aae1fd6c91f7 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -86,6 +86,12 @@ static struct __exynos_ppmu_events { PPMU_EVENT(d1-cpu), PPMU_EVENT(d1-general), PPMU_EVENT(d1-rt), + + /* For Exynos5422 SoC */ + PPMU_EVENT(dmc0_0), + PPMU_EVENT(dmc0_1), + PPMU_EVENT(dmc1_0), + PPMU_EVENT(dmc1_1), }; static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev) From 23ca7d2503d895c80b65e4321fc6cc678c7642f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jun 2019 12:32:20 +0200 Subject: [PATCH 089/126] PM / devfreq: tegra20: add COMMON_CLK dependency Compile-testing the new driver on platforms without CONFIG_COMMON_CLK leads to a link error: drivers/devfreq/tegra20-devfreq.o: In function `tegra_devfreq_target': tegra20-devfreq.c:(.text+0x288): undefined reference to `clk_set_min_rate' Add a dependency on COMMON_CLK to avoid this. Fixes: 1d39ee8dad6d ("PM / devfreq: Introduce driver for NVIDIA Tegra20") Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Osipenko Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index f3b242987fd9..defe1d438710 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -107,6 +107,7 @@ config ARM_TEGRA_DEVFREQ config ARM_TEGRA20_DEVFREQ tristate "NVIDIA Tegra20 DEVFREQ Driver" depends on (TEGRA_MC && TEGRA20_EMC) || COMPILE_TEST + depends on COMMON_CLK select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_OPP help From 0ae9c3213c2ca34f7e7ebad1436267778d37ba9f Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 5 Jun 2019 11:12:33 +0200 Subject: [PATCH 090/126] PM / devfreq: exynos-events: change matching code during probe The patch changes the way how the 'ops' gets populated for different device versions. The matching function now uses 'of_device_id' in order to identify the device type. Signed-off-by: Lukasz Luba Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/exynos-ppmu.c | 38 +++++++++++++++++++---------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index aae1fd6c91f7..1756c6b5cb09 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,11 @@ #include "exynos-ppmu.h" +enum exynos_ppmu_type { + EXYNOS_TYPE_PPMU, + EXYNOS_TYPE_PPMU_V2, +}; + struct exynos_ppmu_data { struct clk *clk; }; @@ -33,6 +39,7 @@ struct exynos_ppmu { struct regmap *regmap; struct exynos_ppmu_data ppmu; + enum exynos_ppmu_type ppmu_type; }; #define PPMU_EVENT(name) \ @@ -486,31 +493,23 @@ static const struct devfreq_event_ops exynos_ppmu_v2_ops = { static const struct of_device_id exynos_ppmu_id_match[] = { { .compatible = "samsung,exynos-ppmu", - .data = (void *)&exynos_ppmu_ops, + .data = (void *)EXYNOS_TYPE_PPMU, }, { .compatible = "samsung,exynos-ppmu-v2", - .data = (void *)&exynos_ppmu_v2_ops, + .data = (void *)EXYNOS_TYPE_PPMU_V2, }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, exynos_ppmu_id_match); -static struct devfreq_event_ops *exynos_bus_get_ops(struct device_node *np) -{ - const struct of_device_id *match; - - match = of_match_node(exynos_ppmu_id_match, np); - return (struct devfreq_event_ops *)match->data; -} - static int of_get_devfreq_events(struct device_node *np, struct exynos_ppmu *info) { struct devfreq_event_desc *desc; - struct devfreq_event_ops *event_ops; struct device *dev = info->dev; struct device_node *events_np, *node; int i, j, count; + const struct of_device_id *of_id; events_np = of_get_child_by_name(np, "events"); if (!events_np) { @@ -518,7 +517,6 @@ static int of_get_devfreq_events(struct device_node *np, "failed to get child node of devfreq-event devices\n"); return -EINVAL; } - event_ops = exynos_bus_get_ops(np); count = of_get_child_count(events_np); desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL); @@ -526,6 +524,12 @@ static int of_get_devfreq_events(struct device_node *np, return -ENOMEM; info->num_events = count; + of_id = of_match_device(exynos_ppmu_id_match, dev); + if (of_id) + info->ppmu_type = (enum exynos_ppmu_type)of_id->data; + else + return -EINVAL; + j = 0; for_each_child_of_node(events_np, node) { for (i = 0; i < ARRAY_SIZE(ppmu_events); i++) { @@ -543,7 +547,15 @@ static int of_get_devfreq_events(struct device_node *np, continue; } - desc[j].ops = event_ops; + switch (info->ppmu_type) { + case EXYNOS_TYPE_PPMU: + desc[j].ops = &exynos_ppmu_ops; + break; + case EXYNOS_TYPE_PPMU_V2: + desc[j].ops = &exynos_ppmu_v2_ops; + break; + } + desc[j].driver_data = info; of_property_read_string(node, "event-name", &desc[j].name); From 1dd62c66d345a4e5dcfa5a4e81999600515b4309 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 5 Jun 2019 11:12:34 +0200 Subject: [PATCH 091/126] PM / devfreq: events: extend events by type of counted data This patch adds posibility to choose what type of data should be counted by the PPMU counter. Now the type comes from DT where the event has been defined. When there is no 'event-data-type' the default value is used, which is 'read+write data in bytes'. It is needed when you want to know not only read+write data bytes but i.e. only write data in byte, or number of read requests, etc. Signed-off-by: Lukasz Luba Acked-by: Chanwoo Choi [Updated property by MyungJoo. data_type --> event_type] Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/exynos-ppmu.c | 60 ++++++++++++++++++++--------- include/linux/devfreq-event.h | 6 +++ 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 1756c6b5cb09..87b42055e6bc 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -164,9 +164,9 @@ static int exynos_ppmu_set_event(struct devfreq_event_dev *edev) if (ret < 0) return ret; - /* Set the event of Read/Write data count */ + /* Set the event of proper data type monitoring */ ret = regmap_write(info->regmap, PPMU_BEVTxSEL(id), - PPMU_RO_DATA_CNT | PPMU_WO_DATA_CNT); + edev->desc->event_type); if (ret < 0) return ret; @@ -378,23 +378,11 @@ static int exynos_ppmu_v2_set_event(struct devfreq_event_dev *edev) if (ret < 0) return ret; - /* Set the event of Read/Write data count */ - switch (id) { - case PPMU_PMNCNT0: - case PPMU_PMNCNT1: - case PPMU_PMNCNT2: - ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id), - PPMU_V2_RO_DATA_CNT | PPMU_V2_WO_DATA_CNT); - if (ret < 0) - return ret; - break; - case PPMU_PMNCNT3: - ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id), - PPMU_V2_EVT3_RW_DATA_CNT); - if (ret < 0) - return ret; - break; - } + /* Set the event of proper data type monitoring */ + ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id), + edev->desc->event_type); + if (ret < 0) + return ret; /* Reset cycle counter/performance counter and enable PPMU */ ret = regmap_read(info->regmap, PPMU_V2_PMNC, &pmnc); @@ -510,6 +498,7 @@ static int of_get_devfreq_events(struct device_node *np, struct device_node *events_np, *node; int i, j, count; const struct of_device_id *of_id; + int ret; events_np = of_get_child_by_name(np, "events"); if (!events_np) { @@ -559,6 +548,39 @@ static int of_get_devfreq_events(struct device_node *np, desc[j].driver_data = info; of_property_read_string(node, "event-name", &desc[j].name); + ret = of_property_read_u32(node, "event-data-type", + &desc[j].event_type); + if (ret) { + /* Set the event of proper data type counting. + * Check if the data type has been defined in DT, + * use default if not. + */ + if (info->ppmu_type == EXYNOS_TYPE_PPMU_V2) { + struct devfreq_event_dev edev; + int id; + /* Not all registers take the same value for + * read+write data count. + */ + edev.desc = &desc[j]; + id = exynos_ppmu_find_ppmu_id(&edev); + + switch (id) { + case PPMU_PMNCNT0: + case PPMU_PMNCNT1: + case PPMU_PMNCNT2: + desc[j].event_type = PPMU_V2_RO_DATA_CNT + | PPMU_V2_WO_DATA_CNT; + break; + case PPMU_PMNCNT3: + desc[j].event_type = + PPMU_V2_EVT3_RW_DATA_CNT; + break; + } + } else { + desc[j].event_type = PPMU_RO_DATA_CNT | + PPMU_WO_DATA_CNT; + } + } j++; } diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index 29fc0dd735ae..f14f17f8cb7f 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -78,14 +78,20 @@ struct devfreq_event_ops { * struct devfreq_event_desc - the descriptor of devfreq-event device * * @name : the name of devfreq-event device. + * @event_type : the type of the event determined and used by driver * @driver_data : the private data for devfreq-event driver. * @ops : the operation to control devfreq-event device. * * Each devfreq-event device is described with a this structure. * This structure contains the various data for devfreq-event device. + * The event_type describes what is going to be counted in the register. + * It might choose to count e.g. read requests, write data in bytes, etc. + * The full supported list of types is present in specyfic header in: + * include/dt-bindings/pmu/. */ struct devfreq_event_desc { const char *name; + u32 event_type; void *driver_data; const struct devfreq_event_ops *ops; From e2fc1677eea749414a7db0058847509566bf64e1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 24 Jul 2019 20:59:53 +0200 Subject: [PATCH 092/126] PM / devfreq: Correct devm_devfreq_remove_device() documentation Correct the documentation for devm_devfreq_remove_device() argument. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 784c08e4f931..446490c9d635 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -867,7 +867,7 @@ EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_phandle); /** * devm_devfreq_remove_device() - Resource-managed devfreq_remove_device() - * @dev: the device to add devfreq feature. + * @dev: the device from which to remove devfreq feature. * @devfreq: the devfreq instance to be removed */ void devm_devfreq_remove_device(struct device *dev, struct devfreq *devfreq) From 2c2b20e0da89c76759ee28c6824413ab2fa3bfc6 Mon Sep 17 00:00:00 2001 From: Kamil Konieczny Date: Wed, 7 Aug 2019 15:38:35 +0200 Subject: [PATCH 093/126] PM / devfreq: exynos-bus: Correct clock enable sequence Regulators should be enabled before clocks to avoid h/w hang. This require change in exynos_bus_probe() to move exynos_bus_parse_of() after exynos_bus_parent_parse_of() and change in error handling. Similar change is needed in exynos_bus_exit() where clock should be disabled before regulators. Signed-off-by: Kamil Konieczny Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/exynos-bus.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index d9f377912c10..7c06df8bd74f 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -191,11 +191,10 @@ static void exynos_bus_exit(struct device *dev) if (ret < 0) dev_warn(dev, "failed to disable the devfreq-event devices\n"); - if (bus->regulator) - regulator_disable(bus->regulator); - dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); + if (bus->regulator) + regulator_disable(bus->regulator); } /* @@ -383,6 +382,7 @@ static int exynos_bus_probe(struct platform_device *pdev) struct exynos_bus *bus; int ret, max_state; unsigned long min_freq, max_freq; + bool passive = false; if (!np) { dev_err(dev, "failed to find devicetree node\n"); @@ -396,27 +396,27 @@ static int exynos_bus_probe(struct platform_device *pdev) bus->dev = &pdev->dev; platform_set_drvdata(pdev, bus); - /* Parse the device-tree to get the resource information */ - ret = exynos_bus_parse_of(np, bus); - if (ret < 0) - return ret; - profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL); - if (!profile) { - ret = -ENOMEM; - goto err; - } + if (!profile) + return -ENOMEM; node = of_parse_phandle(dev->of_node, "devfreq", 0); if (node) { of_node_put(node); - goto passive; + passive = true; } else { ret = exynos_bus_parent_parse_of(np, bus); + if (ret < 0) + return ret; } + /* Parse the device-tree to get the resource information */ + ret = exynos_bus_parse_of(np, bus); if (ret < 0) - goto err; + goto err_reg; + + if (passive) + goto passive; /* Initialize the struct profile and governor data for parent device */ profile->polling_ms = 50; @@ -507,6 +507,9 @@ out: err: dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); +err_reg: + if (!passive) + regulator_disable(bus->regulator); return ret; } From 4294a779bd8dff6c65e7e85ffe7a1ea236e92a68 Mon Sep 17 00:00:00 2001 From: Kamil Konieczny Date: Wed, 7 Aug 2019 15:38:36 +0200 Subject: [PATCH 094/126] PM / devfreq: exynos-bus: Convert to use dev_pm_opp_set_rate() Reuse opp core code for setting bus clock and voltage. As a side effect this allow usage of coupled regulators feature (required for boards using Exynos5422/5800 SoCs) because dev_pm_opp_set_rate() uses regulator_set_voltage_triplet() for setting regulator voltage while the old code used regulator_set_voltage_tol() with fixed tolerance. This patch also removes no longer needed parsing of DT property "exynos,voltage-tolerance" (no Exynos devfreq DT node uses it). After applying changes both functions exynos_bus_passive_target() and exynos_bus_target() have the same code, so remove exynos_bus_passive_target(). In exynos_bus_probe() replace it with exynos_bus_target. Signed-off-by: Kamil Konieczny Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/exynos-bus.c | 130 +++++++---------------------------- 1 file changed, 24 insertions(+), 106 deletions(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 7c06df8bd74f..c832673273a2 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -22,7 +22,6 @@ #include #define DEFAULT_SATURATION_RATIO 40 -#define DEFAULT_VOLTAGE_TOLERANCE 2 struct exynos_bus { struct device *dev; @@ -34,9 +33,8 @@ struct exynos_bus { unsigned long curr_freq; - struct regulator *regulator; + struct opp_table *opp_table; struct clk *clk; - unsigned int voltage_tolerance; unsigned int ratio; }; @@ -90,62 +88,29 @@ static int exynos_bus_get_event(struct exynos_bus *bus, } /* - * Must necessary function for devfreq simple-ondemand governor + * devfreq function for both simple-ondemand and passive governor */ static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags) { struct exynos_bus *bus = dev_get_drvdata(dev); struct dev_pm_opp *new_opp; - unsigned long old_freq, new_freq, new_volt, tol; int ret = 0; - /* Get new opp-bus instance according to new bus clock */ + /* Get correct frequency for bus. */ new_opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(new_opp)) { dev_err(dev, "failed to get recommended opp instance\n"); return PTR_ERR(new_opp); } - new_freq = dev_pm_opp_get_freq(new_opp); - new_volt = dev_pm_opp_get_voltage(new_opp); dev_pm_opp_put(new_opp); - old_freq = bus->curr_freq; - - if (old_freq == new_freq) - return 0; - tol = new_volt * bus->voltage_tolerance / 100; - /* Change voltage and frequency according to new OPP level */ mutex_lock(&bus->lock); + ret = dev_pm_opp_set_rate(dev, *freq); + if (!ret) + bus->curr_freq = *freq; - if (old_freq < new_freq) { - ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol); - if (ret < 0) { - dev_err(bus->dev, "failed to set voltage\n"); - goto out; - } - } - - ret = clk_set_rate(bus->clk, new_freq); - if (ret < 0) { - dev_err(dev, "failed to change clock of bus\n"); - clk_set_rate(bus->clk, old_freq); - goto out; - } - - if (old_freq > new_freq) { - ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol); - if (ret < 0) { - dev_err(bus->dev, "failed to set voltage\n"); - goto out; - } - } - bus->curr_freq = new_freq; - - dev_dbg(dev, "Set the frequency of bus (%luHz -> %luHz, %luHz)\n", - old_freq, new_freq, clk_get_rate(bus->clk)); -out: mutex_unlock(&bus->lock); return ret; @@ -193,54 +158,10 @@ static void exynos_bus_exit(struct device *dev) dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); - if (bus->regulator) - regulator_disable(bus->regulator); -} - -/* - * Must necessary function for devfreq passive governor - */ -static int exynos_bus_passive_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct exynos_bus *bus = dev_get_drvdata(dev); - struct dev_pm_opp *new_opp; - unsigned long old_freq, new_freq; - int ret = 0; - - /* Get new opp-bus instance according to new bus clock */ - new_opp = devfreq_recommended_opp(dev, freq, flags); - if (IS_ERR(new_opp)) { - dev_err(dev, "failed to get recommended opp instance\n"); - return PTR_ERR(new_opp); + if (bus->opp_table) { + dev_pm_opp_put_regulators(bus->opp_table); + bus->opp_table = NULL; } - - new_freq = dev_pm_opp_get_freq(new_opp); - dev_pm_opp_put(new_opp); - - old_freq = bus->curr_freq; - - if (old_freq == new_freq) - return 0; - - /* Change the frequency according to new OPP level */ - mutex_lock(&bus->lock); - - ret = clk_set_rate(bus->clk, new_freq); - if (ret < 0) { - dev_err(dev, "failed to set the clock of bus\n"); - goto out; - } - - *freq = new_freq; - bus->curr_freq = new_freq; - - dev_dbg(dev, "Set the frequency of bus (%luHz -> %luHz, %luHz)\n", - old_freq, new_freq, clk_get_rate(bus->clk)); -out: - mutex_unlock(&bus->lock); - - return ret; } static void exynos_bus_passive_exit(struct device *dev) @@ -255,21 +176,19 @@ static int exynos_bus_parent_parse_of(struct device_node *np, struct exynos_bus *bus) { struct device *dev = bus->dev; + struct opp_table *opp_table; + const char *vdd = "vdd"; int i, ret, count, size; - /* Get the regulator to provide each bus with the power */ - bus->regulator = devm_regulator_get(dev, "vdd"); - if (IS_ERR(bus->regulator)) { - dev_err(dev, "failed to get VDD regulator\n"); - return PTR_ERR(bus->regulator); - } - - ret = regulator_enable(bus->regulator); - if (ret < 0) { - dev_err(dev, "failed to enable VDD regulator\n"); + opp_table = dev_pm_opp_set_regulators(dev, &vdd, 1); + if (IS_ERR(opp_table)) { + ret = PTR_ERR(opp_table); + dev_err(dev, "failed to set regulators %d\n", ret); return ret; } + bus->opp_table = opp_table; + /* * Get the devfreq-event devices to get the current utilization of * buses. This raw data will be used in devfreq ondemand governor. @@ -310,14 +229,11 @@ static int exynos_bus_parent_parse_of(struct device_node *np, if (of_property_read_u32(np, "exynos,saturation-ratio", &bus->ratio)) bus->ratio = DEFAULT_SATURATION_RATIO; - if (of_property_read_u32(np, "exynos,voltage-tolerance", - &bus->voltage_tolerance)) - bus->voltage_tolerance = DEFAULT_VOLTAGE_TOLERANCE; - return 0; err_regulator: - regulator_disable(bus->regulator); + dev_pm_opp_put_regulators(bus->opp_table); + bus->opp_table = NULL; return ret; } @@ -468,7 +384,7 @@ static int exynos_bus_probe(struct platform_device *pdev) goto out; passive: /* Initialize the struct profile and governor data for passive device */ - profile->target = exynos_bus_passive_target; + profile->target = exynos_bus_target; profile->exit = exynos_bus_passive_exit; /* Get the instance of parent devfreq device */ @@ -508,8 +424,10 @@ err: dev_pm_opp_of_remove_table(dev); clk_disable_unprepare(bus->clk); err_reg: - if (!passive) - regulator_disable(bus->regulator); + if (!passive) { + dev_pm_opp_put_regulators(bus->opp_table); + bus->opp_table = NULL; + } return ret; } From 0ef7c7cce43f6ecc2b96d447e69b2900a9655f7c Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 8 Aug 2019 19:54:08 +0300 Subject: [PATCH 095/126] PM / devfreq: passive: Use non-devm notifiers The devfreq passive governor registers and unregisters devfreq transition notifiers on DEVFREQ_GOV_START/GOV_STOP using devm wrappers. If devfreq itself is registered with devm then a warning is triggered on rmmod from devm_devfreq_unregister_notifier. Call stack looks like this: devm_devfreq_unregister_notifier+0x30/0x40 devfreq_passive_event_handler+0x4c/0x88 devfreq_remove_device.part.8+0x6c/0x9c devm_devfreq_dev_release+0x18/0x20 release_nodes+0x1b0/0x220 devres_release_all+0x78/0x84 device_release_driver_internal+0x100/0x1c0 driver_detach+0x4c/0x90 bus_remove_driver+0x7c/0xd0 driver_unregister+0x2c/0x58 platform_driver_unregister+0x10/0x18 imx_devfreq_platdrv_exit+0x14/0xd40 [imx_devfreq] This happens because devres_release_all will first remove all the nodes into a separate todo list so the nested devres_release from devm_devfreq_unregister_notifier won't find anything. Fix the warning by calling the non-devm APIS for frequency notification. Using devm wrappers is not actually useful for a governor anyway: it relies on the devfreq core to correctly match the GOV_START/GOV_STOP notifications. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Signed-off-by: Leonard Crestez Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/governor_passive.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 58308948b863..da485477065c 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -165,12 +165,12 @@ static int devfreq_passive_event_handler(struct devfreq *devfreq, p_data->this = devfreq; nb->notifier_call = devfreq_passive_notifier_call; - ret = devm_devfreq_register_notifier(dev, parent, nb, + ret = devfreq_register_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER); break; case DEVFREQ_GOV_STOP: - devm_devfreq_unregister_notifier(dev, parent, nb, - DEVFREQ_TRANSITION_NOTIFIER); + WARN_ON(devfreq_unregister_notifier(parent, nb, + DEVFREQ_TRANSITION_NOTIFIER)); break; default: break; From dce2e3a8414e31fc286a3f8064df773a3af83a26 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:03 +0530 Subject: [PATCH 096/126] cpufreq: powerpc: macintosh: Switch to QoS requests for freq limits The cpufreq core now takes the min/max frequency constraints via QoS requests and the CPUFREQ_ADJUST notifier shall get removed later on. Switch over to using the QoS request for maximum frequency constraint for windfarm_cpufreq_clamp driver. Signed-off-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/macintosh/windfarm_cpufreq_clamp.c | 77 ++++++++++++++-------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/macintosh/windfarm_cpufreq_clamp.c b/drivers/macintosh/windfarm_cpufreq_clamp.c index 52fd5fca89a0..705c6200814b 100644 --- a/drivers/macintosh/windfarm_cpufreq_clamp.c +++ b/drivers/macintosh/windfarm_cpufreq_clamp.c @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -16,36 +18,24 @@ static int clamped; static struct wf_control *clamp_control; - -static int clamp_notifier_call(struct notifier_block *self, - unsigned long event, void *data) -{ - struct cpufreq_policy *p = data; - unsigned long max_freq; - - if (event != CPUFREQ_ADJUST) - return 0; - - max_freq = clamped ? (p->cpuinfo.min_freq) : (p->cpuinfo.max_freq); - cpufreq_verify_within_limits(p, 0, max_freq); - - return 0; -} - -static struct notifier_block clamp_notifier = { - .notifier_call = clamp_notifier_call, -}; +static struct dev_pm_qos_request qos_req; +static unsigned int min_freq, max_freq; static int clamp_set(struct wf_control *ct, s32 value) { - if (value) + unsigned int freq; + + if (value) { + freq = min_freq; printk(KERN_INFO "windfarm: Clamping CPU frequency to " "minimum !\n"); - else + } else { + freq = max_freq; printk(KERN_INFO "windfarm: CPU frequency unclamped !\n"); + } clamped = value; - cpufreq_update_policy(0); - return 0; + + return dev_pm_qos_update_request(&qos_req, freq); } static int clamp_get(struct wf_control *ct, s32 *value) @@ -74,27 +64,60 @@ static const struct wf_control_ops clamp_ops = { static int __init wf_cpufreq_clamp_init(void) { + struct cpufreq_policy *policy; struct wf_control *clamp; + struct device *dev; + int ret; + + policy = cpufreq_cpu_get(0); + if (!policy) { + pr_warn("%s: cpufreq policy not found cpu0\n", __func__); + return -EPROBE_DEFER; + } + + min_freq = policy->cpuinfo.min_freq; + max_freq = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + + dev = get_cpu_device(0); + if (unlikely(!dev)) { + pr_warn("%s: No cpu device for cpu0\n", __func__); + return -ENODEV; + } clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL); if (clamp == NULL) return -ENOMEM; - cpufreq_register_notifier(&clamp_notifier, CPUFREQ_POLICY_NOTIFIER); + + ret = dev_pm_qos_add_request(dev, &qos_req, DEV_PM_QOS_MAX_FREQUENCY, + max_freq); + if (ret < 0) { + pr_err("%s: Failed to add freq constraint (%d)\n", __func__, + ret); + goto free; + } + clamp->ops = &clamp_ops; clamp->name = "cpufreq-clamp"; - if (wf_register_control(clamp)) + ret = wf_register_control(clamp); + if (ret) goto fail; clamp_control = clamp; return 0; fail: + dev_pm_qos_remove_request(&qos_req); + + free: kfree(clamp); - return -ENODEV; + return ret; } static void __exit wf_cpufreq_clamp_exit(void) { - if (clamp_control) + if (clamp_control) { wf_unregister_control(clamp_control); + dev_pm_qos_remove_request(&qos_req); + } } From afe969074eb7d0f52761ed80da15e9a282d19677 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 5 Jul 2019 15:49:48 +0530 Subject: [PATCH 097/126] cpufreq: powerpc_cbe: Switch to QoS requests for freq limits The cpufreq core now takes the min/max frequency constraints via QoS requests and the CPUFREQ_ADJUST notifier shall get removed later on. Switch over to using the QoS request for maximum frequency constraint for ppc_cbe_cpufreq driver. Signed-off-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/ppc_cbe_cpufreq.c | 19 ++++- drivers/cpufreq/ppc_cbe_cpufreq.h | 8 +++ drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 100 ++++++++++++++++---------- 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index b83f36febf03..c58abb4cca3a 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -110,6 +110,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) #endif policy->freq_table = cbe_freqs; + cbe_cpufreq_pmi_policy_init(policy); + return 0; +} + +static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + cbe_cpufreq_pmi_policy_exit(policy); return 0; } @@ -129,6 +136,7 @@ static struct cpufreq_driver cbe_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = cbe_cpufreq_target, .init = cbe_cpufreq_cpu_init, + .exit = cbe_cpufreq_cpu_exit, .name = "cbe-cpufreq", .flags = CPUFREQ_CONST_LOOPS, }; @@ -139,15 +147,24 @@ static struct cpufreq_driver cbe_cpufreq_driver = { static int __init cbe_cpufreq_init(void) { + int ret; + if (!machine_is(cell)) return -ENODEV; - return cpufreq_register_driver(&cbe_cpufreq_driver); + cbe_cpufreq_pmi_init(); + + ret = cpufreq_register_driver(&cbe_cpufreq_driver); + if (ret) + cbe_cpufreq_pmi_exit(); + + return ret; } static void __exit cbe_cpufreq_exit(void) { cpufreq_unregister_driver(&cbe_cpufreq_driver); + cbe_cpufreq_pmi_exit(); } module_init(cbe_cpufreq_init); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h index 9d973519d669..00cd8633b0d9 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.h +++ b/drivers/cpufreq/ppc_cbe_cpufreq.h @@ -20,6 +20,14 @@ int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); #if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI) extern bool cbe_cpufreq_has_pmi; +void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy); +void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy); +void cbe_cpufreq_pmi_init(void); +void cbe_cpufreq_pmi_exit(void); #else #define cbe_cpufreq_has_pmi (0) +static inline void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) {} +static inline void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy) {} +static inline void cbe_cpufreq_pmi_init(void) {} +static inline void cbe_cpufreq_pmi_exit(void) {} #endif diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 97c8ee4614b7..bc9dd30395c4 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -24,8 +25,6 @@ #include "ppc_cbe_cpufreq.h" -static u8 pmi_slow_mode_limit[MAX_CBE]; - bool cbe_cpufreq_has_pmi = false; EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); @@ -65,64 +64,89 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) { + struct cpufreq_policy *policy; + struct dev_pm_qos_request *req; u8 node, slow_mode; + int cpu, ret; BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); node = pmi_msg.data1; slow_mode = pmi_msg.data2; - pmi_slow_mode_limit[node] = slow_mode; + cpu = cbe_node_to_cpu(node); pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); -} -static int pmi_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - struct cpufreq_frequency_table *cbe_freqs = policy->freq_table; - u8 node; - - /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY - * policy events?) - */ - node = cbe_cpu_to_node(policy->cpu); - - pr_debug("got notified, event=%lu, node=%u\n", event, node); - - if (pmi_slow_mode_limit[node] != 0) { - pr_debug("limiting node %d to slow mode %d\n", - node, pmi_slow_mode_limit[node]); - - cpufreq_verify_within_limits(policy, 0, - - cbe_freqs[pmi_slow_mode_limit[node]].frequency); + policy = cpufreq_cpu_get(cpu); + if (!policy) { + pr_warn("cpufreq policy not found cpu%d\n", cpu); + return; } - return 0; -} + req = policy->driver_data; -static struct notifier_block pmi_notifier_block = { - .notifier_call = pmi_notifier, -}; + ret = dev_pm_qos_update_request(req, + policy->freq_table[slow_mode].frequency); + if (ret < 0) + pr_warn("Failed to update freq constraint: %d\n", ret); + else + pr_debug("limiting node %d to slow mode %d\n", node, slow_mode); + + cpufreq_cpu_put(policy); +} static struct pmi_handler cbe_pmi_handler = { .type = PMI_TYPE_FREQ_CHANGE, .handle_pmi_message = cbe_cpufreq_handle_pmi, }; - - -static int __init cbe_cpufreq_pmi_init(void) +void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy) { - cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0; + struct dev_pm_qos_request *req; + int ret; if (!cbe_cpufreq_has_pmi) - return -ENODEV; + return; - cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return; - return 0; + ret = dev_pm_qos_add_request(get_cpu_device(policy->cpu), req, + DEV_PM_QOS_MAX_FREQUENCY, + policy->freq_table[0].frequency); + if (ret < 0) { + pr_err("Failed to add freq constraint (%d)\n", ret); + kfree(req); + return; + } + + policy->driver_data = req; } -device_initcall(cbe_cpufreq_pmi_init); +EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_init); + +void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy) +{ + struct dev_pm_qos_request *req = policy->driver_data; + + if (cbe_cpufreq_has_pmi) { + dev_pm_qos_remove_request(req); + kfree(req); + } +} +EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_exit); + +void cbe_cpufreq_pmi_init(void) +{ + if (!pmi_register_handler(&cbe_pmi_handler)) + cbe_cpufreq_has_pmi = true; +} +EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_init); + +void cbe_cpufreq_pmi_exit(void) +{ + pmi_unregister_handler(&cbe_pmi_handler); + cbe_cpufreq_has_pmi = false; +} +EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_exit); From 40f0fc2a416b343a604a8131247a150588658d32 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:06 +0530 Subject: [PATCH 098/126] arch_topology: Use CPUFREQ_CREATE_POLICY instead of CPUFREQ_NOTIFY CPUFREQ_NOTIFY is going to get removed soon, lets use CPUFREQ_CREATE_POLICY instead of that here. CPUFREQ_CREATE_POLICY is called only once (which is exactly what we want here) for each cpufreq policy when it is first created. Signed-off-by: Viresh Kumar Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 63c1e76739f1..8cab1f5a8e0c 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -174,7 +174,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, if (!raw_capacity) return 0; - if (val != CPUFREQ_NOTIFY) + if (val != CPUFREQ_CREATE_POLICY) return 0; pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", From eb58a4fad3432be6e9dc2e35b78ae99f4a40ce5f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:07 +0530 Subject: [PATCH 099/126] video: sa1100fb: Remove cpufreq policy notifier The cpufreq policy notifier's CPUFREQ_ADJUST notification is going to get removed soon. The notifier callback sa1100fb_freq_policy() isn't doing anything apart from printing a debug message on CPUFREQ_ADJUST notification. There is no point in keeping an otherwise empty callback and registering the notifier. Remove it. Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/video/fbdev/sa1100fb.c | 27 --------------------------- drivers/video/fbdev/sa1100fb.h | 1 - 2 files changed, 28 deletions(-) diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index f7f8dee044b1..ae2bcfee338a 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1005,31 +1005,6 @@ sa1100fb_freq_transition(struct notifier_block *nb, unsigned long val, } return 0; } - -static int -sa1100fb_freq_policy(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct sa1100fb_info *fbi = TO_INF(nb, freq_policy); - struct cpufreq_policy *policy = data; - - switch (val) { - case CPUFREQ_ADJUST: - dev_dbg(fbi->dev, "min dma period: %d ps, " - "new clock %d kHz\n", sa1100fb_min_dma_period(fbi), - policy->max); - /* todo: fill in min/max values */ - break; - case CPUFREQ_NOTIFY: - do {} while(0); - /* todo: panic if min/max values aren't fulfilled - * [can't really happen unless there's a bug in the - * CPU policy verififcation process * - */ - break; - } - return 0; -} #endif #ifdef CONFIG_PM @@ -1242,9 +1217,7 @@ static int sa1100fb_probe(struct platform_device *pdev) #ifdef CONFIG_CPU_FREQ fbi->freq_transition.notifier_call = sa1100fb_freq_transition; - fbi->freq_policy.notifier_call = sa1100fb_freq_policy; cpufreq_register_notifier(&fbi->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - cpufreq_register_notifier(&fbi->freq_policy, CPUFREQ_POLICY_NOTIFIER); #endif /* This driver cannot be unloaded at the moment */ diff --git a/drivers/video/fbdev/sa1100fb.h b/drivers/video/fbdev/sa1100fb.h index 7a1a9ca33cec..d0aa33b0b88a 100644 --- a/drivers/video/fbdev/sa1100fb.h +++ b/drivers/video/fbdev/sa1100fb.h @@ -64,7 +64,6 @@ struct sa1100fb_info { #ifdef CONFIG_CPU_FREQ struct notifier_block freq_transition; - struct notifier_block freq_policy; #endif const struct sa1100fb_mach_info *inf; From 8c7d7b4bb172709f71975c2a6e95c9715171603f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:08 +0530 Subject: [PATCH 100/126] video: pxafb: Remove cpufreq policy notifier The cpufreq policy notifier's CPUFREQ_ADJUST notification is going to get removed soon. The notifier callback pxafb_freq_policy() isn't doing anything apart from printing a debug message on CPUFREQ_ADJUST notification. There is no point in keeping an otherwise empty callback and registering the notifier. Remove it. Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/video/fbdev/pxafb.c | 21 --------------------- drivers/video/fbdev/pxafb.h | 1 - 2 files changed, 22 deletions(-) diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 4282cb117b92..f70c9f79622e 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -1678,24 +1678,6 @@ pxafb_freq_transition(struct notifier_block *nb, unsigned long val, void *data) } return 0; } - -static int -pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data) -{ - struct pxafb_info *fbi = TO_INF(nb, freq_policy); - struct fb_var_screeninfo *var = &fbi->fb.var; - struct cpufreq_policy *policy = data; - - switch (val) { - case CPUFREQ_ADJUST: - pr_debug("min dma period: %d ps, " - "new clock %d kHz\n", pxafb_display_dma_period(var), - policy->max); - /* TODO: fill in min/max values */ - break; - } - return 0; -} #endif #ifdef CONFIG_PM @@ -2400,11 +2382,8 @@ static int pxafb_probe(struct platform_device *dev) #ifdef CONFIG_CPU_FREQ fbi->freq_transition.notifier_call = pxafb_freq_transition; - fbi->freq_policy.notifier_call = pxafb_freq_policy; cpufreq_register_notifier(&fbi->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - cpufreq_register_notifier(&fbi->freq_policy, - CPUFREQ_POLICY_NOTIFIER); #endif /* diff --git a/drivers/video/fbdev/pxafb.h b/drivers/video/fbdev/pxafb.h index b641289c8a99..86b1e9ab1a38 100644 --- a/drivers/video/fbdev/pxafb.h +++ b/drivers/video/fbdev/pxafb.h @@ -162,7 +162,6 @@ struct pxafb_info { #ifdef CONFIG_CPU_FREQ struct notifier_block freq_transition; - struct notifier_block freq_policy; #endif struct regulator *lcd_supply; From 0465814831a926ce2f83e8f606d067d86745234e Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Mon, 26 Aug 2019 21:37:37 +0900 Subject: [PATCH 101/126] PM / devfreq: passive: fix compiler warning The recent commit of PM / devfreq: passive: Use non-devm notifiers had incurred compiler warning, "unused variable 'dev'". Reported-by: Stephen Rothwell Signed-off-by: MyungJoo Ham --- drivers/devfreq/governor_passive.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index da485477065c..be6eeab9c814 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -149,7 +149,6 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb, static int devfreq_passive_event_handler(struct devfreq *devfreq, unsigned int event, void *data) { - struct device *dev = devfreq->dev.parent; struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; struct devfreq *parent = (struct devfreq *)p_data->parent; From d15ce412737accaba5e4c7d653b184772da47365 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 28 Aug 2019 14:20:13 +0530 Subject: [PATCH 102/126] ACPI: cpufreq: Switch to QoS requests instead of cpufreq notifier The cpufreq core now takes the min/max frequency constraints via QoS requests and the CPUFREQ_ADJUST notifier shall get removed later on. Switch over to using the QoS request for maximum frequency constraint for acpi driver. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_driver.c | 39 ++++++++++-- drivers/acpi/processor_perflib.c | 100 ++++++++++++------------------- drivers/acpi/processor_thermal.c | 82 +++++++++++++------------ include/acpi/processor.h | 26 +++++--- 4 files changed, 131 insertions(+), 116 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index aea8d674a33d..08da9c29f1e9 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -284,6 +284,29 @@ static int acpi_processor_stop(struct device *dev) return 0; } +bool acpi_processor_cpufreq_init; + +static int acpi_processor_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + int cpu = policy->cpu; + + if (event == CPUFREQ_CREATE_POLICY) { + acpi_thermal_cpufreq_init(cpu); + acpi_processor_ppc_init(cpu); + } else if (event == CPUFREQ_REMOVE_POLICY) { + acpi_processor_ppc_exit(cpu); + acpi_thermal_cpufreq_exit(cpu); + } + + return 0; +} + +static struct notifier_block acpi_processor_notifier_block = { + .notifier_call = acpi_processor_notifier, +}; + /* * We keep the driver loaded even when ACPI is not running. * This is needed for the powernow-k8 driver, that works even without @@ -310,8 +333,12 @@ static int __init acpi_processor_driver_init(void) cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead", NULL, acpi_soft_cpu_dead); - acpi_thermal_cpufreq_init(); - acpi_processor_ppc_init(); + if (!cpufreq_register_notifier(&acpi_processor_notifier_block, + CPUFREQ_POLICY_NOTIFIER)) { + acpi_processor_cpufreq_init = true; + acpi_processor_ignore_ppc_init(); + } + acpi_processor_throttling_init(); return 0; err: @@ -324,8 +351,12 @@ static void __exit acpi_processor_driver_exit(void) if (acpi_disabled) return; - acpi_processor_ppc_exit(); - acpi_thermal_cpufreq_exit(); + if (acpi_processor_cpufreq_init) { + cpufreq_unregister_notifier(&acpi_processor_notifier_block, + CPUFREQ_POLICY_NOTIFIER); + acpi_processor_cpufreq_init = false; + } + cpuhp_remove_state_nocalls(hp_online); cpuhp_remove_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD); driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index ee87cb6f6e59..2261713d1aec 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -50,57 +50,13 @@ module_param(ignore_ppc, int, 0644); MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ "limited by BIOS, this should help"); -#define PPC_REGISTERED 1 -#define PPC_IN_USE 2 - -static int acpi_processor_ppc_status; - -static int acpi_processor_ppc_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - struct acpi_processor *pr; - unsigned int ppc = 0; - - if (ignore_ppc < 0) - ignore_ppc = 0; - - if (ignore_ppc) - return 0; - - if (event != CPUFREQ_ADJUST) - return 0; - - mutex_lock(&performance_mutex); - - pr = per_cpu(processors, policy->cpu); - if (!pr || !pr->performance) - goto out; - - ppc = (unsigned int)pr->performance_platform_limit; - - if (ppc >= pr->performance->state_count) - goto out; - - cpufreq_verify_within_limits(policy, 0, - pr->performance->states[ppc]. - core_frequency * 1000); - - out: - mutex_unlock(&performance_mutex); - - return 0; -} - -static struct notifier_block acpi_ppc_notifier_block = { - .notifier_call = acpi_processor_ppc_notifier, -}; +static bool acpi_processor_ppc_in_use; static int acpi_processor_get_platform_limit(struct acpi_processor *pr) { acpi_status status = 0; unsigned long long ppc = 0; - + int ret; if (!pr) return -EINVAL; @@ -112,7 +68,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) status = acpi_evaluate_integer(pr->handle, "_PPC", NULL, &ppc); if (status != AE_NOT_FOUND) - acpi_processor_ppc_status |= PPC_IN_USE; + acpi_processor_ppc_in_use = true; if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PPC")); @@ -124,6 +80,17 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) pr->performance_platform_limit = (int)ppc; + if (ppc >= pr->performance->state_count || + unlikely(!dev_pm_qos_request_active(&pr->perflib_req))) + return 0; + + ret = dev_pm_qos_update_request(&pr->perflib_req, + pr->performance->states[ppc].core_frequency * 1000); + if (ret < 0) { + pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n", + pr->id, ret); + } + return 0; } @@ -184,23 +151,32 @@ int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) } EXPORT_SYMBOL(acpi_processor_get_bios_limit); -void acpi_processor_ppc_init(void) +void acpi_processor_ignore_ppc_init(void) { - if (!cpufreq_register_notifier - (&acpi_ppc_notifier_block, CPUFREQ_POLICY_NOTIFIER)) - acpi_processor_ppc_status |= PPC_REGISTERED; - else - printk(KERN_DEBUG - "Warning: Processor Platform Limit not supported.\n"); + if (ignore_ppc < 0) + ignore_ppc = 0; } -void acpi_processor_ppc_exit(void) +void acpi_processor_ppc_init(int cpu) { - if (acpi_processor_ppc_status & PPC_REGISTERED) - cpufreq_unregister_notifier(&acpi_ppc_notifier_block, - CPUFREQ_POLICY_NOTIFIER); + struct acpi_processor *pr = per_cpu(processors, cpu); + int ret; - acpi_processor_ppc_status &= ~PPC_REGISTERED; + ret = dev_pm_qos_add_request(get_cpu_device(cpu), + &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY, + INT_MAX); + if (ret < 0) { + pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, + ret); + return; + } +} + +void acpi_processor_ppc_exit(int cpu) +{ + struct acpi_processor *pr = per_cpu(processors, cpu); + + dev_pm_qos_remove_request(&pr->perflib_req); } static int acpi_processor_get_performance_control(struct acpi_processor *pr) @@ -477,7 +453,7 @@ int acpi_processor_notify_smm(struct module *calling_module) static int is_done = 0; int result; - if (!(acpi_processor_ppc_status & PPC_REGISTERED)) + if (!acpi_processor_cpufreq_init) return -EBUSY; if (!try_module_get(calling_module)) @@ -513,7 +489,7 @@ int acpi_processor_notify_smm(struct module *calling_module) * we can allow the cpufreq driver to be rmmod'ed. */ is_done = 1; - if (!(acpi_processor_ppc_status & PPC_IN_USE)) + if (!acpi_processor_ppc_in_use) module_put(calling_module); return 0; @@ -742,7 +718,7 @@ acpi_processor_register_performance(struct acpi_processor_performance { struct acpi_processor *pr; - if (!(acpi_processor_ppc_status & PPC_REGISTERED)) + if (!acpi_processor_cpufreq_init) return -EINVAL; mutex_lock(&performance_mutex); diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 50fb0107375e..ec2638f1df4f 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -35,7 +35,6 @@ ACPI_MODULE_NAME("processor_thermal"); #define CPUFREQ_THERMAL_MAX_STEP 3 static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg); -static unsigned int acpi_thermal_cpufreq_is_init = 0; #define reduction_pctg(cpu) \ per_cpu(cpufreq_thermal_reduction_pctg, phys_package_first_cpu(cpu)) @@ -61,35 +60,11 @@ static int phys_package_first_cpu(int cpu) static int cpu_has_cpufreq(unsigned int cpu) { struct cpufreq_policy policy; - if (!acpi_thermal_cpufreq_is_init || cpufreq_get_policy(&policy, cpu)) + if (!acpi_processor_cpufreq_init || cpufreq_get_policy(&policy, cpu)) return 0; return 1; } -static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - unsigned long max_freq = 0; - - if (event != CPUFREQ_ADJUST) - goto out; - - max_freq = ( - policy->cpuinfo.max_freq * - (100 - reduction_pctg(policy->cpu) * 20) - ) / 100; - - cpufreq_verify_within_limits(policy, 0, max_freq); - - out: - return 0; -} - -static struct notifier_block acpi_thermal_cpufreq_notifier_block = { - .notifier_call = acpi_thermal_cpufreq_notifier, -}; - static int cpufreq_get_max_state(unsigned int cpu) { if (!cpu_has_cpufreq(cpu)) @@ -108,7 +83,10 @@ static int cpufreq_get_cur_state(unsigned int cpu) static int cpufreq_set_cur_state(unsigned int cpu, int state) { - int i; + struct cpufreq_policy *policy; + struct acpi_processor *pr; + unsigned long max_freq; + int i, ret; if (!cpu_has_cpufreq(cpu)) return 0; @@ -121,33 +99,53 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) * frequency. */ for_each_online_cpu(i) { - if (topology_physical_package_id(i) == + if (topology_physical_package_id(i) != topology_physical_package_id(cpu)) - cpufreq_update_policy(i); + continue; + + pr = per_cpu(processors, i); + + if (unlikely(!dev_pm_qos_request_active(&pr->thermal_req))) + continue; + + policy = cpufreq_cpu_get(i); + if (!policy) + return -EINVAL; + + max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100; + + cpufreq_cpu_put(policy); + + ret = dev_pm_qos_update_request(&pr->thermal_req, max_freq); + if (ret < 0) { + pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n", + pr->id, ret); + } } return 0; } -void acpi_thermal_cpufreq_init(void) +void acpi_thermal_cpufreq_init(int cpu) { - int i; + struct acpi_processor *pr = per_cpu(processors, cpu); + int ret; - i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block, - CPUFREQ_POLICY_NOTIFIER); - if (!i) - acpi_thermal_cpufreq_is_init = 1; + ret = dev_pm_qos_add_request(get_cpu_device(cpu), + &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY, + INT_MAX); + if (ret < 0) { + pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, + ret); + return; + } } -void acpi_thermal_cpufreq_exit(void) +void acpi_thermal_cpufreq_exit(int cpu) { - if (acpi_thermal_cpufreq_is_init) - cpufreq_unregister_notifier - (&acpi_thermal_cpufreq_notifier_block, - CPUFREQ_POLICY_NOTIFIER); + struct acpi_processor *pr = per_cpu(processors, cpu); - acpi_thermal_cpufreq_is_init = 0; + dev_pm_qos_remove_request(&pr->thermal_req); } - #else /* ! CONFIG_CPU_FREQ */ static int cpufreq_get_max_state(unsigned int cpu) { diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 1194a4c78d55..f936033cb9e6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include @@ -230,6 +232,8 @@ struct acpi_processor { struct acpi_processor_limit limit; struct thermal_cooling_device *cdev; struct device *dev; /* Processor device. */ + struct dev_pm_qos_request perflib_req; + struct dev_pm_qos_request thermal_req; }; struct acpi_processor_errata { @@ -296,16 +300,22 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx /* in processor_perflib.c */ #ifdef CONFIG_CPU_FREQ -void acpi_processor_ppc_init(void); -void acpi_processor_ppc_exit(void); +extern bool acpi_processor_cpufreq_init; +void acpi_processor_ignore_ppc_init(void); +void acpi_processor_ppc_init(int cpu); +void acpi_processor_ppc_exit(int cpu); void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag); extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit); #else -static inline void acpi_processor_ppc_init(void) +static inline void acpi_processor_ignore_ppc_init(void) { return; } -static inline void acpi_processor_ppc_exit(void) +static inline void acpi_processor_ppc_init(int cpu) +{ + return; +} +static inline void acpi_processor_ppc_exit(int cpu) { return; } @@ -421,14 +431,14 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr) int acpi_processor_get_limit_info(struct acpi_processor *pr); extern const struct thermal_cooling_device_ops processor_cooling_ops; #if defined(CONFIG_ACPI_CPU_FREQ_PSS) & defined(CONFIG_CPU_FREQ) -void acpi_thermal_cpufreq_init(void); -void acpi_thermal_cpufreq_exit(void); +void acpi_thermal_cpufreq_init(int cpu); +void acpi_thermal_cpufreq_exit(int cpu); #else -static inline void acpi_thermal_cpufreq_init(void) +static inline void acpi_thermal_cpufreq_init(int cpu) { return; } -static inline void acpi_thermal_cpufreq_exit(void) +static inline void acpi_thermal_cpufreq_exit(int cpu) { return; } From 77c84dd1881d0f0176cb678d770bfbda26c54390 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Wed, 7 Aug 2019 16:33:40 +0100 Subject: [PATCH 103/126] sched/cpufreq: Align trace event behavior of fast switching Fast switching path only emits an event for the CPU of interest, whereas the regular path emits an event for all the CPUs that had their frequency changed, i.e. all the CPUs sharing the same policy. With the current behavior, looking at cpu_frequency event for a given CPU that is using the fast switching path will not give the correct frequency signal. Signed-off-by: Douglas RAILLARD Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 867b4bb6d4be..b03ca2f73713 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -117,6 +117,7 @@ static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { struct cpufreq_policy *policy = sg_policy->policy; + int cpu; if (!sugov_update_next_freq(sg_policy, time, next_freq)) return; @@ -126,7 +127,11 @@ static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, return; policy->cur = next_freq; - trace_cpu_frequency(next_freq, smp_processor_id()); + + if (trace_cpu_frequency_enabled()) { + for_each_cpu(cpu, policy->cpus) + trace_cpu_frequency(next_freq, cpu); + } } static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time, From 828f369d76d16bffa97133f6f63a64c13a401953 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 17 Jun 2019 16:51:15 +0200 Subject: [PATCH 104/126] cpupower: Add missing newline at end of file "git diff" says: \ No newline at end of file after modifying the files. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/cpufreq-bench_plot.sh | 2 +- tools/power/cpupower/bench/cpufreq-bench_script.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/bench/cpufreq-bench_plot.sh b/tools/power/cpupower/bench/cpufreq-bench_plot.sh index 9061b4f1244e..f5f8b3c8f062 100644 --- a/tools/power/cpupower/bench/cpufreq-bench_plot.sh +++ b/tools/power/cpupower/bench/cpufreq-bench_plot.sh @@ -88,4 +88,4 @@ done echo >> $dir/plot_script.gpl gnuplot $dir/plot_script.gpl -rm -r $dir \ No newline at end of file +rm -r $dir diff --git a/tools/power/cpupower/bench/cpufreq-bench_script.sh b/tools/power/cpupower/bench/cpufreq-bench_script.sh index 4e9714b876d2..785a3679c704 100644 --- a/tools/power/cpupower/bench/cpufreq-bench_script.sh +++ b/tools/power/cpupower/bench/cpufreq-bench_script.sh @@ -85,4 +85,4 @@ function create_plots() } measure -create_plots \ No newline at end of file +create_plots From a73f6e2fbe8077811ea9546e0d44a7533111f0ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 7 Jun 2019 17:02:55 +0200 Subject: [PATCH 105/126] tools/power/cpupower: fix 64bit detection when cross-compiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross-compiling cpupower, 64bit detection is done with the host compiler instead of the cross-compiler and libcpupower.so.0 ends up in /usr/lib64 instead of /usr/lib for 32bit target. Fix this by moving 64bit detection after CC is defined. Signed-off-by: Sébastien Szymanski Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 9063fca480b3..c8622497ef23 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -18,7 +18,6 @@ OUTDIR := $(shell cd $(OUTPUT) && pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -include ../../scripts/Makefile.arch # --- CONFIGURATION BEGIN --- @@ -69,11 +68,6 @@ bindir ?= /usr/bin sbindir ?= /usr/sbin mandir ?= /usr/man includedir ?= /usr/include -ifeq ($(IS_64_BIT), 1) -libdir ?= /usr/lib64 -else -libdir ?= /usr/lib -endif localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower confdir ?= /etc/ @@ -100,6 +94,14 @@ RANLIB = $(CROSS)ranlib HOSTCC = gcc MKDIR = mkdir +# 64bit library detection +include ../../scripts/Makefile.arch + +ifeq ($(IS_64_BIT), 1) +libdir ?= /usr/lib64 +else +libdir ?= /usr/lib +endif # Now we set up the build system # From 87ce243206944a57383309dcbcdcc5750e6c905b Mon Sep 17 00:00:00 2001 From: Benjamin Weis Date: Mon, 24 Jun 2019 15:31:50 +0200 Subject: [PATCH 106/126] cpupower: update German translation Update the German translation of cpupower, and change the encoding to UTF-8. [skhan@linuxfoundation.org: fix merge conflicts] Signed-off-by: Benjamin Weis Acked-by: Dominik Brodowski Signed-off-by: Shuah Khan --- tools/power/cpupower/po/de.po | 344 ++++++++++++++++++---------------- 1 file changed, 180 insertions(+), 164 deletions(-) diff --git a/tools/power/cpupower/po/de.po b/tools/power/cpupower/po/de.po index 70887bb8ba95..9780a447bb55 100644 --- a/tools/power/cpupower/po/de.po +++ b/tools/power/cpupower/po/de.po @@ -8,66 +8,66 @@ msgstr "" "Project-Id-Version: cpufrequtils 006\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-03-08 17:03+0100\n" -"PO-Revision-Date: 2009-08-08 17:18+0100\n" -"Last-Translator: \n" +"PO-Revision-Date: 2019-06-02 15:23+0200\n" +"Last-Translator: Benjamin Weis \n" "Language-Team: NONE\n" "Language: \n" "MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=ISO-8859-1\n" +"Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" #: utils/idle_monitor/nhm_idle.c:36 msgid "Processor Core C3" -msgstr "" +msgstr "Prozessorkern C3" #: utils/idle_monitor/nhm_idle.c:43 msgid "Processor Core C6" -msgstr "" +msgstr "Prozessorkern C6" #: utils/idle_monitor/nhm_idle.c:51 msgid "Processor Package C3" -msgstr "" +msgstr "Prozessorpaket C3" #: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70 msgid "Processor Package C6" -msgstr "" +msgstr "Prozessorpaket C6" #: utils/idle_monitor/snb_idle.c:33 msgid "Processor Core C7" -msgstr "" +msgstr "Prozessorkern C7" #: utils/idle_monitor/snb_idle.c:40 msgid "Processor Package C2" -msgstr "" +msgstr "Prozessorpaket C2" #: utils/idle_monitor/snb_idle.c:47 msgid "Processor Package C7" -msgstr "" +msgstr "Prozessorpaket C7" #: utils/idle_monitor/amd_fam14h_idle.c:56 msgid "Package in sleep state (PC1 or deeper)" -msgstr "" +msgstr "Paket in Schlafzustand (PC1 oder tiefer)" #: utils/idle_monitor/amd_fam14h_idle.c:63 msgid "Processor Package C1" -msgstr "" +msgstr "Prozessorpaket C1" #: utils/idle_monitor/amd_fam14h_idle.c:77 msgid "North Bridge P1 boolean counter (returns 0 or 1)" -msgstr "" +msgstr "North Bridge P1 boolescher Zähler (gibt 0 oder 1 zurück)" #: utils/idle_monitor/mperf_monitor.c:35 msgid "Processor Core not idle" -msgstr "" +msgstr "Prozessorkern ist nicht im Leerlauf" #: utils/idle_monitor/mperf_monitor.c:42 msgid "Processor Core in an idle state" -msgstr "" +msgstr "Prozessorkern ist in einem Ruhezustand" #: utils/idle_monitor/mperf_monitor.c:50 msgid "Average Frequency (including boost) in MHz" -msgstr "" +msgstr "Durchschnittliche Frequenz (einschließlich Boost) in MHz" #: utils/idle_monitor/cpupower-monitor.c:66 #, c-format @@ -75,6 +75,8 @@ msgid "" "cpupower monitor: [-h] [ [-t] | [-l] | [-m ,[] ] ] [-i " "interval_sec | -c command ...]\n" msgstr "" +"cpupower monitor: [-h] [ [-t] | [-l] | [-m ,[] ] ] [-i " +"interval_sec | -c Befehl ...]\n" #: utils/idle_monitor/cpupower-monitor.c:69 #, c-format @@ -82,36 +84,40 @@ msgid "" "cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m ,[] ] ] [-i " "interval_sec | -c command ...]\n" msgstr "" +"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m ,[] ] ] [-i " +"interval_sec | -c Befehl ...]\n" #: utils/idle_monitor/cpupower-monitor.c:71 #, c-format msgid "\t -v: be more verbose\n" -msgstr "" +msgstr "\t -v: ausführlicher\n" #: utils/idle_monitor/cpupower-monitor.c:73 #, c-format msgid "\t -h: print this help\n" -msgstr "" +msgstr "\t -h: diese Hilfe ausgeben\n" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format msgid "\t -i: time interval to measure for in seconds (default 1)\n" -msgstr "" +msgstr "\t -i: Zeitintervall für die Messung in Sekunden (Standard 1)\n" #: utils/idle_monitor/cpupower-monitor.c:75 #, c-format msgid "\t -t: show CPU topology/hierarchy\n" -msgstr "" +msgstr "\t -t: CPU-Topologie/Hierarchie anzeigen\n" #: utils/idle_monitor/cpupower-monitor.c:76 #, c-format msgid "\t -l: list available CPU sleep monitors (for use with -m)\n" msgstr "" +"\t -l: verfügbare CPU-Schlafwächter auflisten (für Verwendung mit -m)\n" #: utils/idle_monitor/cpupower-monitor.c:77 #, c-format msgid "\t -m: show specific CPU sleep monitors only (in same order)\n" msgstr "" +"\t -m: spezifische CPU-Schlafwächter anzeigen (in gleicher Reihenfolge)\n" #: utils/idle_monitor/cpupower-monitor.c:79 #, c-format @@ -119,71 +125,73 @@ msgid "" "only one of: -t, -l, -m are allowed\n" "If none of them is passed," msgstr "" +"nur einer von: -t, -l, -m ist erlaubt\n" +"Wenn keiner von ihnen übergeben wird," #: utils/idle_monitor/cpupower-monitor.c:80 #, c-format msgid " all supported monitors are shown\n" -msgstr "" +msgstr " werden alle unterstützten Wächter angezeigt\n" #: utils/idle_monitor/cpupower-monitor.c:197 #, c-format msgid "Monitor %s, Counter %s has no count function. Implementation error\n" -msgstr "" +msgstr "Wächter %s, Zähler %s hat keine Zählfunktion. Implementierungsfehler\n" #: utils/idle_monitor/cpupower-monitor.c:207 #, c-format msgid " *is offline\n" -msgstr "" +msgstr " *ist offline\n" #: utils/idle_monitor/cpupower-monitor.c:236 #, c-format msgid "%s: max monitor name length (%d) exceeded\n" -msgstr "" +msgstr "%s: max. Wächternamenslänge (%d) überschritten\n" #: utils/idle_monitor/cpupower-monitor.c:250 #, c-format msgid "No matching monitor found in %s, try -l option\n" -msgstr "" +msgstr "Kein passender Wächter in %s gefunden, versuchen Sie die Option -l\n" #: utils/idle_monitor/cpupower-monitor.c:266 #, c-format msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n" -msgstr "" +msgstr "Wächter \"%s\" (%d Zustände) - Könnte nach %u s überlaufen\n" #: utils/idle_monitor/cpupower-monitor.c:319 #, c-format msgid "%s took %.5f seconds and exited with status %d\n" -msgstr "" +msgstr "%s hat %.5f Sekunden gedauert und hat sich mit Status %d beendet\n" #: utils/idle_monitor/cpupower-monitor.c:406 #, c-format msgid "Cannot read number of available processors\n" -msgstr "" +msgstr "Anzahl der verfügbaren Prozessoren kann nicht gelesen werden\n" #: utils/idle_monitor/cpupower-monitor.c:417 #, c-format msgid "Available monitor %s needs root access\n" -msgstr "" +msgstr "Verfügbarer Wächter %s benötigt root-Zugriff\n" #: utils/idle_monitor/cpupower-monitor.c:428 #, c-format msgid "No HW Cstate monitors found\n" -msgstr "" +msgstr "Keine HW C-Zustandswächter gefunden\n" #: utils/cpupower.c:78 #, c-format msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n" -msgstr "" +msgstr "cpupower [ -c cpulist ] Unterbefehl [ARGS]\n" #: utils/cpupower.c:79 #, c-format msgid "cpupower --version\n" -msgstr "" +msgstr "cpupower --version\n" #: utils/cpupower.c:80 #, c-format msgid "Supported subcommands are:\n" -msgstr "" +msgstr "Unterstützte Unterbefehle sind:\n" #: utils/cpupower.c:83 #, c-format @@ -191,11 +199,15 @@ msgid "" "\n" "Some subcommands can make use of the -c cpulist option.\n" msgstr "" +"\n" +"Einige Unterbefehle können die Option -c cpulist verwenden.\n" #: utils/cpupower.c:84 #, c-format msgid "Look at the general cpupower manpage how to use it\n" msgstr "" +"Schauen Sie sich die allgemeine cpupower manpage an, um zu erfahren, wie man " +"es benutzt\n" #: utils/cpupower.c:85 #, c-format @@ -217,30 +229,31 @@ msgstr "Bitte melden Sie Fehler an %s.\n" #: utils/cpupower.c:114 #, c-format msgid "Error parsing cpu list\n" -msgstr "" +msgstr "Fehler beim Parsen der CPU-Liste\n" #: utils/cpupower.c:172 #, c-format msgid "Subcommand %s needs root privileges\n" -msgstr "" +msgstr "Unterbefehl %s benötigt root-Rechte\n" #: utils/cpufreq-info.c:31 #, c-format msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n" msgstr "" -"Konnte nicht die Anzahl der CPUs herausfinden (%s : %s), nehme daher 1 an.\n" +"Anzahl der CPUs konnte nicht herausgefinden werden (%s: %s), es wird daher 1 " +"angenommen\n" #: utils/cpufreq-info.c:63 #, c-format msgid "" " minimum CPU frequency - maximum CPU frequency - governor\n" -msgstr "" -" minimale CPU-Taktfreq. - maximale CPU-Taktfreq. - Regler \n" +msgstr " minimale CPU-Frequenz - maximale CPU-Frequenz - Regler\n" #: utils/cpufreq-info.c:151 #, c-format msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n" msgstr "" +"Fehler beim Evaluieren der Boost-Fähigkeiten bei CPU %d -- sind Sie root?\n" #. P state changes via MSR are identified via cpuid 80000007 #. on Intel and AMD, but we assume boost capable machines can do that @@ -250,50 +263,50 @@ msgstr "" #: utils/cpufreq-info.c:161 #, c-format msgid " boost state support: \n" -msgstr "" +msgstr " Boost-Zustand-Unterstützung: \n" #: utils/cpufreq-info.c:163 #, c-format msgid " Supported: %s\n" -msgstr "" +msgstr " Unterstützt: %s\n" #: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 msgid "yes" -msgstr "" +msgstr "ja" #: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 msgid "no" -msgstr "" +msgstr "nein" #: utils/cpufreq-info.c:164 -#, fuzzy, c-format +#, c-format msgid " Active: %s\n" -msgstr " Treiber: %s\n" +msgstr " Aktiv: %s\n" #: utils/cpufreq-info.c:177 #, c-format msgid " Boost States: %d\n" -msgstr "" +msgstr " Boost-Zustände: %d\n" #: utils/cpufreq-info.c:178 #, c-format msgid " Total States: %d\n" -msgstr "" +msgstr " Gesamtzustände: %d\n" #: utils/cpufreq-info.c:181 #, c-format msgid " Pstate-Pb%d: %luMHz (boost state)\n" -msgstr "" +msgstr " Pstate-Pb%d: %luMHz (Boost-Zustand)\n" #: utils/cpufreq-info.c:184 #, c-format msgid " Pstate-P%d: %luMHz\n" -msgstr "" +msgstr " Pstate-P%d: %luMHz\n" #: utils/cpufreq-info.c:211 #, c-format msgid " no or unknown cpufreq driver is active on this CPU\n" -msgstr " kein oder nicht bestimmbarer cpufreq-Treiber aktiv\n" +msgstr " kein oder ein unbekannter cpufreq-Treiber ist auf dieser CPU aktiv\n" #: utils/cpufreq-info.c:213 #, c-format @@ -303,12 +316,12 @@ msgstr " Treiber: %s\n" #: utils/cpufreq-info.c:219 #, c-format msgid " CPUs which run at the same hardware frequency: " -msgstr " Folgende CPUs laufen mit der gleichen Hardware-Taktfrequenz: " +msgstr " CPUs, die mit der gleichen Hardwarefrequenz laufen: " #: utils/cpufreq-info.c:230 #, c-format msgid " CPUs which need to have their frequency coordinated by software: " -msgstr " Die Taktfrequenz folgender CPUs werden per Software koordiniert: " +msgstr " CPUs, die ihre Frequenz mit Software koordinieren müssen: " #: utils/cpufreq-info.c:241 #, c-format @@ -318,22 +331,22 @@ msgstr " Maximale Dauer eines Taktfrequenzwechsels: " #: utils/cpufreq-info.c:247 #, c-format msgid " hardware limits: " -msgstr " Hardwarebedingte Grenzen der Taktfrequenz: " +msgstr " Hardwarebegrenzungen: " #: utils/cpufreq-info.c:256 #, c-format msgid " available frequency steps: " -msgstr " mögliche Taktfrequenzen: " +msgstr " verfügbare Frequenzschritte: " #: utils/cpufreq-info.c:269 #, c-format msgid " available cpufreq governors: " -msgstr " mögliche Regler: " +msgstr " verfügbare cpufreq-Regler: " #: utils/cpufreq-info.c:280 #, c-format msgid " current policy: frequency should be within " -msgstr " momentane Taktik: die Frequenz soll innerhalb " +msgstr " momentane Richtlinie: Frequenz sollte innerhalb " #: utils/cpufreq-info.c:282 #, c-format @@ -346,29 +359,28 @@ msgid "" "The governor \"%s\" may decide which speed to use\n" " within this range.\n" msgstr "" -" liegen. Der Regler \"%s\" kann frei entscheiden,\n" -" welche Taktfrequenz innerhalb dieser Grenze verwendet " -"wird.\n" +" sein. Der Regler \"%s\" kann frei entscheiden,\n" +" welche Geschwindigkeit er in diesem Bereich verwendet.\n" #: utils/cpufreq-info.c:293 #, c-format msgid " current CPU frequency is " -msgstr " momentane Taktfrequenz ist " +msgstr " momentane CPU-Frequenz ist " #: utils/cpufreq-info.c:296 #, c-format msgid " (asserted by call to hardware)" -msgstr " (verifiziert durch Nachfrage bei der Hardware)" +msgstr " (durch Aufruf der Hardware sichergestellt)" #: utils/cpufreq-info.c:304 #, c-format msgid " cpufreq stats: " -msgstr " Statistik:" +msgstr " cpufreq-Statistiken: " #: utils/cpufreq-info.c:472 -#, fuzzy, c-format +#, c-format msgid "Usage: cpupower freqinfo [options]\n" -msgstr "Aufruf: cpufreq-info [Optionen]\n" +msgstr "Aufruf: cpupower freqinfo [Optionen]\n" #: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23 #: utils/cpupower-info.c:22 utils/cpuidle-info.c:148 @@ -377,11 +389,9 @@ msgid "Options:\n" msgstr "Optionen:\n" #: utils/cpufreq-info.c:474 -#, fuzzy, c-format +#, c-format msgid " -e, --debug Prints out debug information [default]\n" -msgstr "" -" -e, --debug Erzeugt detaillierte Informationen, hilfreich\n" -" zum Aufspüren von Fehlern\n" +msgstr " -e, --debug Gibt Debug-Informationen aus [Standard]\n" #: utils/cpufreq-info.c:475 #, c-format @@ -424,7 +434,7 @@ msgstr " -p, --policy Findet die momentane Taktik heraus *\n" #: utils/cpufreq-info.c:482 #, c-format msgid " -g, --governors Determines available cpufreq governors *\n" -msgstr " -g, --governors Erzeugt eine Liste mit verfügbaren Reglern *\n" +msgstr " -g, --governors Ermittelt verfügbare cpufreq-Regler *\n" #: utils/cpufreq-info.c:483 #, c-format @@ -449,8 +459,7 @@ msgstr "" #: utils/cpufreq-info.c:486 #, c-format msgid " -s, --stats Shows cpufreq statistics if available\n" -msgstr "" -" -s, --stats Zeigt, sofern möglich, Statistiken über cpufreq an.\n" +msgstr " -s, --stats Zeigt cpufreq-Statistiken an, falls vorhanden\n" #: utils/cpufreq-info.c:487 #, c-format @@ -464,13 +473,13 @@ msgstr "" #: utils/cpufreq-info.c:488 #, c-format msgid " -b, --boost Checks for turbo or boost modes *\n" -msgstr "" +msgstr " -b, --boost Prüft auf Turbo- oder Boost-Modi *\n" #: utils/cpufreq-info.c:489 #, c-format msgid "" -" -o, --proc Prints out information like provided by the /proc/" -"cpufreq\n" +" -o, --proc Prints out information like provided by the " +"/proc/cpufreq\n" " interface in 2.4. and early 2.6. kernels\n" msgstr "" " -o, --proc Erzeugt Informationen in einem ähnlichem Format zu " @@ -509,8 +518,8 @@ msgid "" "For the arguments marked with *, omitting the -c or --cpu argument is\n" "equivalent to setting it to zero\n" msgstr "" -"Bei den mit * markierten Parametern wird '--cpu 0' angenommen, soweit nicht\n" -"mittels -c oder --cpu etwas anderes angegeben wird\n" +"Für die mit * markierten Argumente ist das Weglassen des Arguments\n" +"-c oder --cpu gleichbedeutend mit der Einstellung auf Null\n" #: utils/cpufreq-info.c:580 #, c-format @@ -525,8 +534,8 @@ msgid "" "You can't specify more than one --cpu parameter and/or\n" "more than one output-specific argument\n" msgstr "" -"Man kann nicht mehr als einen --cpu-Parameter und/oder mehr als einen\n" -"informationsspezifischen Parameter gleichzeitig angeben\n" +"Sie können nicht mehr als einen Parameter --cpu und/oder\n" +"mehr als ein ausgabespezifisches Argument angeben\n" #: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42 #: utils/cpupower-info.c:42 utils/cpuidle-info.c:213 @@ -538,17 +547,17 @@ msgstr "unbekannter oder falscher Parameter\n" #, c-format msgid "couldn't analyze CPU %d as it doesn't seem to be present\n" msgstr "" -"Konnte nicht die CPU %d analysieren, da sie (scheinbar?) nicht existiert.\n" +"CPU %d konnte nicht analysiert werden, da sie scheinbar nicht existiert\n" #: utils/cpufreq-info.c:620 utils/cpupower-info.c:142 #, c-format msgid "analyzing CPU %d:\n" -msgstr "analysiere CPU %d:\n" +msgstr "CPU %d wird analysiert:\n" #: utils/cpufreq-set.c:25 -#, fuzzy, c-format +#, c-format msgid "Usage: cpupower frequency-set [options]\n" -msgstr "Aufruf: cpufreq-set [Optionen]\n" +msgstr "Aufruf: cpupower frequency-set [Optionen]\n" #: utils/cpufreq-set.c:27 #, c-format @@ -556,7 +565,7 @@ msgid "" " -d FREQ, --min FREQ new minimum CPU frequency the governor may " "select\n" msgstr "" -" -d FREQ, --min FREQ neue minimale Taktfrequenz, die der Regler\n" +" -d FREQ, --min FREQ neue minimale CPU-Frequenz, die der Regler\n" " auswählen darf\n" #: utils/cpufreq-set.c:28 @@ -571,7 +580,7 @@ msgstr "" #: utils/cpufreq-set.c:29 #, c-format msgid " -g GOV, --governor GOV new cpufreq governor\n" -msgstr " -g GOV, --governors GOV wechsle zu Regler GOV\n" +msgstr " -g GOV, --governors GOV neuer cpufreq-Regler\n" #: utils/cpufreq-set.c:30 #, c-format @@ -579,29 +588,29 @@ msgid "" " -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n" " governor to be available and loaded\n" msgstr "" -" -f FREQ, --freq FREQ setze exakte Taktfrequenz. Benötigt den Regler\n" -" 'userspace'.\n" +" -f FREQ, --freq FREQ bestimmte Frequenz, die eingestellt werden soll.\n" +" Erfordert einen verfügbaren und geladenen " +"userspace-Regler\n" #: utils/cpufreq-set.c:32 #, c-format msgid " -r, --related Switches all hardware-related CPUs\n" -msgstr "" -" -r, --related Setze Werte für alle CPUs, deren Taktfrequenz\n" -" hardwarebedingt identisch ist.\n" +msgstr " -r, --related Schaltet alle hardwarebezogenen CPUs um\n" #: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27 #, c-format msgid " -h, --help Prints out this screen\n" -msgstr " -h, --help Gibt diese Kurzübersicht aus\n" +msgstr " -h, --help Gibt diesen Bildschirm aus\n" #: utils/cpufreq-set.c:35 -#, fuzzy, c-format +#, c-format msgid "" "Notes:\n" "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n" msgstr "" -"Bei den mit * markierten Parametern wird '--cpu 0' angenommen, soweit nicht\n" -"mittels -c oder --cpu etwas anderes angegeben wird\n" +"Hinweis:\n" +"1. Das Weglassen des Arguments -c oder --cpu ist gleichbedeutend mit der " +"Einstellung auf \"all\"\n" #: utils/cpufreq-set.c:37 #, fuzzy, c-format @@ -636,17 +645,21 @@ msgid "" "frequency\n" " or because the userspace governor isn't loaded?\n" msgstr "" -"Beim Einstellen ist ein Fehler aufgetreten. Typische Fehlerquellen sind:\n" -"- nicht ausreichende Rechte (Administrator)\n" -"- der Regler ist nicht verfügbar bzw. nicht geladen\n" -"- die angegebene Taktik ist inkorrekt\n" -"- eine spezifische Frequenz wurde angegeben, aber der Regler 'userspace'\n" -" kann entweder hardwarebedingt nicht genutzt werden oder ist nicht geladen\n" +"Fehler beim Festlegen neuer Werte. Häufige Fehler:\n" +"- Verfügen Sie über die erforderlichen Administrationsrechte? (Superuser?)\n" +"- Ist der von Ihnen gewünschte Regler verfügbar und mittels modprobe " +"geladen?\n" +"- Versuchen Sie eine ungültige Richtlinie festzulegen?\n" +"- Versuchen Sie eine bestimmte Frequenz festzulegen, aber der " +"userspace-Regler ist nicht verfügbar,\n" +" z.B. wegen Hardware, die nicht auf eine bestimmte Frequenz eingestellt " +"werden kann\n" +" oder weil der userspace-Regler nicht geladen ist?\n" #: utils/cpufreq-set.c:170 #, c-format msgid "wrong, unknown or unhandled CPU?\n" -msgstr "unbekannte oder nicht regelbare CPU\n" +msgstr "falsche, unbekannte oder nicht regelbare CPU?\n" #: utils/cpufreq-set.c:302 #, c-format @@ -654,8 +667,8 @@ msgid "" "the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n" "-g/--governor parameters\n" msgstr "" -"Der -f bzw. --freq-Parameter kann nicht mit den Parametern -d/--min, -u/--" -"max\n" +"Der -f bzw. --freq-Parameter kann nicht mit den Parametern -d/--min, " +"-u/--max\n" "oder -g/--governor kombiniert werden\n" #: utils/cpufreq-set.c:308 @@ -664,18 +677,18 @@ msgid "" "At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n" "-g/--governor must be passed\n" msgstr "" -"Es muss mindestens ein Parameter aus -f/--freq, -d/--min, -u/--max oder\n" -"-g/--governor angegeben werden.\n" +"Mindestens ein Parameter aus -f/--freq, -d/--min, -u/--max und\n" +"-g/--governor muss übergeben werden\n" #: utils/cpufreq-set.c:347 #, c-format msgid "Setting cpu: %d\n" -msgstr "" +msgstr "CPU einstellen: %d\n" #: utils/cpupower-set.c:22 #, c-format msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" -msgstr "" +msgstr "Aufruf: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" #: utils/cpupower-set.c:24 #, c-format @@ -689,6 +702,8 @@ msgstr "" msgid "" " -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n" msgstr "" +" -m, --sched-mc [VAL] Legt die Mehrkern-Scheduler-Richtlinie des " +"Kernels fest.\n" #: utils/cpupower-set.c:27 #, c-format @@ -700,37 +715,37 @@ msgstr "" #: utils/cpupower-set.c:80 #, c-format msgid "--perf-bias param out of range [0-%d]\n" -msgstr "" +msgstr "--perf-bias-Parameter außerhalb des Bereichs [0-%d]\n" #: utils/cpupower-set.c:91 #, c-format msgid "--sched-mc param out of range [0-%d]\n" -msgstr "" +msgstr "Parameter --sched-mc außerhalb des Bereichs [0-%d]\n" #: utils/cpupower-set.c:102 #, c-format msgid "--sched-smt param out of range [0-%d]\n" -msgstr "" +msgstr "Parameter --sched-smt außerhalb des Bereichs [0-%d]\n" #: utils/cpupower-set.c:121 #, c-format msgid "Error setting sched-mc %s\n" -msgstr "" +msgstr "Fehler beim Einstellen von sched-mc %s\n" #: utils/cpupower-set.c:127 #, c-format msgid "Error setting sched-smt %s\n" -msgstr "" +msgstr "Fehler beim Einstellen von sched-smt %s\n" #: utils/cpupower-set.c:146 #, c-format msgid "Error setting perf-bias value on CPU %d\n" -msgstr "" +msgstr "Fehler beim Einstellen des perf-bias-Wertes auf der CPU %d\n" #: utils/cpupower-info.c:21 #, c-format msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n" -msgstr "" +msgstr "Aufruf: cpupower info [ -b ] [ -m ] [ -s ]\n" #: utils/cpupower-info.c:23 #, c-format @@ -740,9 +755,10 @@ msgid "" msgstr "" #: utils/cpupower-info.c:25 -#, fuzzy, c-format +#, c-format msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n" -msgstr " -p, --policy Findet die momentane Taktik heraus *\n" +msgstr "" +" -m, --sched-mc Ruft die Mehrkern-Scheduler-Richtlinie des Kernels ab.\n" #: utils/cpupower-info.c:26 #, c-format @@ -756,17 +772,20 @@ msgid "" "\n" "Passing no option will show all info, by default only on core 0\n" msgstr "" +"\n" +"Wenn Sie keine Option übergeben, werden alle Informationen angezeigt, " +"standardmäßig nur auf Kern 0\n" #: utils/cpupower-info.c:102 #, c-format msgid "System's multi core scheduler setting: " -msgstr "" +msgstr "Mehrkern-Scheduler-Einstellung des Systems: " #. if sysfs file is missing it's: errno == ENOENT #: utils/cpupower-info.c:105 utils/cpupower-info.c:114 #, c-format msgid "not supported\n" -msgstr "" +msgstr "nicht unterstützt\n" #: utils/cpupower-info.c:111 #, c-format @@ -786,164 +805,161 @@ msgstr "" #: utils/cpupower-info.c:147 #, c-format msgid "Could not read perf-bias value\n" -msgstr "" +msgstr "perf-bias-Wert konnte nicht gelesen werden\n" #: utils/cpupower-info.c:150 #, c-format msgid "perf-bias: %d\n" -msgstr "" +msgstr "perf-bias: %d\n" #: utils/cpuidle-info.c:28 -#, fuzzy, c-format +#, c-format msgid "Analyzing CPU %d:\n" -msgstr "analysiere CPU %d:\n" +msgstr "CPU %d wird analysiert:\n" #: utils/cpuidle-info.c:32 #, c-format msgid "CPU %u: No idle states\n" -msgstr "" +msgstr "CPU %u: Keine Ruhezustände\n" #: utils/cpuidle-info.c:36 #, c-format msgid "CPU %u: Can't read idle state info\n" -msgstr "" +msgstr "CPU %u: Ruhezustands-Informationen können nicht gelesen werden\n" #: utils/cpuidle-info.c:41 #, c-format msgid "Could not determine max idle state %u\n" -msgstr "" +msgstr "Max. Ruhezustand %u konnte nicht bestimmt werden\n" #: utils/cpuidle-info.c:46 #, c-format msgid "Number of idle states: %d\n" -msgstr "" +msgstr "Anzahl der Ruhezustände: %d\n" #: utils/cpuidle-info.c:48 -#, fuzzy, c-format +#, c-format msgid "Available idle states:" -msgstr " mögliche Taktfrequenzen: " +msgstr "Verfügbare Ruhezustände:" #: utils/cpuidle-info.c:71 #, c-format msgid "Flags/Description: %s\n" -msgstr "" +msgstr "Merker/Beschreibung: %s\n" #: utils/cpuidle-info.c:74 #, c-format msgid "Latency: %lu\n" -msgstr "" +msgstr "Latenz: %lu\n" #: utils/cpuidle-info.c:76 #, c-format msgid "Usage: %lu\n" -msgstr "" +msgstr "Aufruf: %lu\n" #: utils/cpuidle-info.c:78 #, c-format msgid "Duration: %llu\n" -msgstr "" +msgstr "Dauer: %llu\n" #: utils/cpuidle-info.c:90 #, c-format msgid "Could not determine cpuidle driver\n" -msgstr "" +msgstr "cpuidle-Treiber konnte nicht bestimmt werden\n" #: utils/cpuidle-info.c:94 -#, fuzzy, c-format +#, c-format msgid "CPUidle driver: %s\n" -msgstr " Treiber: %s\n" +msgstr "CPUidle-Treiber: %s\n" #: utils/cpuidle-info.c:99 #, c-format msgid "Could not determine cpuidle governor\n" -msgstr "" +msgstr "cpuidle-Regler konnte nicht bestimmt werden\n" #: utils/cpuidle-info.c:103 #, c-format msgid "CPUidle governor: %s\n" -msgstr "" +msgstr "CPUidle-Regler: %s\n" #: utils/cpuidle-info.c:122 #, c-format msgid "CPU %u: Can't read C-state info\n" -msgstr "" +msgstr "CPU %u: C-Zustands-Informationen können nicht gelesen werden\n" #. printf("Cstates: %d\n", cstates); #: utils/cpuidle-info.c:127 #, c-format msgid "active state: C0\n" -msgstr "" +msgstr "aktiver Zustand: C0\n" #: utils/cpuidle-info.c:128 #, c-format msgid "max_cstate: C%u\n" -msgstr "" +msgstr "max_cstate: C%u\n" #: utils/cpuidle-info.c:129 -#, fuzzy, c-format +#, c-format msgid "maximum allowed latency: %lu usec\n" -msgstr " Maximale Dauer eines Taktfrequenzwechsels: " +msgstr "maximal erlaubte Latenz: %lu usec\n" #: utils/cpuidle-info.c:130 #, c-format msgid "states:\t\n" -msgstr "" +msgstr "Zustände:\t\n" #: utils/cpuidle-info.c:132 #, c-format msgid " C%d: type[C%d] " -msgstr "" +msgstr " C%d: Typ[C%d] " #: utils/cpuidle-info.c:134 #, c-format msgid "promotion[--] demotion[--] " -msgstr "" +msgstr "promotion[--] demotion[--] " #: utils/cpuidle-info.c:135 #, c-format msgid "latency[%03lu] " -msgstr "" +msgstr "Latenz[%03lu] " #: utils/cpuidle-info.c:137 #, c-format msgid "usage[%08lu] " -msgstr "" +msgstr "Aufruf[%08lu] " #: utils/cpuidle-info.c:139 #, c-format msgid "duration[%020Lu] \n" -msgstr "" +msgstr "Dauer[%020Lu] \n" #: utils/cpuidle-info.c:147 -#, fuzzy, c-format +#, c-format msgid "Usage: cpupower idleinfo [options]\n" -msgstr "Aufruf: cpufreq-info [Optionen]\n" +msgstr "Aufruf: cpupower idleinfo [Optionen]\n" #: utils/cpuidle-info.c:149 -#, fuzzy, c-format +#, c-format msgid " -s, --silent Only show general C-state information\n" msgstr "" -" -e, --debug Erzeugt detaillierte Informationen, hilfreich\n" -" zum Aufspüren von Fehlern\n" +" -s, --silent Nur allgemeine C-Zustands-Informationen anzeigen\n" #: utils/cpuidle-info.c:150 -#, fuzzy, c-format +#, c-format msgid "" -" -o, --proc Prints out information like provided by the /proc/" -"acpi/processor/*/power\n" +" -o, --proc Prints out information like provided by the " +"/proc/acpi/processor/*/power\n" " interface in older kernels\n" msgstr "" -" -o, --proc Erzeugt Informationen in einem ähnlichem Format zu " -"dem\n" -" der /proc/cpufreq-Datei in 2.4. und frühen 2.6.\n" -" Kernel-Versionen\n" +" -o, --proc Gibt Informationen so aus, wie sie von der " +"Schnittstelle\n" +" /proc/acpi/processor/*/power in älteren Kerneln " +"bereitgestellt werden\n" #: utils/cpuidle-info.c:209 -#, fuzzy, c-format +#, c-format msgid "You can't specify more than one output-specific argument\n" -msgstr "" -"Man kann nicht mehr als einen --cpu-Parameter und/oder mehr als einen\n" -"informationsspezifischen Parameter gleichzeitig angeben\n" +msgstr "Sie können nicht mehr als ein ausgabenspezifisches Argument angeben\n" #~ msgid "" #~ " -c CPU, --cpu CPU CPU number which information shall be determined " @@ -956,6 +972,6 @@ msgstr "" #~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be " #~ "modified\n" #~ msgstr "" -#~ " -c CPU, --cpu CPU Nummer der CPU, deren Taktfrequenz-" -#~ "Einstellung\n" +#~ " -c CPU, --cpu CPU Nummer der CPU, deren " +#~ "Taktfrequenz-Einstellung\n" #~ " werden soll\n" From b3ad17c09899d491cf9815a6db44d3f9b3f244e7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 29 Aug 2019 16:48:05 +0200 Subject: [PATCH 107/126] PM / Domains: Simplify genpd_lookup_dev() genpd_lookup_dev(), is a bit unnecessary heavy, as it walks the gpd_list to try to find a valid PM domain corresponding to the device's attached genpd. Instead of walking the gpd_list, let's use the fact that a genpd always has the ->runtime_suspend() callback assigned to the genpd_runtime_suspend() function. While changing this, let's take the opportunity to also rename genpd_lookup_dev(), into dev_to_genpd_safe() to better reflect its purpose. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b063bc41b0a9..59d6f183d0bb 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -149,29 +149,24 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, return ret; } +static int genpd_runtime_suspend(struct device *dev); + /* * Get the generic PM domain for a particular struct device. * This validates the struct device pointer, the PM domain pointer, * and checks that the PM domain pointer is a real generic PM domain. * Any failure results in NULL being returned. */ -static struct generic_pm_domain *genpd_lookup_dev(struct device *dev) +static struct generic_pm_domain *dev_to_genpd_safe(struct device *dev) { - struct generic_pm_domain *genpd = NULL, *gpd; - if (IS_ERR_OR_NULL(dev) || IS_ERR_OR_NULL(dev->pm_domain)) return NULL; - mutex_lock(&gpd_list_lock); - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - if (&gpd->domain == dev->pm_domain) { - genpd = gpd; - break; - } - } - mutex_unlock(&gpd_list_lock); + /* A genpd's always have its ->runtime_suspend() callback assigned. */ + if (dev->pm_domain->ops.runtime_suspend == genpd_runtime_suspend) + return pd_to_genpd(dev->pm_domain); - return genpd; + return NULL; } /* @@ -1610,7 +1605,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, */ int pm_genpd_remove_device(struct device *dev) { - struct generic_pm_domain *genpd = genpd_lookup_dev(dev); + struct generic_pm_domain *genpd = dev_to_genpd_safe(dev); if (!genpd) return -EINVAL; From 3ea4ca9267cf5d02149f7d8a11d8b4c82723a668 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 29 Aug 2019 16:48:15 +0200 Subject: [PATCH 108/126] PM / Domains: Verify PM domain type in dev_pm_genpd_set_performance_state() The dev_pm_genpd_set_performance_state() could in principle be called for a device that has a different PM domain type attached than a genpd. This would lead to a problem as dev_to_genpd() uses the container_of macro. Address this problem by using dev_to_genpd_safe() instead. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 59d6f183d0bb..cc85e87eaf05 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -380,8 +380,8 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) unsigned int prev; int ret; - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) + genpd = dev_to_genpd_safe(dev); + if (!genpd) return -ENODEV; if (unlikely(!genpd->set_performance_state)) From df0eea4488081e0698b0b58ccd1e8c8823e22841 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:09 +0530 Subject: [PATCH 109/126] cpufreq: Remove CPUFREQ_ADJUST and CPUFREQ_NOTIFY policy notifier events No driver makes reference to these events now, remove them and the code related to them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 36 +++++++----------------------------- include/linux/cpufreq.h | 6 ++---- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c13dcb59b30c..e0ee23895497 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2360,15 +2360,13 @@ EXPORT_SYMBOL(cpufreq_get_policy); * @policy: Policy object to modify. * @new_policy: New policy data. * - * Pass @new_policy to the cpufreq driver's ->verify() callback, run the - * installed policy notifiers for it with the CPUFREQ_ADJUST value, pass it to - * the driver's ->verify() callback again and run the notifiers for it again - * with the CPUFREQ_NOTIFY value. Next, copy the min and max parameters - * of @new_policy to @policy and either invoke the driver's ->setpolicy() - * callback (if present) or carry out a governor update for @policy. That is, - * run the current governor's ->limits() callback (if the governor field in - * @new_policy points to the same object as the one in @policy) or replace the - * governor for @policy with the new one stored in @new_policy. + * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the + * min and max parameters of @new_policy to @policy and either invoke the + * driver's ->setpolicy() callback (if present) or carry out a governor update + * for @policy. That is, run the current governor's ->limits() callback (if the + * governor field in @new_policy points to the same object as the one in + * @policy) or replace the governor for @policy with the new one stored in + * @new_policy. * * The cpuinfo part of @policy is not updated by this function. */ @@ -2396,26 +2394,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, if (ret) return ret; - /* - * The notifier-chain shall be removed once all the users of - * CPUFREQ_ADJUST are moved to use the QoS framework. - */ - /* adjust if necessary - all reasons */ - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_ADJUST, new_policy); - - /* - * verify the cpu speed can be set within this limit, which might be - * different to the first one - */ - ret = cpufreq_driver->verify(new_policy); - if (ret) - return ret; - - /* notification of the new policy */ - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_NOTIFY, new_policy); - policy->min = new_policy->min; policy->max = new_policy->max; trace_cpu_frequency_limits(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index afc10384a681..c57e88e85c41 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -456,10 +456,8 @@ static inline void cpufreq_resume(void) {} #define CPUFREQ_POSTCHANGE (1) /* Policy Notifiers */ -#define CPUFREQ_ADJUST (0) -#define CPUFREQ_NOTIFY (1) -#define CPUFREQ_CREATE_POLICY (2) -#define CPUFREQ_REMOVE_POLICY (3) +#define CPUFREQ_CREATE_POLICY (0) +#define CPUFREQ_REMOVE_POLICY (1) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); From c27c38afd871f54b40bd40172cb4e23fba01113c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jul 2019 11:44:10 +0530 Subject: [PATCH 110/126] Documentation: cpufreq: Update policy notifier documentation Update documentation with the recent policy notifier updates. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/core.txt | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index 55193e680250..ed577d9c154b 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -57,19 +57,11 @@ transition notifiers. 2.1 CPUFreq policy notifiers ---------------------------- -These are notified when a new policy is intended to be set. Each -CPUFreq policy notifier is called twice for a policy transition: +These are notified when a new policy is created or removed. -1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if - they see a need for this - may it be thermal considerations or - hardware limitations. - -2.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy - - if two hardware drivers failed to agree on a new policy before this - stage, the incompatible hardware shall be shut down, and the user - informed of this. - -The phase is specified in the second argument to the notifier. +The phase is specified in the second argument to the notifier. The phase is +CPUFREQ_CREATE_POLICY when the policy is first created and it is +CPUFREQ_REMOVE_POLICY when the policy is removed. The third argument, a void *pointer, points to a struct cpufreq_policy consisting of several values, including min, max (the lower and upper From f6081a73091c0902efb45f47706d35284ebb4e9a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 30 Aug 2019 12:29:15 +0200 Subject: [PATCH 111/126] dt-bindings: opp: qcom-nvmem: Support pstates provided by a power domain Some Qualcomm SoCs have support for Core Power Reduction (CPR). On these platforms, we need to attach to the power domain provider providing the performance states, so that the leaky device (the CPU) can configure the performance states (which represent different CPU clock frequencies). Signed-off-by: Niklas Cassel Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/opp/qcom-nvmem-cpufreq.txt | 113 +++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt index c5ea8b90e35d..4751029b9b74 100644 --- a/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt +++ b/Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt @@ -14,7 +14,7 @@ operating-points-v2 table when it is parsed by the OPP framework. Required properties: -------------------- -In 'cpus' nodes: +In 'cpu' nodes: - operating-points-v2: Phandle to the operating-points-v2 table to use. In 'operating-points-v2' table: @@ -23,6 +23,15 @@ In 'operating-points-v2' table: Optional properties: -------------------- +In 'cpu' nodes: +- power-domains: A phandle pointing to the PM domain specifier which provides + the performance states available for active state management. + Please refer to the power-domains bindings + Documentation/devicetree/bindings/power/power_domain.txt + and also examples below. +- power-domain-names: Should be + - 'cpr' for qcs404. + In 'operating-points-v2' table: - nvmem-cells: A phandle pointing to a nvmem-cells node representing the efuse registers that has information about the @@ -682,3 +691,105 @@ soc { }; }; }; + +Example 2: +--------- + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + CPU0: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x100>; + .... + clocks = <&apcs_glb>; + operating-points-v2 = <&cpu_opp_table>; + power-domains = <&cpr>; + power-domain-names = "cpr"; + }; + + CPU1: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x101>; + .... + clocks = <&apcs_glb>; + operating-points-v2 = <&cpu_opp_table>; + power-domains = <&cpr>; + power-domain-names = "cpr"; + }; + + CPU2: cpu@102 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x102>; + .... + clocks = <&apcs_glb>; + operating-points-v2 = <&cpu_opp_table>; + power-domains = <&cpr>; + power-domain-names = "cpr"; + }; + + CPU3: cpu@103 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x103>; + .... + clocks = <&apcs_glb>; + operating-points-v2 = <&cpu_opp_table>; + power-domains = <&cpr>; + power-domain-names = "cpr"; + }; + }; + + cpu_opp_table: cpu-opp-table { + compatible = "operating-points-v2-kryo-cpu"; + opp-shared; + + opp-1094400000 { + opp-hz = /bits/ 64 <1094400000>; + required-opps = <&cpr_opp1>; + }; + opp-1248000000 { + opp-hz = /bits/ 64 <1248000000>; + required-opps = <&cpr_opp2>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; + required-opps = <&cpr_opp3>; + }; + }; + + cpr_opp_table: cpr-opp-table { + compatible = "operating-points-v2-qcom-level"; + + cpr_opp1: opp1 { + opp-level = <1>; + qcom,opp-fuse-level = <1>; + }; + cpr_opp2: opp2 { + opp-level = <2>; + qcom,opp-fuse-level = <2>; + }; + cpr_opp3: opp3 { + opp-level = <3>; + qcom,opp-fuse-level = <3>; + }; + }; + +.... + +soc { +.... + cpr: power-controller@b018000 { + compatible = "qcom,qcs404-cpr", "qcom,cpr"; + reg = <0x0b018000 0x1000>; + .... + vdd-apc-supply = <&pms405_s3>; + #power-domain-cells = <0>; + operating-points-v2 = <&cpr_opp_table>; + .... + }; +}; From 475a21e0eade5227488d97aebeaa882b16f38ca1 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 19 Aug 2019 12:12:38 +0200 Subject: [PATCH 112/126] dt-bindings: opp: Add qcom-opp bindings with properties needed for CPR Add qcom-opp bindings with properties needed for Core Power Reduction (CPR). CPR is included in a great variety of Qualcomm SoCs, e.g. msm8916 and msm8996. CPR was first introduced in msm8974. Co-developed-by: Jorge Ramirez-Ortiz Signed-off-by: Jorge Ramirez-Ortiz Signed-off-by: Niklas Cassel Reviewed-by: Stephen Boyd Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/opp/qcom-opp.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 Documentation/devicetree/bindings/opp/qcom-opp.txt diff --git a/Documentation/devicetree/bindings/opp/qcom-opp.txt b/Documentation/devicetree/bindings/opp/qcom-opp.txt new file mode 100644 index 000000000000..32eb0793c7e6 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/qcom-opp.txt @@ -0,0 +1,19 @@ +Qualcomm OPP bindings to describe OPP nodes + +The bindings are based on top of the operating-points-v2 bindings +described in Documentation/devicetree/bindings/opp/opp.txt +Additional properties are described below. + +* OPP Table Node + +Required properties: +- compatible: Allow OPPs to express their compatibility. It should be: + "operating-points-v2-qcom-level" + +* OPP Node + +Required properties: +- qcom,opp-fuse-level: A positive value representing the fuse corner/level + associated with this OPP node. Sometimes several corners/levels shares + a certain fuse corner/level. A fuse corner/level contains e.g. ref uV, + min uV, and max uV. From 7d12709544b8b3fb9727a34a664b8380e1e3493a Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Thu, 25 Jul 2019 12:41:31 +0200 Subject: [PATCH 113/126] cpufreq: qcom: Re-organise kryo cpufreq to use it for other nvmem based qcom socs The kryo cpufreq driver reads the nvmem cell and uses that data to populate the opps. There are other qcom cpufreq socs like krait which does similar thing. Except for the interpretation of the read data, rest of the driver is same for both the cases. So pull the common things out for reuse. Signed-off-by: Sricharan R [niklas.cassel@linaro.org: split dt-binding into a separate patch and do not rename the compatible string. Update MAINTAINERS file.] Signed-off-by: Niklas Cassel Reviewed-by: Ilia Lin Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar --- MAINTAINERS | 4 +- drivers/cpufreq/Kconfig.arm | 4 +- drivers/cpufreq/Makefile | 2 +- ...om-cpufreq-kryo.c => qcom-cpufreq-nvmem.c} | 122 +++++++++++------- 4 files changed, 78 insertions(+), 54 deletions(-) rename drivers/cpufreq/{qcom-cpufreq-kryo.c => qcom-cpufreq-nvmem.c} (69%) diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..cc73f908d5f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13285,8 +13285,8 @@ QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096 M: Ilia Lin L: linux-pm@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt -F: drivers/cpufreq/qcom-cpufreq-kryo.c +F: Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt +F: drivers/cpufreq/qcom-cpufreq-nvmem.c QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 56c31a78c692..b1aa485a28dd 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -120,8 +120,8 @@ config ARM_OMAP2PLUS_CPUFREQ depends on ARCH_OMAP2PLUS default ARCH_OMAP2PLUS -config ARM_QCOM_CPUFREQ_KRYO - tristate "Qualcomm Kryo based CPUFreq" +config ARM_QCOM_CPUFREQ_NVMEM + tristate "Qualcomm nvmem based CPUFreq" depends on ARM64 depends on QCOM_QFPROM depends on QCOM_SMEM diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 5a6c70d26c98..8572a918aa75 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -64,7 +64,7 @@ obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o obj-$(CONFIG_ARM_QCOM_CPUFREQ_HW) += qcom-cpufreq-hw.o -obj-$(CONFIG_ARM_QCOM_CPUFREQ_KRYO) += qcom-cpufreq-kryo.o +obj-$(CONFIG_ARM_QCOM_CPUFREQ_NVMEM) += qcom-cpufreq-nvmem.o obj-$(CONFIG_ARM_RASPBERRYPI_CPUFREQ) += raspberrypi-cpufreq.o obj-$(CONFIG_ARM_S3C2410_CPUFREQ) += s3c2410-cpufreq.o obj-$(CONFIG_ARM_S3C2412_CPUFREQ) += s3c2412-cpufreq.o diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c similarity index 69% rename from drivers/cpufreq/qcom-cpufreq-kryo.c rename to drivers/cpufreq/qcom-cpufreq-nvmem.c index dd64dcf89c74..fd08120768af 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -9,7 +9,7 @@ * based on the silicon variant in use. Qualcomm Process Voltage Scaling Tables * defines the voltage and frequency value based on the msm-id in SMEM * and speedbin blown in the efuse combination. - * The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the SoC + * The qcom-cpufreq-nvmem driver reads the msm-id and efuse value from the SoC * to provide the OPP framework with required information. * This is used to determine the voltage and frequency value for each OPP of * operating-points-v2 table when it is parsed by the OPP framework. @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -42,9 +43,9 @@ enum _msm8996_version { NUM_OF_MSM8996_VERSIONS, }; -static struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev; +static struct platform_device *cpufreq_dt_pdev, *cpufreq_pdev; -static enum _msm8996_version qcom_cpufreq_kryo_get_msm_id(void) +static enum _msm8996_version qcom_cpufreq_get_msm_id(void) { size_t len; u32 *msm_id; @@ -73,28 +74,62 @@ static enum _msm8996_version qcom_cpufreq_kryo_get_msm_id(void) return version; } -static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) +static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, + struct nvmem_cell *speedbin_nvmem, + u32 *versions) +{ + size_t len; + u8 *speedbin; + enum _msm8996_version msm8996_version; + + msm8996_version = qcom_cpufreq_get_msm_id(); + if (NUM_OF_MSM8996_VERSIONS == msm8996_version) { + dev_err(cpu_dev, "Not Snapdragon 820/821!"); + return -ENODEV; + } + + speedbin = nvmem_cell_read(speedbin_nvmem, &len); + if (IS_ERR(speedbin)) + return PTR_ERR(speedbin); + + switch (msm8996_version) { + case MSM8996_V3: + *versions = 1 << (unsigned int)(*speedbin); + break; + case MSM8996_SG: + *versions = 1 << ((unsigned int)(*speedbin) + 4); + break; + default: + BUG(); + break; + } + + kfree(speedbin); + return 0; +} + +static int qcom_cpufreq_probe(struct platform_device *pdev) { struct opp_table **opp_tables; - enum _msm8996_version msm8996_version; + int (*get_version)(struct device *cpu_dev, + struct nvmem_cell *speedbin_nvmem, + u32 *versions); struct nvmem_cell *speedbin_nvmem; struct device_node *np; struct device *cpu_dev; unsigned cpu; - u8 *speedbin; u32 versions; - size_t len; + const struct of_device_id *match; int ret; cpu_dev = get_cpu_device(0); if (!cpu_dev) return -ENODEV; - msm8996_version = qcom_cpufreq_kryo_get_msm_id(); - if (NUM_OF_MSM8996_VERSIONS == msm8996_version) { - dev_err(cpu_dev, "Not Snapdragon 820/821!"); + match = pdev->dev.platform_data; + get_version = match->data; + if (!get_version) return -ENODEV; - } np = dev_pm_opp_of_get_opp_desc_node(cpu_dev); if (!np) @@ -115,23 +150,10 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) return PTR_ERR(speedbin_nvmem); } - speedbin = nvmem_cell_read(speedbin_nvmem, &len); + ret = get_version(cpu_dev, speedbin_nvmem, &versions); nvmem_cell_put(speedbin_nvmem); - if (IS_ERR(speedbin)) - return PTR_ERR(speedbin); - - switch (msm8996_version) { - case MSM8996_V3: - versions = 1 << (unsigned int)(*speedbin); - break; - case MSM8996_SG: - versions = 1 << ((unsigned int)(*speedbin) + 4); - break; - default: - BUG(); - break; - } - kfree(speedbin); + if (ret) + return ret; opp_tables = kcalloc(num_possible_cpus(), sizeof(*opp_tables), GFP_KERNEL); if (!opp_tables) @@ -174,7 +196,7 @@ free_opp: return ret; } -static int qcom_cpufreq_kryo_remove(struct platform_device *pdev) +static int qcom_cpufreq_remove(struct platform_device *pdev) { struct opp_table **opp_tables = platform_get_drvdata(pdev); unsigned int cpu; @@ -189,18 +211,20 @@ static int qcom_cpufreq_kryo_remove(struct platform_device *pdev) return 0; } -static struct platform_driver qcom_cpufreq_kryo_driver = { - .probe = qcom_cpufreq_kryo_probe, - .remove = qcom_cpufreq_kryo_remove, +static struct platform_driver qcom_cpufreq_driver = { + .probe = qcom_cpufreq_probe, + .remove = qcom_cpufreq_remove, .driver = { - .name = "qcom-cpufreq-kryo", + .name = "qcom-cpufreq-nvmem", }, }; -static const struct of_device_id qcom_cpufreq_kryo_match_list[] __initconst = { - { .compatible = "qcom,apq8096", }, - { .compatible = "qcom,msm8996", }, - {} +static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { + { .compatible = "qcom,apq8096", + .data = qcom_cpufreq_kryo_name_version }, + { .compatible = "qcom,msm8996", + .data = qcom_cpufreq_kryo_name_version }, + {}, }; /* @@ -209,7 +233,7 @@ static const struct of_device_id qcom_cpufreq_kryo_match_list[] __initconst = { * which may be defered as well. The init here is only registering * the driver and the platform device. */ -static int __init qcom_cpufreq_kryo_init(void) +static int __init qcom_cpufreq_init(void) { struct device_node *np = of_find_node_by_path("/"); const struct of_device_id *match; @@ -218,32 +242,32 @@ static int __init qcom_cpufreq_kryo_init(void) if (!np) return -ENODEV; - match = of_match_node(qcom_cpufreq_kryo_match_list, np); + match = of_match_node(qcom_cpufreq_match_list, np); of_node_put(np); if (!match) return -ENODEV; - ret = platform_driver_register(&qcom_cpufreq_kryo_driver); + ret = platform_driver_register(&qcom_cpufreq_driver); if (unlikely(ret < 0)) return ret; - kryo_cpufreq_pdev = platform_device_register_simple( - "qcom-cpufreq-kryo", -1, NULL, 0); - ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev); + cpufreq_pdev = platform_device_register_data(NULL, "qcom-cpufreq-nvmem", + -1, match, sizeof(*match)); + ret = PTR_ERR_OR_ZERO(cpufreq_pdev); if (0 == ret) return 0; - platform_driver_unregister(&qcom_cpufreq_kryo_driver); + platform_driver_unregister(&qcom_cpufreq_driver); return ret; } -module_init(qcom_cpufreq_kryo_init); +module_init(qcom_cpufreq_init); -static void __exit qcom_cpufreq_kryo_exit(void) +static void __exit qcom_cpufreq_exit(void) { - platform_device_unregister(kryo_cpufreq_pdev); - platform_driver_unregister(&qcom_cpufreq_kryo_driver); + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&qcom_cpufreq_driver); } -module_exit(qcom_cpufreq_kryo_exit); +module_exit(qcom_cpufreq_exit); -MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver"); +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. CPUfreq driver"); MODULE_LICENSE("GPL v2"); From 57f2f8b4aa0c6b41a284da82bfa40dc3b2abe9a5 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 25 Jul 2019 12:41:33 +0200 Subject: [PATCH 114/126] cpufreq: qcom: Refactor the driver to make it easier to extend Refactor the driver to make it easier to extend in a later commit. Create a driver struct to collect all common resources, in order to make it easier to free up all common resources. Create a driver match_data struct to make it easier to extend the driver with support for new features that might only be supported on certain SoCs. Co-developed-by: Jorge Ramirez-Ortiz Signed-off-by: Jorge Ramirez-Ortiz Signed-off-by: Niklas Cassel Reviewed-by: Ilia Lin Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 123 +++++++++++++++++---------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index fd08120768af..2d798a1685c5 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -43,6 +43,20 @@ enum _msm8996_version { NUM_OF_MSM8996_VERSIONS, }; +struct qcom_cpufreq_drv; + +struct qcom_cpufreq_match_data { + int (*get_version)(struct device *cpu_dev, + struct nvmem_cell *speedbin_nvmem, + struct qcom_cpufreq_drv *drv); +}; + +struct qcom_cpufreq_drv { + struct opp_table **opp_tables; + u32 versions; + const struct qcom_cpufreq_match_data *data; +}; + static struct platform_device *cpufreq_dt_pdev, *cpufreq_pdev; static enum _msm8996_version qcom_cpufreq_get_msm_id(void) @@ -76,7 +90,7 @@ static enum _msm8996_version qcom_cpufreq_get_msm_id(void) static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, struct nvmem_cell *speedbin_nvmem, - u32 *versions) + struct qcom_cpufreq_drv *drv) { size_t len; u8 *speedbin; @@ -94,10 +108,10 @@ static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, switch (msm8996_version) { case MSM8996_V3: - *versions = 1 << (unsigned int)(*speedbin); + drv->versions = 1 << (unsigned int)(*speedbin); break; case MSM8996_SG: - *versions = 1 << ((unsigned int)(*speedbin) + 4); + drv->versions = 1 << ((unsigned int)(*speedbin) + 4); break; default: BUG(); @@ -108,17 +122,17 @@ static int qcom_cpufreq_kryo_name_version(struct device *cpu_dev, return 0; } +static const struct qcom_cpufreq_match_data match_data_kryo = { + .get_version = qcom_cpufreq_kryo_name_version, +}; + static int qcom_cpufreq_probe(struct platform_device *pdev) { - struct opp_table **opp_tables; - int (*get_version)(struct device *cpu_dev, - struct nvmem_cell *speedbin_nvmem, - u32 *versions); + struct qcom_cpufreq_drv *drv; struct nvmem_cell *speedbin_nvmem; struct device_node *np; struct device *cpu_dev; unsigned cpu; - u32 versions; const struct of_device_id *match; int ret; @@ -126,11 +140,6 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) if (!cpu_dev) return -ENODEV; - match = pdev->dev.platform_data; - get_version = match->data; - if (!get_version) - return -ENODEV; - np = dev_pm_opp_of_get_opp_desc_node(cpu_dev); if (!np) return -ENOENT; @@ -141,23 +150,43 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) return -ENOENT; } - speedbin_nvmem = of_nvmem_cell_get(np, NULL); - of_node_put(np); - if (IS_ERR(speedbin_nvmem)) { - if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) - dev_err(cpu_dev, "Could not get nvmem cell: %ld\n", - PTR_ERR(speedbin_nvmem)); - return PTR_ERR(speedbin_nvmem); + drv = kzalloc(sizeof(*drv), GFP_KERNEL); + if (!drv) + return -ENOMEM; + + match = pdev->dev.platform_data; + drv->data = match->data; + if (!drv->data) { + ret = -ENODEV; + goto free_drv; } - ret = get_version(cpu_dev, speedbin_nvmem, &versions); - nvmem_cell_put(speedbin_nvmem); - if (ret) - return ret; + if (drv->data->get_version) { + speedbin_nvmem = of_nvmem_cell_get(np, NULL); + if (IS_ERR(speedbin_nvmem)) { + if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) + dev_err(cpu_dev, + "Could not get nvmem cell: %ld\n", + PTR_ERR(speedbin_nvmem)); + ret = PTR_ERR(speedbin_nvmem); + goto free_drv; + } - opp_tables = kcalloc(num_possible_cpus(), sizeof(*opp_tables), GFP_KERNEL); - if (!opp_tables) - return -ENOMEM; + ret = drv->data->get_version(cpu_dev, speedbin_nvmem, drv); + if (ret) { + nvmem_cell_put(speedbin_nvmem); + goto free_drv; + } + nvmem_cell_put(speedbin_nvmem); + } + of_node_put(np); + + drv->opp_tables = kcalloc(num_possible_cpus(), sizeof(*drv->opp_tables), + GFP_KERNEL); + if (!drv->opp_tables) { + ret = -ENOMEM; + goto free_drv; + } for_each_possible_cpu(cpu) { cpu_dev = get_cpu_device(cpu); @@ -166,19 +195,23 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) goto free_opp; } - opp_tables[cpu] = dev_pm_opp_set_supported_hw(cpu_dev, - &versions, 1); - if (IS_ERR(opp_tables[cpu])) { - ret = PTR_ERR(opp_tables[cpu]); - dev_err(cpu_dev, "Failed to set supported hardware\n"); - goto free_opp; + if (drv->data->get_version) { + drv->opp_tables[cpu] = + dev_pm_opp_set_supported_hw(cpu_dev, + &drv->versions, 1); + if (IS_ERR(drv->opp_tables[cpu])) { + ret = PTR_ERR(drv->opp_tables[cpu]); + dev_err(cpu_dev, + "Failed to set supported hardware\n"); + goto free_opp; + } } } cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); if (!IS_ERR(cpufreq_dt_pdev)) { - platform_set_drvdata(pdev, opp_tables); + platform_set_drvdata(pdev, drv); return 0; } @@ -187,26 +220,30 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) free_opp: for_each_possible_cpu(cpu) { - if (IS_ERR_OR_NULL(opp_tables[cpu])) + if (IS_ERR_OR_NULL(drv->opp_tables[cpu])) break; - dev_pm_opp_put_supported_hw(opp_tables[cpu]); + dev_pm_opp_put_supported_hw(drv->opp_tables[cpu]); } - kfree(opp_tables); + kfree(drv->opp_tables); +free_drv: + kfree(drv); return ret; } static int qcom_cpufreq_remove(struct platform_device *pdev) { - struct opp_table **opp_tables = platform_get_drvdata(pdev); + struct qcom_cpufreq_drv *drv = platform_get_drvdata(pdev); unsigned int cpu; platform_device_unregister(cpufreq_dt_pdev); for_each_possible_cpu(cpu) - dev_pm_opp_put_supported_hw(opp_tables[cpu]); + if (drv->opp_tables[cpu]) + dev_pm_opp_put_supported_hw(drv->opp_tables[cpu]); - kfree(opp_tables); + kfree(drv->opp_tables); + kfree(drv); return 0; } @@ -220,10 +257,8 @@ static struct platform_driver qcom_cpufreq_driver = { }; static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { - { .compatible = "qcom,apq8096", - .data = qcom_cpufreq_kryo_name_version }, - { .compatible = "qcom,msm8996", - .data = qcom_cpufreq_kryo_name_version }, + { .compatible = "qcom,apq8096", .data = &match_data_kryo }, + { .compatible = "qcom,msm8996", .data = &match_data_kryo }, {}, }; From 1cb8339ca225601610b11c7a47e69faa48943077 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 25 Jul 2019 12:41:35 +0200 Subject: [PATCH 115/126] cpufreq: qcom: Add support for qcs404 on nvmem driver Add support for qcs404 on nvmem driver. The qcs404 SoC has support for Core Power Reduction (CPR), which is implemented as a power domain provider, therefore add optional support in this driver to attach to a genpd power domain. Co-developed-by: Jorge Ramirez-Ortiz Signed-off-by: Jorge Ramirez-Ortiz Signed-off-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 50 ++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 2d798a1685c5..f0d2d5035413 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -49,10 +50,12 @@ struct qcom_cpufreq_match_data { int (*get_version)(struct device *cpu_dev, struct nvmem_cell *speedbin_nvmem, struct qcom_cpufreq_drv *drv); + const char **genpd_names; }; struct qcom_cpufreq_drv { struct opp_table **opp_tables; + struct opp_table **genpd_opp_tables; u32 versions; const struct qcom_cpufreq_match_data *data; }; @@ -126,6 +129,12 @@ static const struct qcom_cpufreq_match_data match_data_kryo = { .get_version = qcom_cpufreq_kryo_name_version, }; +static const char *qcs404_genpd_names[] = { "cpr", NULL }; + +static const struct qcom_cpufreq_match_data match_data_qcs404 = { + .genpd_names = qcs404_genpd_names, +}; + static int qcom_cpufreq_probe(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv; @@ -188,11 +197,19 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) goto free_drv; } + drv->genpd_opp_tables = kcalloc(num_possible_cpus(), + sizeof(*drv->genpd_opp_tables), + GFP_KERNEL); + if (!drv->genpd_opp_tables) { + ret = -ENOMEM; + goto free_opp; + } + for_each_possible_cpu(cpu) { cpu_dev = get_cpu_device(cpu); if (NULL == cpu_dev) { ret = -ENODEV; - goto free_opp; + goto free_genpd_opp; } if (drv->data->get_version) { @@ -203,7 +220,22 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) ret = PTR_ERR(drv->opp_tables[cpu]); dev_err(cpu_dev, "Failed to set supported hardware\n"); - goto free_opp; + goto free_genpd_opp; + } + } + + if (drv->data->genpd_names) { + drv->genpd_opp_tables[cpu] = + dev_pm_opp_attach_genpd(cpu_dev, + drv->data->genpd_names, + NULL); + if (IS_ERR(drv->genpd_opp_tables[cpu])) { + ret = PTR_ERR(drv->genpd_opp_tables[cpu]); + if (ret != -EPROBE_DEFER) + dev_err(cpu_dev, + "Could not attach to pm_domain: %d\n", + ret); + goto free_genpd_opp; } } } @@ -218,6 +250,13 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) ret = PTR_ERR(cpufreq_dt_pdev); dev_err(cpu_dev, "Failed to register platform device\n"); +free_genpd_opp: + for_each_possible_cpu(cpu) { + if (IS_ERR_OR_NULL(drv->genpd_opp_tables[cpu])) + break; + dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); + } + kfree(drv->genpd_opp_tables); free_opp: for_each_possible_cpu(cpu) { if (IS_ERR_OR_NULL(drv->opp_tables[cpu])) @@ -238,11 +277,15 @@ static int qcom_cpufreq_remove(struct platform_device *pdev) platform_device_unregister(cpufreq_dt_pdev); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { if (drv->opp_tables[cpu]) dev_pm_opp_put_supported_hw(drv->opp_tables[cpu]); + if (drv->genpd_opp_tables[cpu]) + dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); + } kfree(drv->opp_tables); + kfree(drv->genpd_opp_tables); kfree(drv); return 0; @@ -259,6 +302,7 @@ static struct platform_driver qcom_cpufreq_driver = { static const struct of_device_id qcom_cpufreq_match_list[] __initconst = { { .compatible = "qcom,apq8096", .data = &match_data_kryo }, { .compatible = "qcom,msm8996", .data = &match_data_kryo }, + { .compatible = "qcom,qcs404", .data = &match_data_qcs404 }, {}, }; From 248b5f297cf809b6a61e38db22049011f619518b Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Thu, 25 Jul 2019 12:41:36 +0200 Subject: [PATCH 116/126] cpufreq: Add qcs404 to cpufreq-dt-platdev blacklist Add qcs404 to cpufreq-dt-platdev blacklist. Signed-off-by: Jorge Ramirez-Ortiz Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 03dc4244ab00..ec6ef996e637 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -123,6 +123,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, + { .compatible = "qcom,qcs404", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, From 97d3eb9da84cae0548359b0aecb8619faad003b7 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Mon, 2 Sep 2019 11:40:31 +0100 Subject: [PATCH 117/126] cpuidle-haltpoll: vcpu hotplug support When cpus != maxcpus cpuidle-haltpoll will fail to register all vcpus past the online ones and thus fail to register the idle driver. This is because cpuidle_add_sysfs() will return with -ENODEV as a consequence from get_cpu_device() return no device for a non-existing CPU. Instead switch to cpuidle_register_driver() and manually register each of the present cpus through cpuhp_setup_state() callbacks and future ones that get onlined or offlined. This mimmics similar logic that intel_idle does. Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver") Signed-off-by: Joao Martins Signed-off-by: Boris Ostrovsky Reviewed-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/cpuidle_haltpoll.h | 4 +- arch/x86/kernel/kvm.c | 18 +++---- drivers/cpuidle/cpuidle-haltpoll.c | 68 +++++++++++++++++++++++-- include/linux/cpuidle_haltpoll.h | 4 +- 4 files changed, 73 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h index ff8607d81526..c8b39c6716ff 100644 --- a/arch/x86/include/asm/cpuidle_haltpoll.h +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -2,7 +2,7 @@ #ifndef _ARCH_HALTPOLL_H #define _ARCH_HALTPOLL_H -void arch_haltpoll_enable(void); -void arch_haltpoll_disable(void); +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f48401be8ce0..60bab4a3b36b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -888,32 +888,26 @@ static void kvm_enable_host_haltpoll(void *i) wrmsrl(MSR_KVM_POLL_CONTROL, 1); } -void arch_haltpoll_enable(void) +void arch_haltpoll_enable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { - printk(KERN_ERR "kvm: host does not support poll control\n"); - printk(KERN_ERR "kvm: host upgrade recommended\n"); + pr_err_once("kvm: host does not support poll control\n"); + pr_err_once("kvm: host upgrade recommended\n"); return; } - preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_disable_host_haltpoll(NULL); - smp_call_function(kvm_disable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_enable); -void arch_haltpoll_disable(void) +void arch_haltpoll_disable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) return; - preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_enable_host_haltpoll(NULL); - smp_call_function(kvm_enable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_disable); #endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 9ac093dcbb01..56d8ab814466 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -11,12 +11,16 @@ */ #include +#include #include #include #include #include #include +static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; +static enum cpuhp_state haltpoll_hp_state; + static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -46,6 +50,46 @@ static struct cpuidle_driver haltpoll_driver = { .state_count = 2, }; +static int haltpoll_cpu_online(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (!dev->registered) { + dev->cpu = cpu; + if (cpuidle_register_device(dev)) { + pr_notice("cpuidle_register_device %d failed!\n", cpu); + return -EIO; + } + arch_haltpoll_enable(cpu); + } + + return 0; +} + +static int haltpoll_cpu_offline(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (dev->registered) { + arch_haltpoll_disable(cpu); + cpuidle_unregister_device(dev); + } + + return 0; +} + +static void haltpoll_uninit(void) +{ + if (haltpoll_hp_state) + cpuhp_remove_state(haltpoll_hp_state); + cpuidle_unregister_driver(&haltpoll_driver); + + free_percpu(haltpoll_cpuidle_devices); + haltpoll_cpuidle_devices = NULL; +} + static int __init haltpoll_init(void) { int ret; @@ -56,17 +100,31 @@ static int __init haltpoll_init(void) if (!kvm_para_available()) return 0; - ret = cpuidle_register(&haltpoll_driver, NULL); - if (ret == 0) - arch_haltpoll_enable(); + ret = cpuidle_register_driver(drv); + if (ret < 0) + return ret; + + haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (haltpoll_cpuidle_devices == NULL) { + cpuidle_unregister_driver(drv); + return -ENOMEM; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online", + haltpoll_cpu_online, haltpoll_cpu_offline); + if (ret < 0) { + haltpoll_uninit(); + } else { + haltpoll_hp_state = ret; + ret = 0; + } return ret; } static void __exit haltpoll_exit(void) { - arch_haltpoll_disable(); - cpuidle_unregister(&haltpoll_driver); + haltpoll_uninit(); } module_init(haltpoll_init); diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h index fe5954c2409e..d50c1e0411a2 100644 --- a/include/linux/cpuidle_haltpoll.h +++ b/include/linux/cpuidle_haltpoll.h @@ -5,11 +5,11 @@ #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL #include #else -static inline void arch_haltpoll_enable(void) +static inline void arch_haltpoll_enable(unsigned int cpu) { } -static inline void arch_haltpoll_disable(void) +static inline void arch_haltpoll_disable(unsigned int cpu) { } #endif From 82e430a6df7f0b5972c7fe717faffea823c6b84a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 2 Aug 2019 19:34:23 +0200 Subject: [PATCH 118/126] cpuidle: play_idle: Increase the resolution to usec The play_idle resolution is 1ms. The intel_powerclamp bases the idle duration on jiffies. The idle injection API is also using msec based duration but has no user yet. Unfortunately, msec based time does not fit well when we want to inject idle cycle precisely with shallow idle state. In order to set the scene for the incoming idle injection user, move the precision up to usec when calling play_idle. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 2 +- drivers/thermal/intel/intel_powerclamp.c | 2 +- include/linux/cpu.h | 2 +- kernel/sched/idle.c | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 24ff2a068978..10601f4bdf72 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -138,7 +138,7 @@ static void idle_inject_fn(unsigned int cpu) */ iit->should_run = 0; - play_idle(READ_ONCE(ii_dev->idle_duration_ms)); + play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC); } /** diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5149a817456b..53216dcbe173 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -430,7 +430,7 @@ static void clamp_idle_injection_func(struct kthread_work *work) if (should_skip) goto balance; - play_idle(jiffies_to_msecs(w_data->duration_jiffies)); + play_idle(jiffies_to_usecs(w_data->duration_jiffies)); balance: if (clamping && w_data->clamping && cpu_online(w_data->cpu)) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fcb1386bb0d4..88dc0c653925 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -179,7 +179,7 @@ void arch_cpu_idle_dead(void); int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); -void play_idle(unsigned long duration_ms); +void play_idle(unsigned long duration_us); #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 80940939b733..b98283fc6914 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -311,7 +311,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -void play_idle(unsigned long duration_ms) +void play_idle(unsigned long duration_us) { struct idle_timer it; @@ -323,7 +323,7 @@ void play_idle(unsigned long duration_ms) WARN_ON_ONCE(current->nr_cpus_allowed != 1); WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); - WARN_ON_ONCE(!duration_ms); + WARN_ON_ONCE(!duration_us); rcu_sleep_check(); preempt_disable(); @@ -333,7 +333,8 @@ void play_idle(unsigned long duration_ms) it.done = 0; hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); it.timer.function = idle_inject_timer_fn; - hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED); + hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC), + HRTIMER_MODE_REL_PINNED); while (!READ_ONCE(it.done)) do_idle(); From cd4c0763064f02f42824eed61be38eafdc702281 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 2 Aug 2019 19:34:24 +0200 Subject: [PATCH 119/126] powercap: idle_inject: Use higher resolution for idle injection The resolution of the idle injection is limited to 1ms. If there is a need for an injection of 1.2 ms, it is not possible. The idle injection API is not yet used, so it is safe to convert the existing API to the new time unit instead of adding more functions. Convert to microsecond in order to use a finer grain time unit when injecting idle cycles. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 53 +++++++++++++++++----------------- include/linux/idle_inject.h | 8 ++--- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 10601f4bdf72..cd1270614cc6 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -59,14 +59,14 @@ struct idle_inject_thread { /** * struct idle_inject_device - idle injection data * @timer: idle injection period timer - * @idle_duration_ms: duration of CPU idle time to inject - * @run_duration_ms: duration of CPU run time to allow + * @idle_duration_us: duration of CPU idle time to inject + * @run_duration_us: duration of CPU run time to allow * @cpumask: mask of CPUs affected by idle injection */ struct idle_inject_device { struct hrtimer timer; - unsigned int idle_duration_ms; - unsigned int run_duration_ms; + unsigned int idle_duration_us; + unsigned int run_duration_us; unsigned long int cpumask[0]; }; @@ -104,16 +104,16 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev) */ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) { - unsigned int duration_ms; + unsigned int duration_us; struct idle_inject_device *ii_dev = container_of(timer, struct idle_inject_device, timer); - duration_ms = READ_ONCE(ii_dev->run_duration_ms); - duration_ms += READ_ONCE(ii_dev->idle_duration_ms); + duration_us = READ_ONCE(ii_dev->run_duration_us); + duration_us += READ_ONCE(ii_dev->idle_duration_us); idle_inject_wakeup(ii_dev); - hrtimer_forward_now(timer, ms_to_ktime(duration_ms)); + hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); return HRTIMER_RESTART; } @@ -138,35 +138,35 @@ static void idle_inject_fn(unsigned int cpu) */ iit->should_run = 0; - play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC); + play_idle(READ_ONCE(ii_dev->idle_duration_us)); } /** * idle_inject_set_duration - idle and run duration update helper - * @run_duration_ms: CPU run time to allow in milliseconds - * @idle_duration_ms: CPU idle time to inject in milliseconds + * @run_duration_us: CPU run time to allow in microseconds + * @idle_duration_us: CPU idle time to inject in microseconds */ void idle_inject_set_duration(struct idle_inject_device *ii_dev, - unsigned int run_duration_ms, - unsigned int idle_duration_ms) + unsigned int run_duration_us, + unsigned int idle_duration_us) { - if (run_duration_ms && idle_duration_ms) { - WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms); - WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms); + if (run_duration_us && idle_duration_us) { + WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); + WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } } /** * idle_inject_get_duration - idle and run duration retrieval helper - * @run_duration_ms: memory location to store the current CPU run time - * @idle_duration_ms: memory location to store the current CPU idle time + * @run_duration_us: memory location to store the current CPU run time + * @idle_duration_us: memory location to store the current CPU idle time */ void idle_inject_get_duration(struct idle_inject_device *ii_dev, - unsigned int *run_duration_ms, - unsigned int *idle_duration_ms) + unsigned int *run_duration_us, + unsigned int *idle_duration_us) { - *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); - *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); + *run_duration_us = READ_ONCE(ii_dev->run_duration_us); + *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); } /** @@ -181,10 +181,10 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, */ int idle_inject_start(struct idle_inject_device *ii_dev) { - unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); - unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); + unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); + unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); - if (!idle_duration_ms || !run_duration_ms) + if (!idle_duration_us || !run_duration_us) return -EINVAL; pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", @@ -193,7 +193,8 @@ int idle_inject_start(struct idle_inject_device *ii_dev) idle_inject_wakeup(ii_dev); hrtimer_start(&ii_dev->timer, - ms_to_ktime(idle_duration_ms + run_duration_ms), + ns_to_ktime((idle_duration_us + run_duration_us) * + NSEC_PER_USEC), HRTIMER_MODE_REL); return 0; diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index bdc0293fb6cb..a445cd1a36c5 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -20,10 +20,10 @@ int idle_inject_start(struct idle_inject_device *ii_dev); void idle_inject_stop(struct idle_inject_device *ii_dev); void idle_inject_set_duration(struct idle_inject_device *ii_dev, - unsigned int run_duration_ms, - unsigned int idle_duration_ms); + unsigned int run_duration_us, + unsigned int idle_duration_us); void idle_inject_get_duration(struct idle_inject_device *ii_dev, - unsigned int *run_duration_ms, - unsigned int *idle_duration_ms); + unsigned int *run_duration_us, + unsigned int *idle_duration_us); #endif /* __IDLE_INJECT_H__ */ From 4216148337211be7ad2bfe38a0dffc3ce479c283 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Wed, 4 Sep 2019 14:04:52 -0700 Subject: [PATCH 120/126] pm-graph: make setVal unbuffered again for python2 and python3 sleepgraph: - kprobe_events won't set correctly if the data is buffered - force sysvals.setVal to be unbuffered and use binary mode - tested in both python2 and python3 Link: https://bugzilla.kernel.org/show_bug.cgi?id=204773 Signed-off-by: Todd Brandt [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 1794c79a7d1b..f7d1c1f62f86 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -667,19 +667,19 @@ class SystemValues: if linesack < linesout: return False return True - def setVal(self, val, file, mode='w'): + def setVal(self, val, file): if not os.path.exists(file): return False try: - fp = open(file, mode) - fp.write(val) + fp = open(file, 'wb', 0) + fp.write(val.encode()) fp.flush() fp.close() except: return False return True - def fsetVal(self, val, path, mode='w'): - return self.setVal(val, self.tpath+path, mode) + def fsetVal(self, val, path): + return self.setVal(val, self.tpath+path) def getVal(self, file): res = '' if not os.path.exists(file): From 0760bb9ac9e71e7799e377ef7d31302c60c3418d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 3 Sep 2019 22:12:22 +0900 Subject: [PATCH 121/126] PM: runtime: Documentation: add runtime_status ABI document /sys/devices/.../power/runtime_status is introduced by commit c92445fadb91 ("PM / Runtime: Add sysfs debug files"). Then commit 0fcb4eef8294 ("PM / Runtime: Make runtime_status attribute not debug-only (v. 2)") made the runtime_status attribute available without CONFIG_PM_ADVANCED_DEBUG. This adds missing runtime_status ABI document. Signed-off-by: Akinobu Mita Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-power | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 80a00f7b6667..1763e64dd152 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -260,3 +260,12 @@ Description: This attribute has no effect on system-wide suspend/resume and hibernation. + +What: /sys/devices/.../power/runtime_status +Date: April 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power/runtime_status attribute contains + the current runtime PM status of the device, which may be + "suspended", "suspending", "resuming", "active", "error" (fatal + error), or "unsupported" (runtime PM is disabled). From cb5d8c45ab6c3daf8269e550cfb2d5018a876fe3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sun, 8 Sep 2019 00:45:21 +0100 Subject: [PATCH 122/126] cpuidle: allow governor switch on cpuidle_register_driver() The recently introduced haltpoll driver is largely only useful with haltpoll governor. To allow drivers to associate with a particular idle behaviour, add a @governor property to 'struct cpuidle_driver' and thus allow a cpuidle driver to switch to a *preferred* governor on idle driver registration. We save the previous governor, and when an idle driver is unregistered we switch back to that. The @governor can be overridden by cpuidle.governor= boot param or alternatively be ignored if the governor doesn't exist. Signed-off-by: Joao Martins Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.h | 2 ++ drivers/cpuidle/driver.c | 25 +++++++++++++++++++++++++ drivers/cpuidle/governor.c | 7 ++++--- include/linux/cpuidle.h | 3 +++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index d6613101af92..9f336af17fa6 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -9,6 +9,7 @@ /* For internal use only */ extern char param_governor[]; extern struct cpuidle_governor *cpuidle_curr_governor; +extern struct cpuidle_governor *cpuidle_prev_governor; extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; @@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void); extern void cpuidle_uninstall_idle_handler(void); /* governors */ +extern struct cpuidle_governor *cpuidle_find_governor(const char *str); extern int cpuidle_switch_governor(struct cpuidle_governor *gov); /* sysfs */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index dc32f34e68d9..80c1a830d991 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -254,12 +254,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) */ int cpuidle_register_driver(struct cpuidle_driver *drv) { + struct cpuidle_governor *gov; int ret; spin_lock(&cpuidle_driver_lock); ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); + if (!ret && !strlen(param_governor) && drv->governor && + (cpuidle_get_driver() == drv)) { + mutex_lock(&cpuidle_lock); + gov = cpuidle_find_governor(drv->governor); + if (gov) { + cpuidle_prev_governor = cpuidle_curr_governor; + if (cpuidle_switch_governor(gov) < 0) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); + } + return ret; } EXPORT_SYMBOL_GPL(cpuidle_register_driver); @@ -274,9 +287,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { + bool enabled = (cpuidle_get_driver() == drv); + spin_lock(&cpuidle_driver_lock); __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); + + if (!enabled) + return; + + mutex_lock(&cpuidle_lock); + if (cpuidle_prev_governor) { + if (!cpuidle_switch_governor(cpuidle_prev_governor)) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 2e3e14192bee..e9801f26c732 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN]; LIST_HEAD(cpuidle_governors); struct cpuidle_governor *cpuidle_curr_governor; +struct cpuidle_governor *cpuidle_prev_governor; /** - * __cpuidle_find_governor - finds a governor of the specified name + * cpuidle_find_governor - finds a governor of the specified name * @str: the name * * Must be called with cpuidle_lock acquired. */ -static struct cpuidle_governor * __cpuidle_find_governor(const char *str) +struct cpuidle_governor *cpuidle_find_governor(const char *str) { struct cpuidle_governor *gov; @@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return -ENODEV; mutex_lock(&cpuidle_lock); - if (__cpuidle_find_governor(gov->name) == NULL) { + if (cpuidle_find_governor(gov->name) == NULL) { ret = 0; list_add_tail(&gov->governor_list, &cpuidle_governors); if (!cpuidle_curr_governor || diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1a9f54eb3aa1..2dc4c6b19c25 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -121,6 +121,9 @@ struct cpuidle_driver { /* the driver handles the cpus in cpumask */ struct cpumask *cpumask; + + /* preferred governor to switch at register time */ + const char *governor; }; #ifdef CONFIG_CPU_IDLE From 7321440829a27d58c88b7fcfcbbc37487b5e39a5 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sun, 8 Sep 2019 00:45:22 +0100 Subject: [PATCH 123/126] cpuidle-haltpoll: set haltpoll as preferred governor Right now, guest current governors have the following ratings: * ladder -> 10 * teo -> 19 * menu -> 20 * haltpoll -> 21 * ladder + nohz=off -> 25 haltpoll governor got introduced and it is now the default governor given its highest rating -- with ladder+nohz being the exception -- regardless of idle driver in the guest. An example of an undesirable case is x86 KVM guests with MWAIT which have intel_idle registered first, and consequently will have haltpoll be used as governor which would get limited to a poll state and state 1 and the other states wouldn't get used. To keep the previous defaults we decrease rating of governor to 9 (below current lowest rating) and thus rely on @governor switch on cpuidle_register_driver() to tie in haltpoll idle driver and governor together. Signed-off-by: Joao Martins Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 1 + drivers/cpuidle/governors/haltpoll.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 56d8ab814466..519e90d125cf 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -34,6 +34,7 @@ static int default_enter_idle(struct cpuidle_device *dev, static struct cpuidle_driver haltpoll_driver = { .name = "haltpoll", + .governor = "haltpoll", .owner = THIS_MODULE, .states = { { /* entry 0 is for polling */ }, diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 797477bda486..7a703d2e0064 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -133,7 +133,7 @@ static int haltpoll_enable_device(struct cpuidle_driver *drv, static struct cpuidle_governor haltpoll_governor = { .name = "haltpoll", - .rating = 21, + .rating = 9, .enable = haltpoll_enable_device, .select = haltpoll_select, .reflect = haltpoll_reflect, From 5cc59f597c0666c6a7e1c67aac9063895949fd56 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sun, 8 Sep 2019 00:45:23 +0100 Subject: [PATCH 124/126] cpuidle-haltpoll: return -ENODEV on modinit failure When a user loads cpuidle-haltpoll on a non KVM guest the module will successfully load, even though idle driver registration didn't take place. We should instead return -ENODEV signaling the user that the driver can't be loaded, like other error paths in haltpoll_init(). An example of such error paths is when we return -EBUSY when attempting to register an idle driver when it had one already (e.g. intel_idle loads at boot and then we attempt to insert module cpuidle-haltpoll). Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver") Signed-off-by: Joao Martins Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 519e90d125cf..7a0239ef717e 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -99,7 +99,7 @@ static int __init haltpoll_init(void) cpuidle_poll_state_init(drv); if (!kvm_para_available()) - return 0; + return -ENODEV; ret = cpuidle_register_driver(drv); if (ret < 0) From 472f263660832b90e53bede2020f68cd14f8b76c Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sun, 8 Sep 2019 00:45:24 +0100 Subject: [PATCH 125/126] cpuidle-haltpoll: do not set an owner to allow modunload cpuidle-haltpoll can be built as a module to allow optional late load. Given we are setting @owner to THIS_MODULE, cpuidle will attempt to grab a module reference every time a cpuidle_device is registered -- so essentially all online cpus get a reference. This prevents for the module to be unloaded later, which makes the module_exit callback entirely unused. Thus remove the @owner and allow module to be unloaded. Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver") Signed-off-by: Joao Martins Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 7a0239ef717e..49a65c6fe91e 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -35,7 +35,6 @@ static int default_enter_idle(struct cpuidle_device *dev, static struct cpuidle_driver haltpoll_driver = { .name = "haltpoll", .governor = "haltpoll", - .owner = THIS_MODULE, .states = { { /* entry 0 is for polling */ }, { From 1328edca4a142ee3c7442d1eece2c3ca383eff35 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 29 Aug 2019 16:49:57 +0800 Subject: [PATCH 126/126] cpuidle-haltpoll: Enable kvm guest polling when dedicated physical CPUs are available The downside of guest side polling is that polling is performed even with other runnable tasks in the host. However, even if poll in kvm can aware whether or not other runnable tasks in the same pCPU, it can still incur extra overhead in over-subscribe scenario. Now we can just enable guest polling when dedicated pCPUs are available. Acked-by: Paolo Bonzini Signed-off-by: Wanpeng Li Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/kvm.c | 1 + drivers/cpuidle/cpuidle-haltpoll.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 60bab4a3b36b..3726ba48f763 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -711,6 +711,7 @@ unsigned int kvm_arch_para_hints(void) { return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +EXPORT_SYMBOL_GPL(kvm_arch_para_hints); static uint32_t __init kvm_detect(void) { diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 49a65c6fe91e..932390b028f1 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -97,7 +97,8 @@ static int __init haltpoll_init(void) cpuidle_poll_state_init(drv); - if (!kvm_para_available()) + if (!kvm_para_available() || + !kvm_para_has_hint(KVM_HINTS_REALTIME)) return -ENODEV; ret = cpuidle_register_driver(drv);