From 7911b6a868ea1fc7e4e4929edaee312a2f061905 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 21 Feb 2023 16:05:42 +0100 Subject: [PATCH 01/44] dt-bindings: cpufreq: qcom-hw: add a compatible for sa8775p Add the compatible for the cpufreq engine present on sa8775p platforms. Signed-off-by: Bartosz Golaszewski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index e4aa8c67d532..92693c33edf9 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -26,6 +26,7 @@ properties: items: - enum: - qcom,qdu1000-cpufreq-epss + - qcom,sa8775p-cpufreq-epss - qcom,sc7280-cpufreq-epss - qcom,sc8280xp-cpufreq-epss - qcom,sm6375-cpufreq-epss From f35512777e31085f1ade7febed6c2038adb1fb07 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:40 +0000 Subject: [PATCH 02/44] kbuild, cpufreq: tegra124: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: Thierry Reding Cc: Jonathan Hunter Cc: linux-pm@vger.kernel.org Cc: linux-tegra@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra124-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c index 7a1ea6fdcab6..788672c0c834 100644 --- a/drivers/cpufreq/tegra124-cpufreq.c +++ b/drivers/cpufreq/tegra124-cpufreq.c @@ -221,4 +221,3 @@ module_init(tegra_cpufreq_init); MODULE_AUTHOR("Tuomas Tynkkynen "); MODULE_DESCRIPTION("cpufreq driver for NVIDIA Tegra124"); -MODULE_LICENSE("GPL v2"); From df97441673f303d06dd33c04d8d5ee54ae6b2a82 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:41 +0000 Subject: [PATCH 03/44] kbuild, cpufreq: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/freq_table.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638ef..90bfc27ed1ba 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -367,4 +367,3 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq frequency table helpers"); -MODULE_LICENSE("GPL"); From 35527c677cb6bbaa6489b8560c8b4316fdd60792 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 11:51:40 +0100 Subject: [PATCH 04/44] cpufreq: qcom-hw: Simplify counting frequency domains For quite some time, this driver has been performing some quite low-level DT operations. Simplify that using platform_get_resource. Signed-off-by: Konrad Dybcio Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 2f581d2d617d..575a4461c25a 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -29,6 +29,8 @@ #define GT_IRQ_STATUS BIT(2) +#define MAX_FREQ_DOMAINS 3 + struct qcom_cpufreq_soc_data { u32 reg_enable; u32 reg_domain_state; @@ -651,10 +653,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { struct clk_hw_onecell_data *clk_data; struct device *dev = &pdev->dev; - struct device_node *soc_node; struct device *cpu_dev; struct clk *clk; - int ret, i, num_domains, reg_sz; + int ret, i, num_domains; clk = clk_get(dev, "xo"); if (IS_ERR(clk)) @@ -681,24 +682,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (ret) return ret; - /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ - soc_node = of_get_parent(dev->of_node); - if (!soc_node) - return -EINVAL; - - ret = of_property_read_u32(soc_node, "#address-cells", ®_sz); - if (ret) - goto of_exit; - - ret = of_property_read_u32(soc_node, "#size-cells", &i); - if (ret) - goto of_exit; - - reg_sz += i; - - num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * reg_sz); - if (num_domains <= 0) - return num_domains; + for (num_domains = 0; num_domains < MAX_FREQ_DOMAINS; num_domains++) + if (!platform_get_resource(pdev, IORESOURCE_MEM, num_domains)) + break; qcom_cpufreq.data = devm_kzalloc(dev, sizeof(struct qcom_cpufreq_data) * num_domains, GFP_KERNEL); @@ -762,9 +748,6 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) else dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); -of_exit: - of_node_put(soc_node); - return ret; } From b8f3a396a7ee43e6079176cc0fb8de2b95a23681 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:02 -0600 Subject: [PATCH 05/44] cpufreq: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 2 +- drivers/cpufreq/imx-cpufreq-dt.c | 2 +- drivers/cpufreq/imx6q-cpufreq.c | 4 ++-- drivers/cpufreq/scmi-cpufreq.c | 2 +- drivers/cpufreq/tegra20-cpufreq.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index e85703651098..5ac6b9e5270e 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -179,7 +179,7 @@ static bool __init cpu0_node_has_opp_v2_prop(void) struct device_node *np = of_cpu_device_node_get(0); bool ret = false; - if (of_get_property(np, "operating-points-v2", NULL)) + if (of_property_present(np, "operating-points-v2")) ret = true; of_node_put(np); diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 76e553af2071..535867a7dfdd 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -89,7 +89,7 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) cpu_dev = get_cpu_device(0); - if (!of_find_property(cpu_dev->of_node, "cpu-supply", NULL)) + if (!of_property_present(cpu_dev->of_node, "cpu-supply")) return -ENODEV; if (of_machine_is_compatible("fsl,imx7ulp")) { diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index ad4ce8493144..48e1772e98fd 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -222,7 +222,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev) u32 val; int ret; - if (of_find_property(dev->of_node, "nvmem-cells", NULL)) { + if (of_property_present(dev->of_node, "nvmem-cells")) { ret = nvmem_cell_read_u32(dev, "speed_grade", &val); if (ret) return ret; @@ -279,7 +279,7 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) u32 val; int ret = 0; - if (of_find_property(dev->of_node, "nvmem-cells", NULL)) { + if (of_property_present(dev->of_node, "nvmem-cells")) { ret = nvmem_cell_read_u32(dev, "speed_grade", &val); if (ret) return ret; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 513a071845c2..f34e6382a4c5 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -310,7 +310,7 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) #ifdef CONFIG_COMMON_CLK /* dummy clock provider as needed by OPP if clocks property is used */ - if (of_find_property(dev->of_node, "#clock-cells", NULL)) + if (of_property_present(dev->of_node, "#clock-cells")) devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); #endif diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index ab7ac7df9e62..dfd2de4f8e07 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -25,7 +25,7 @@ static bool cpu0_node_has_opp_v2_prop(void) struct device_node *np = of_cpu_device_node_get(0); bool ret = false; - if (of_get_property(np, "operating-points-v2", NULL)) + if (of_property_present(np, "operating-points-v2")) ret = true; of_node_put(np); From e9eadc28271940266ace3f748a1c6278bd886ea8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:18 -0600 Subject: [PATCH 06/44] opp: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index e55c6095adf0..bed2b651deb0 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -224,7 +224,7 @@ void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, of_property_read_u32(np, "voltage-tolerance", &opp_table->voltage_tolerance_v1); - if (of_find_property(np, "#power-domain-cells", NULL)) + if (of_property_present(np, "#power-domain-cells")) opp_table->is_genpd = true; /* Get OPP table node */ @@ -536,7 +536,7 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, * an OPP then the OPP should not be enabled as there is * no way to see if the hardware supports it. */ - if (of_find_property(np, "opp-supported-hw", NULL)) + if (of_property_present(np, "opp-supported-hw")) return false; else return true; From 737dbd6667ff38b6cc72d35c3ddc04db027ed76c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Jan 2023 16:37:58 -0800 Subject: [PATCH 07/44] PM / devfreq: Remove "select SRCU" Now that the SRCU Kconfig option is unconditionally selected, there is no longer any point in selecting it. Therefore, remove the "select SRCU" Kconfig statements. Signed-off-by: Paul E. McKenney Signed-off-by: Chanwoo Choi --- drivers/devfreq/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 9754d8b31621..3c4862a752b5 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig PM_DEVFREQ bool "Generic Dynamic Voltage and Frequency Scaling (DVFS) support" - select SRCU select PM_OPP help A device may have a list of frequencies and voltages available. From f9d0393a8898d0bc67730ac736ad9c6f51434463 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2023 18:37:53 +0100 Subject: [PATCH 08/44] PM / devfreq: exyos-bus: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). drivers/devfreq/exynos-bus.c:504:34: error: ‘exynos_bus_of_match’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/exynos-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 027e8f336acc..f7c554051232 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -513,7 +513,7 @@ static struct platform_driver exynos_bus_platdrv = { .driver = { .name = "exynos-bus", .pm = &exynos_bus_pm, - .of_match_table = of_match_ptr(exynos_bus_of_match), + .of_match_table = exynos_bus_of_match, }, }; module_platform_driver(exynos_bus_platdrv); From b7405e3f62d68be4cc5f4677499e1b8c13a0d216 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:34 -0600 Subject: [PATCH 09/44] PM / devfreq: exynos: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Chanwoo Choi --- drivers/devfreq/exynos-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index f7c554051232..88414445adf3 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -432,7 +432,7 @@ static int exynos_bus_probe(struct platform_device *pdev) goto err; /* Create child platform device for the interconnect provider */ - if (of_get_property(dev->of_node, "#interconnect-cells", NULL)) { + if (of_property_present(dev->of_node, "#interconnect-cells")) { bus->icc_pdev = platform_device_register_data( dev, "exynos-generic-icc", PLATFORM_DEVID_AUTO, NULL, 0); From 1f5e62f5fb217f2c1e003236be7d03cf606c26c4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 2 Mar 2023 20:14:11 -0800 Subject: [PATCH 10/44] cpufreq: intel_pstate: Enable HWP IO boost for all servers The HWP IO boost results in slight improvements for IO performance on both Ice Lake and Sapphire Rapid servers. Currently there is a CPU model check for Skylake desktop and server along with the ACPI PM profile for performance and enterprise servers to enable IO boost. Remove the CPU model check, so that all current server models enable HWP IO boost by default. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 48a4613cef1e..2a1a6779d82d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2384,12 +2384,6 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { {} }; -static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { - X86_MATCH(SKYLAKE_X, core_funcs), - X86_MATCH(SKYLAKE, core_funcs), - {} -}; - static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -2408,12 +2402,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->epp_default = -EINVAL; if (hwp_active) { - const struct x86_cpu_id *id; - intel_pstate_hwp_enable(cpu); - id = x86_match_cpu(intel_pstate_hwp_boost_ids); - if (id && intel_pstate_acpi_pm_profile_server()) + if (intel_pstate_acpi_pm_profile_server()) hwp_boost = true; } } else if (hwp_active) { From 76531df5e13b5b997a17940d6980a168c616e962 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:35 +0000 Subject: [PATCH 11/44] ACPI: CPPC: Add min and max perf register writing support Currently writing of min and max perf register is deferred in cppc_set_perf function. In CPPC guided mode, these registers needed to be written to guide the platform about min and max perf levels. Add this support to make guided mode work properly on AMD shared memory systems. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny [ rjw: Fixed up a multiline comment, subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index c51d3ccb4cca..523bad4e3a0c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1488,7 +1488,7 @@ EXPORT_SYMBOL_GPL(cppc_set_enable); int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); - struct cpc_register_resource *desired_reg; + struct cpc_register_resource *desired_reg, *min_perf_reg, *max_perf_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0; @@ -1499,6 +1499,8 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + min_perf_reg = &cpc_desc->cpc_regs[MIN_PERF]; + max_perf_reg = &cpc_desc->cpc_regs[MAX_PERF]; /* * This is Phase-I where we want to write to CPC registers @@ -1507,7 +1509,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) * Since read_lock can be acquired by multiple CPUs simultaneously we * achieve that goal here */ - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; @@ -1530,13 +1532,19 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) cpc_desc->write_cmd_status = 0; } - /* - * Skip writing MIN/MAX until Linux knows how to come up with - * useful values. - */ cpc_write(cpu, desired_reg, perf_ctrls->desired_perf); - if (CPC_IN_PCC(desired_reg)) + /* + * Only write if min_perf and max_perf not zero. Some drivers pass zero + * value to min and max perf, but they don't mean to set the zero value, + * they just don't want to write to those registers. + */ + if (perf_ctrls->min_perf) + cpc_write(cpu, min_perf_reg, perf_ctrls->min_perf); + if (perf_ctrls->max_perf) + cpc_write(cpu, max_perf_reg, perf_ctrls->max_perf); + + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) up_read(&pcc_ss_data->pcc_lock); /* END Phase-I */ /* * This is Phase-II where we transfer the ownership of PCC to Platform @@ -1584,7 +1592,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) * case during a CMD_READ and if there are pending writes it delivers * the write command before servicing the read command */ - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) { if (down_write_trylock(&pcc_ss_data->pcc_lock)) {/* BEGIN Phase-II */ /* Update only if there are pending write commands */ if (pcc_ss_data->pending_pcc_write_cmd) From c984f5d5d45bd5f80d6a9d8541e809300c963aca Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:36 +0000 Subject: [PATCH 12/44] ACPI: CPPC: Add auto select register read/write support For some AMD shared memory based systems, the autonomous selection bit needed to be set explicitly. Add autonomous selection register related APIs to acpi driver, which amd_pstate driver uses later. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny [ rjw: Fixed up kerneldoc comments, white space adjustment, subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 96 ++++++++++++++++++++++++++++++++++++++++ include/acpi/cppc_acpi.h | 11 +++++ 2 files changed, 107 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 523bad4e3a0c..7ff269a78c20 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1433,6 +1433,102 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) } EXPORT_SYMBOL_GPL(cppc_set_epp_perf); +/** + * cppc_get_auto_sel_caps - Read autonomous selection register. + * @cpunum : CPU from which to read register. + * @perf_caps : struct where autonomous selection register value is updated. + */ +int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) +{ + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); + struct cpc_register_resource *auto_sel_reg; + u64 auto_sel; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpunum); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + if (!CPC_SUPPORTED(auto_sel_reg)) + pr_warn_once("Autonomous mode is not unsupported!\n"); + + if (CPC_IN_PCC(auto_sel_reg)) { + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0; + + if (pcc_ss_id < 0) + return -ENODEV; + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + + if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) { + cpc_read(cpunum, auto_sel_reg, &auto_sel); + perf_caps->auto_sel = (bool)auto_sel; + } else { + ret = -EIO; + } + + up_write(&pcc_ss_data->pcc_lock); + + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cppc_get_auto_sel_caps); + +/** + * cppc_set_auto_sel - Write autonomous selection register. + * @cpu : CPU to which to write register. + * @enable : the desired value of autonomous selection resiter to be updated. + */ +int cppc_set_auto_sel(int cpu, bool enable) +{ + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_register_resource *auto_sel_reg; + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = -EINVAL; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + if (CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id\n"); + return -ENODEV; + } + + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + /* after writing CPC, transfer the ownership of PCC to platform */ + ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); + up_write(&pcc_ss_data->pcc_lock); + } else { + ret = -ENOTSUPP; + pr_debug("_CPC in PCC is not supported\n"); + } + + return ret; +} +EXPORT_SYMBOL_GPL(cppc_set_auto_sel); + /** * cppc_set_enable - Set to enable CPPC on the processor by writing the * Continuous Performance Control package EnableRegister field. diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 6b487a5bd638..6126c977ece0 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -109,6 +109,7 @@ struct cppc_perf_caps { u32 lowest_freq; u32 nominal_freq; u32 energy_perf; + bool auto_sel; }; struct cppc_perf_ctrls { @@ -153,6 +154,8 @@ extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); +extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); +extern int cppc_set_auto_sel(int cpu, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -214,6 +217,14 @@ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { return -ENOTSUPP; } +static inline int cppc_set_auto_sel(int cpu, bool enable) +{ + return -ENOTSUPP; +} +static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) +{ + return -ENOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ From 3e6e078057640751654bda9fd2278a944f61fa4a Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:37 +0000 Subject: [PATCH 13/44] Documentation: cpufreq: amd-pstate: Move amd_pstate param to alphabetical order Move amd_pstate command line param description to correct alphabetical order. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6221a1d057dd..1e38f3ae0e62 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -339,6 +339,24 @@ This mode requires kvm-amd.avic=1. (Default when IOMMU HW support is present.) + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) + active + Use amd_pstate_epp driver instance as the scaling driver, + driver provides a hint to the hardware if software wants + to bias toward performance (0x0) or energy efficiency (0xff) + to the CPPC firmware. then CPPC power algorithm will + calculate the runtime workload and adjust the realtime cores + frequency. + amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , @@ -7059,20 +7077,3 @@ xmon commands. off xmon is disabled. - amd_pstate= [X86] - disable - Do not enable amd_pstate as the default - scaling driver for the supported processors - passive - Use amd_pstate as a scaling driver, driver requests a - desired performance on this abstract scale and the power - management firmware translates the requests into actual - hardware states (core frequency, data fabric and memory - clocks etc.) - active - Use amd_pstate_epp driver instance as the scaling driver, - driver provides a hint to the hardware if software wants - to bias toward performance (0x0) or energy efficiency (0xff) - to the CPPC firmware. then CPPC power algorithm will - calculate the runtime workload and adjust the realtime cores - frequency. From 2dd6d0ebf74049256160a3d03dabbd92fe0b8599 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:38 +0000 Subject: [PATCH 14/44] cpufreq: amd-pstate: Add guided autonomous mode From ACPI spec below 3 modes for CPPC can be defined: 1. Non autonomous: OS scaling governor specifies operating frequency/ performance level through `Desired Performance` register and platform follows that. 2. Guided autonomous: OS scaling governor specifies min and max frequencies/ performance levels through `Minimum Performance` and `Maximum Performance` register, and platform can autonomously select an operating frequency in this range. 3. Fully autonomous: OS only hints (via EPP) to platform for the required energy performance preference for the workload and platform autonomously scales the frequency. Currently (1) is supported by amd_pstate as passive mode, and (3) is implemented by EPP support. This change is to support (2). In guided autonomous mode the min_perf is based on the input from the scaling governor. For example, in case of schedutil this value depends on the current utilization. And max_perf is set to max capacity. To activate guided auto mode ``amd_pstate=guided`` command line parameter has to be passed in the kernel. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 15 +++++--- drivers/cpufreq/amd-pstate.c | 34 +++++++++++++++---- include/linux/amd-pstate.h | 2 ++ 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1e38f3ae0e62..1e5fe4891dd7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -344,11 +344,11 @@ Do not enable amd_pstate as the default scaling driver for the supported processors passive - Use amd_pstate as a scaling driver, driver requests a - desired performance on this abstract scale and the power - management firmware translates the requests into actual - hardware states (core frequency, data fabric and memory - clocks etc.) + Use amd_pstate with passive mode as a scaling driver. + In this mode autonomous selection is disabled. + Driver requests a desired performance level and platform + tries to match the same performance level if it is + satisfied by guaranteed performance level. active Use amd_pstate_epp driver instance as the scaling driver, driver provides a hint to the hardware if software wants @@ -356,6 +356,11 @@ to the CPPC firmware. then CPPC power algorithm will calculate the runtime workload and adjust the realtime cores frequency. + guided + Activate guided autonomous mode. Driver requests minimum and + maximum performance level and the platform autonomously + selects a performance level in this range and appropriate + to the current workload. amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 73c7643b2697..61dcd7b89b26 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -308,7 +308,22 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); - return 0; + if (cppc_state == AMD_PSTATE_ACTIVE) + return 0; + + ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf); + if (ret) { + pr_warn("failed to get auto_sel, ret: %d\n", ret); + return 0; + } + + ret = cppc_set_auto_sel(cpudata->cpu, + (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); + + if (ret) + pr_warn("failed to set auto_sel, ret: %d\n", ret); + + return ret; } DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); @@ -385,12 +400,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) } static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { + min_perf = des_perf; + des_perf = 0; + } + value &= ~AMD_CPPC_MIN_PERF(~0L); value |= AMD_CPPC_MIN_PERF(min_perf); @@ -445,7 +466,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); amd_pstate_update(cpudata, min_perf, des_perf, - max_perf, false); + max_perf, false, policy->governor->flags); cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -479,7 +500,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (max_perf < min_perf) max_perf = min_perf; - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, + policy->governor->flags); cpufreq_cpu_put(policy); } @@ -1279,7 +1301,7 @@ static int __init amd_pstate_init(void) /* capability check */ if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - if (cppc_state == AMD_PSTATE_PASSIVE) + if (cppc_state != AMD_PSTATE_ACTIVE) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); @@ -1341,7 +1363,7 @@ static int __init amd_pstate_param(char *str) if (cppc_state == AMD_PSTATE_ACTIVE) current_pstate_driver = &amd_pstate_epp_driver; - if (cppc_state == AMD_PSTATE_PASSIVE) + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) current_pstate_driver = &amd_pstate_driver; return 0; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index f5f22418e64b..c10ebf8c42e6 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -97,6 +97,7 @@ enum amd_pstate_mode { AMD_PSTATE_DISABLE = 0, AMD_PSTATE_PASSIVE, AMD_PSTATE_ACTIVE, + AMD_PSTATE_GUIDED, AMD_PSTATE_MAX, }; @@ -104,6 +105,7 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_DISABLE] = "disable", [AMD_PSTATE_PASSIVE] = "passive", [AMD_PSTATE_ACTIVE] = "active", + [AMD_PSTATE_GUIDED] = "guided", NULL, }; #endif /* _LINUX_AMD_PSTATE_H */ From 3ca7bc818d8ccf399cb0366d8d9a915c04a446f9 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:39 +0000 Subject: [PATCH 15/44] cpufreq: amd-pstate: Add guided mode control support via sysfs amd_pstate driver's `status` sysfs entry helps to control the driver's mode dynamically by user. After the addition of guided mode the combinations of mode transitions have been increased (16 combinations). Therefore optimise the amd_pstate_update_status function by implementing a state transition table. There are 4 states amd_pstate supports, namely: 'disable', 'passive', 'active', and 'guided'. The transition from any state to any other state is possible after this change. Sysfs interface: To disable amd_pstate driver: # echo disable > /sys/devices/system/cpu/amd_pstate/status To enable passive mode: # echo passive > /sys/devices/system/cpu/amd_pstate/status To change mode to active: # echo active > /sys/devices/system/cpu/amd_pstate/status To change mode to guided: # echo guided > /sys/devices/system/cpu/amd_pstate/status Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 143 +++++++++++++++++++++++++---------- 1 file changed, 101 insertions(+), 42 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 61dcd7b89b26..7955cfc91c31 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -106,6 +106,8 @@ static unsigned int epp_values[] = { [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE, }; +typedef int (*cppc_mode_transition_fn)(int); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -838,6 +840,98 @@ static ssize_t show_energy_performance_preference( return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } +static void amd_pstate_driver_cleanup(void) +{ + amd_pstate_enable(false); + cppc_state = AMD_PSTATE_DISABLE; + current_pstate_driver = NULL; +} + +static int amd_pstate_register_driver(int mode) +{ + int ret; + + if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) + current_pstate_driver = &amd_pstate_driver; + else if (mode == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + else + return -EINVAL; + + cppc_state = mode; + ret = cpufreq_register_driver(current_pstate_driver); + if (ret) { + amd_pstate_driver_cleanup(); + return ret; + } + return 0; +} + +static int amd_pstate_unregister_driver(int dummy) +{ + cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_driver_cleanup(); + return 0; +} + +static int amd_pstate_change_mode_without_dvr_change(int mode) +{ + int cpu = 0; + + cppc_state = mode; + + if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + return 0; + + for_each_present_cpu(cpu) { + cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); + } + + return 0; +} + +static int amd_pstate_change_driver_mode(int mode) +{ + int ret; + + ret = amd_pstate_unregister_driver(0); + if (ret) + return ret; + + ret = amd_pstate_register_driver(mode); + if (ret) + return ret; + + return 0; +} + +cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { + [AMD_PSTATE_DISABLE] = { + [AMD_PSTATE_DISABLE] = NULL, + [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, + [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver, + [AMD_PSTATE_GUIDED] = amd_pstate_register_driver, + }, + [AMD_PSTATE_PASSIVE] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = NULL, + [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change, + }, + [AMD_PSTATE_ACTIVE] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_ACTIVE] = NULL, + [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode, + }, + [AMD_PSTATE_GUIDED] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change, + [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_GUIDED] = NULL, + }, +}; + static ssize_t amd_pstate_show_status(char *buf) { if (!current_pstate_driver) @@ -846,57 +940,22 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } -static void amd_pstate_driver_cleanup(void) -{ - current_pstate_driver = NULL; -} - static int amd_pstate_update_status(const char *buf, size_t size) { - int ret = 0; int mode_idx; - if (size > 7 || size < 6) + if (size > strlen("passive") || size < strlen("active")) return -EINVAL; + mode_idx = get_mode_idx_from_str(buf, size); - switch(mode_idx) { - case AMD_PSTATE_DISABLE: - if (!current_pstate_driver) - return -EINVAL; - if (cppc_state == AMD_PSTATE_ACTIVE) - return -EBUSY; - cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_driver_cleanup(); - break; - case AMD_PSTATE_PASSIVE: - if (current_pstate_driver) { - if (current_pstate_driver == &amd_pstate_driver) - return 0; - cpufreq_unregister_driver(current_pstate_driver); - cppc_state = AMD_PSTATE_PASSIVE; - current_pstate_driver = &amd_pstate_driver; - } + if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) + return -EINVAL; - ret = cpufreq_register_driver(current_pstate_driver); - break; - case AMD_PSTATE_ACTIVE: - if (current_pstate_driver) { - if (current_pstate_driver == &amd_pstate_epp_driver) - return 0; - cpufreq_unregister_driver(current_pstate_driver); - current_pstate_driver = &amd_pstate_epp_driver; - cppc_state = AMD_PSTATE_ACTIVE; - } + if (mode_state_machine[cppc_state][mode_idx]) + return mode_state_machine[cppc_state][mode_idx](mode_idx); - ret = cpufreq_register_driver(current_pstate_driver); - break; - default: - ret = -EINVAL; - break; - } - - return ret; + return 0; } static ssize_t show_status(struct kobject *kobj, From 7a9dec665f6875f61fe0c2e71b25842daade5110 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:40 +0000 Subject: [PATCH 16/44] Documentation: cpufreq: amd-pstate: Update amd_pstate status sysfs for guided Update amd_pstate status sysfs for guided mode. Acked-by: Huang Rui Reviewed-by: Bagas Sanjaya Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 31 ++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 6e5298b521b1..1cf40f69278c 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -303,13 +303,18 @@ efficiency frequency management method on AMD processors. AMD Pstate Driver Operation Modes ================================= -``amd_pstate`` CPPC has two operation modes: CPPC Autonomous(active) mode and -CPPC non-autonomous(passive) mode. -active mode and passive mode can be chosen by different kernel parameters. -When in Autonomous mode, CPPC ignores requests done in the Desired Performance -Target register and takes into account only the values set to the Minimum requested -performance, Maximum requested performance, and Energy Performance Preference -registers. When Autonomous is disabled, it only considers the Desired Performance Target. +``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode, +non-autonomous (passive) mode and guided autonomous (guided) mode. +Active/passive/guided mode can be chosen by different kernel parameters. + +- In autonomous mode, platform ignores the desired performance level request + and takes into account only the values set to the minimum, maximum and energy + performance preference registers. +- In non-autonomous mode, platform gets desired performance level + from OS directly through Desired Performance Register. +- In guided-autonomous mode, platform sets operating performance level + autonomously according to the current workload and within the limits set by + OS through min and max performance registers. Active Mode ------------ @@ -338,6 +343,15 @@ to the Performance Reduction Tolerance register. Above the nominal performance l processor must provide at least nominal performance requested and go higher if current operating conditions allow. +Guided Mode +----------- + +``amd_pstate=guided`` + +If ``amd_pstate=guided`` is passed to kernel command line option then this mode +is activated. In this mode, driver requests minimum and maximum performance +level and the platform autonomously selects a performance level in this range +and appropriate to the current workload. User Space Interface in ``sysfs`` - General =========================================== @@ -358,6 +372,9 @@ control its functionality at the system level. They are located in the "passive" The driver is functional and in the ``passive mode`` + "guided" + The driver is functional and in the ``guided mode`` + "disable" The driver is unregistered and not functional now. From 877d5cd2aeed8fddeb3afe174c6b9eb77cd56f8b Mon Sep 17 00:00:00 2001 From: qinyu Date: Mon, 20 Mar 2023 16:17:02 +0800 Subject: [PATCH 17/44] cpufreq: warn about invalid vals to scaling_max/min_freq interfaces When echo an invalid val to scaling_min_freq: > echo 123abc123 > scaling_min_freq It looks weird to have a return val of 0: > echo $? > 0 Sane people won't echo strings like that into these interfaces but fuzz tests may do. Also, maybe it's better to inform people if input is invalid. After this: > echo 123abc123 > scaling_min_freq > -bash: echo: write error: Invalid argument Signed-off-by: qinyu Tested-by: zhangxiaofeng Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6d8fd3b8dcb5..d61f7308f63c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -725,9 +725,9 @@ static ssize_t store_##file_name \ unsigned long val; \ int ret; \ \ - ret = sscanf(buf, "%lu", &val); \ - if (ret != 1) \ - return -EINVAL; \ + ret = kstrtoul(buf, 0, &val); \ + if (ret) \ + return ret; \ \ ret = freq_qos_update_request(policy->object##_freq_req, val);\ return ret >= 0 ? count : ret; \ From 8ffdb1fe9121f85232171630a8cfb0574cbf81b2 Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Wed, 8 Mar 2023 20:28:30 +0800 Subject: [PATCH 18/44] cpufreq: Fix typo in the ARM_BRCMSTB_AVS_CPUFREQ Kconfig entry Delete the redundant word 'to' from the help text in the ARM_BRCMSTB_AVS_CPUFREQ Kconfig entry. Signed-off-by: Jingyu Wang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 97acaa2136fd..123b4bbfcfee 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -95,7 +95,7 @@ config ARM_BRCMSTB_AVS_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides - a standard CPUfreq interface to to the firmware. + a standard CPUfreq interface to the firmware. Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS. From f914bfdd7f8468e1c3e6778658550b67a677e935 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:03 -0600 Subject: [PATCH 19/44] cpuidle: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Acked-by: Ulf Hansson Acked-by: Anup Patel Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci-domain.c | 2 +- drivers/cpuidle/cpuidle-riscv-sbi.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 11316c3b14ca..c2d6d9c3c930 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -166,7 +166,7 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev) * initialize a genpd/genpd-of-provider pair when it's found. */ for_each_child_of_node(np, node) { - if (!of_find_property(node, "#power-domain-cells", NULL)) + if (!of_property_present(node, "#power-domain-cells")) continue; ret = psci_pd_init(node, use_osi); diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index be383f4b6855..1fab1abc6eb6 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -497,7 +497,7 @@ static int sbi_genpd_probe(struct device_node *np) * initialize a genpd/genpd-of-provider pair when it's found. */ for_each_child_of_node(np, node) { - if (!of_find_property(node, "#power-domain-cells", NULL)) + if (!of_property_present(node, "#power-domain-cells")) continue; ret = sbi_pd_init(node); @@ -548,8 +548,8 @@ static int sbi_cpuidle_probe(struct platform_device *pdev) for_each_possible_cpu(cpu) { np = of_cpu_device_node_get(cpu); if (np && - of_find_property(np, "power-domains", NULL) && - of_find_property(np, "power-domain-names", NULL)) { + of_property_present(np, "power-domains") && + of_property_present(np, "power-domain-names")) { continue; } else { sbi_cpuidle_use_osi = false; From 175c9df15aef7a1446fb67154a186b73dd892b50 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:03 -0600 Subject: [PATCH 20/44] cpufreq: pmac32: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to to of_property_read_bool(). Signed-off-by: Rob Herring Acked-by: Viresh Kumar Reviewed-by: Michael Ellerman Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pmac32-cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4b8ee2014da6..7ec6d1bb4592 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -546,7 +546,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) { struct device_node *volt_gpio_np; - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) + if (!of_property_read_bool(cpunode, "dynamic-power-step")) return 1; volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); @@ -576,7 +576,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) u32 pvr; const u32 *value; - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) + if (!of_property_read_bool(cpunode, "dynamic-power-step")) return 1; hi_freq = cur_freq; @@ -632,7 +632,7 @@ static int __init pmac_cpufreq_setup(void) /* Check for 7447A based MacRISC3 */ if (of_machine_is_compatible("MacRISC3") && - of_get_property(cpunode, "dynamic-power-step", NULL) && + of_property_read_bool(cpunode, "dynamic-power-step") && PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { pmac_cpufreq_init_7447A(cpunode); From 0417552730d0b1c30268fd32546914290b7c7ca0 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 Mar 2023 10:39:36 -0700 Subject: [PATCH 21/44] pm-graph: Update to v5.11 install_latest_from_github.sh: - Added a new script which allows users to install the latest pm-graph from the upstream github repo. This is useful if the kernel source version has issues that have already been fixed in github. sleepgraph.py: - Updated all the dmesg suspend/resume PM print formats to be able to process recent timelines using dmesg only. - Added ethtool output to the log for the system's ethernet device id the ethtool exists. This helps in debugging network issues. - Made the tool more robustly handle events where mangled dmesg or ftrace outputs do not include all the requisite data. The tool fails gracefully instead of creating a garbled timeline. Signed-off-by: Todd Brandt [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 2 +- .../pm-graph/install_latest_from_github.sh | 38 +++++++++++++ tools/power/pm-graph/sleepgraph.py | 53 +++++++++++++------ 3 files changed, 75 insertions(+), 18 deletions(-) create mode 100755 tools/power/pm-graph/install_latest_from_github.sh diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 3213dbe63b74..047ce1d76467 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -6,7 +6,7 @@ |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.10 + Version: 5.11 Author: Todd Brandt Home Page: https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html diff --git a/tools/power/pm-graph/install_latest_from_github.sh b/tools/power/pm-graph/install_latest_from_github.sh new file mode 100755 index 000000000000..eaa332399d36 --- /dev/null +++ b/tools/power/pm-graph/install_latest_from_github.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Script which clones and installs the latest pm-graph +# from http://github.com/intel/pm-graph.git + +OUT=`mktemp -d 2>/dev/null` +if [ -z "$OUT" -o ! -e $OUT ]; then + echo "ERROR: mktemp failed to create folder" + exit +fi + +cleanup() { + if [ -e "$OUT" ]; then + cd $OUT + rm -rf pm-graph + cd /tmp + rmdir $OUT + fi +} + +git clone http://github.com/intel/pm-graph.git $OUT/pm-graph +if [ ! -e "$OUT/pm-graph/sleepgraph.py" ]; then + echo "ERROR: pm-graph github repo failed to clone" + cleanup + exit +fi + +cd $OUT/pm-graph +echo "INSTALLING PM-GRAPH" +sudo make install +if [ $? -eq 0 ]; then + echo "INSTALL SUCCESS" + sleepgraph -v +else + echo "INSTALL FAILED" +fi +cleanup diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index bf4ac24a1c7a..ab703c9227d5 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -86,7 +86,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.10' + version = '5.11' ansi = False rs = 0 display = '' @@ -300,6 +300,7 @@ class SystemValues: [0, 'acpidevices', 'sh', '-c', 'ls -l /sys/bus/acpi/devices/*/physical_node'], [0, 's0ix_require', 'cat', '/sys/kernel/debug/pmc_core/substate_requirements'], [0, 's0ix_debug', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_debug_status'], + [0, 'ethtool', 'ethtool', '{ethdev}'], [1, 's0ix_residency', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_residency_usec'], [1, 'interrupts', 'cat', '/proc/interrupts'], [1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'], @@ -1078,18 +1079,35 @@ class SystemValues: else: out[data[0].strip()] = data[1] return out + def cmdinfovar(self, arg): + if arg == 'ethdev': + try: + cmd = [self.getExec('ip'), '-4', '-o', '-br', 'addr'] + fp = Popen(cmd, stdout=PIPE, stderr=PIPE).stdout + info = ascii(fp.read()).strip() + fp.close() + except: + return 'iptoolcrash' + for line in info.split('\n'): + if line[0] == 'e' and 'UP' in line: + return line.split()[0] + return 'nodevicefound' + return 'unknown' def cmdinfo(self, begin, debug=False): out = [] if begin: self.cmd1 = dict() for cargs in self.infocmds: - delta, name = cargs[0], cargs[1] - cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2]) + delta, name, args = cargs[0], cargs[1], cargs[2:] + for i in range(len(args)): + if args[i][0] == '{' and args[i][-1] == '}': + args[i] = self.cmdinfovar(args[i][1:-1]) + cmdline, cmdpath = ' '.join(args[0:]), self.getExec(args[0]) if not cmdpath or (begin and not delta): continue self.dlog('[%s]' % cmdline) try: - fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout + fp = Popen([cmdpath]+args[1:], stdout=PIPE, stderr=PIPE).stdout info = ascii(fp.read()).strip() fp.close() except: @@ -1452,6 +1470,7 @@ class Data: errlist = { 'HWERROR' : r'.*\[ *Hardware Error *\].*', 'FWBUG' : r'.*\[ *Firmware Bug *\].*', + 'TASKFAIL': r'.*Freezing .*after *.*', 'BUG' : r'(?i).*\bBUG\b.*', 'ERROR' : r'(?i).*\bERROR\b.*', 'WARNING' : r'(?i).*\bWARNING\b.*', @@ -1462,7 +1481,6 @@ class Data: 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*', 'ABORT' : r'(?i).*\bABORT\b.*', 'IRQ' : r'.*\bgenirq: .*', - 'TASKFAIL': r'.*Freezing .*after *.*', 'ACPI' : r'.*\bACPI *(?P[A-Za-z]*) *Error[: ].*', 'DISKFULL': r'.*\bNo space left on device.*', 'USBERR' : r'.*usb .*device .*, error [0-9-]*', @@ -1602,7 +1620,7 @@ class Data: pend = self.dmesg[phase]['end'] if start <= pend: return phase - return 'resume_complete' + return 'resume_complete' if 'resume_complete' in self.dmesg else '' def sourceDevice(self, phaselist, start, end, pid, type): tgtdev = '' for phase in phaselist: @@ -1645,6 +1663,8 @@ class Data: else: threadname = '%s-%d' % (proc, pid) tgtphase = self.sourcePhase(start) + if not tgtphase: + return False self.newAction(tgtphase, threadname, pid, '', start, end, '', ' kth', '') return self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata) # this should not happen @@ -1835,9 +1855,9 @@ class Data: hwr = self.hwend - timedelta(microseconds=rtime) self.tLow.append('%.0f'%((hwr - hws).total_seconds() * 1000)) def getTimeValues(self): - sktime = (self.tSuspended - self.tKernSus) * 1000 - rktime = (self.tKernRes - self.tResumed) * 1000 - return (sktime, rktime) + s = (self.tSuspended - self.tKernSus) * 1000 + r = (self.tKernRes - self.tResumed) * 1000 + return (max(s, 0), max(r, 0)) def setPhase(self, phase, ktime, isbegin, order=-1): if(isbegin): # phase start over current phase @@ -3961,7 +3981,7 @@ def parseKernelLog(data): 'suspend_machine': ['PM: suspend-to-idle', 'PM: noirq suspend of devices complete after.*', 'PM: noirq freeze of devices complete after.*'], - 'resume_machine': ['PM: Timekeeping suspended for.*', + 'resume_machine': ['[PM: ]*Timekeeping suspended for.*', 'ACPI: Low-level resume complete.*', 'ACPI: resume from mwait', 'Suspended for [0-9\.]* seconds'], @@ -3979,14 +3999,14 @@ def parseKernelLog(data): # action table (expected events that occur and show up in dmesg) at = { 'sync_filesystems': { - 'smsg': 'PM: Syncing filesystems.*', - 'emsg': 'PM: Preparing system for mem sleep.*' }, + 'smsg': '.*[Ff]+ilesystems.*', + 'emsg': 'PM: Preparing system for[a-z]* sleep.*' }, 'freeze_user_processes': { - 'smsg': 'Freezing user space processes .*', + 'smsg': 'Freezing user space processes.*', 'emsg': 'Freezing remaining freezable tasks.*' }, 'freeze_tasks': { 'smsg': 'Freezing remaining freezable tasks.*', - 'emsg': 'PM: Entering (?P[a-z,A-Z]*) sleep.*' }, + 'emsg': 'PM: Suspending system.*' }, 'ACPI prepare': { 'smsg': 'ACPI: Preparing to enter system sleep state.*', 'emsg': 'PM: Saving platform NVS memory.*' }, @@ -4120,10 +4140,9 @@ def parseKernelLog(data): for a in sorted(at): if(re.match(at[a]['smsg'], msg)): if(a not in actions): - actions[a] = [] - actions[a].append({'begin': ktime, 'end': ktime}) + actions[a] = [{'begin': ktime, 'end': ktime}] if(re.match(at[a]['emsg'], msg)): - if(a in actions): + if(a in actions and actions[a][-1]['begin'] == actions[a][-1]['end']): actions[a][-1]['end'] = ktime # now look for CPU on/off events if(re.match('Disabling non-boot CPUs .*', msg)): From 34ea427e01ea60d9a9328d2d5a72d43740be25bc Mon Sep 17 00:00:00 2001 From: Xueqin Luo Date: Thu, 16 Mar 2023 09:33:07 +0800 Subject: [PATCH 22/44] PM: tools: sleepgraph: Recognize "CPU killed" messages On the arm64 platform with PSCI, the core log of CPU offline is as follows: [ 100.431501] CPU1: shutdown [ 100.454820] psci: CPU1 killed (polled 20 ms) [ 100.459266] CPU2: shutdown [ 100.482575] psci: CPU2 killed (polled 20 ms) [ 100.486057] CPU3: shutdown [ 100.513974] psci: CPU3 killed (polled 28 ms) [ 100.518068] CPU4: shutdown [ 100.541481] psci: CPU4 killed (polled 24 ms) Prevent sleepgraph from mistakenly treating the "CPU up" message as part of the suspend flow (because it should be regarded as part of the resume flow) by making it recognize the "CPU* killed" messages above. Signed-off-by: Xueqin Luo [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index ab703c9227d5..4a356a706785 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4151,9 +4151,12 @@ def parseKernelLog(data): elif(re.match('Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)): + elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)) \ + or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next m = re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) + if(not m): + m = re.match('psci: CPU(?P[0-9]*) killed.*', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] From d51c63230994f167126d9d8381011b4cb2b0ad22 Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:27 +0800 Subject: [PATCH 23/44] cpufreq: mediatek: fix passing zero to 'PTR_ERR' In order to prevent passing zero to 'PTR_ERR' in mtk_cpu_dvfs_info_init(), we fix the return value of of_get_cci() using error pointer by explicitly casting error number. Signed-off-by: Jia-Wei Chang Fixes: 0daa47325bae ("cpufreq: mediatek: Link CCI device to CPU") Reported-by: Dan Carpenter Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 7f2680bc9a0f..01d949707c37 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -373,13 +373,13 @@ static struct device *of_get_cci(struct device *cpu_dev) struct platform_device *pdev; np = of_parse_phandle(cpu_dev->of_node, "mediatek,cci", 0); - if (IS_ERR_OR_NULL(np)) - return NULL; + if (!np) + return ERR_PTR(-ENODEV); pdev = of_find_device_by_node(np); of_node_put(np); - if (IS_ERR_OR_NULL(pdev)) - return NULL; + if (!pdev) + return ERR_PTR(-ENODEV); return &pdev->dev; } @@ -401,7 +401,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) info->ccifreq_bound = false; if (info->soc_data->ccifreq_supported) { info->cci_dev = of_get_cci(info->cpu_dev); - if (IS_ERR_OR_NULL(info->cci_dev)) { + if (IS_ERR(info->cci_dev)) { ret = PTR_ERR(info->cci_dev); dev_err(cpu_dev, "cpu%d: failed to get cci device\n", cpu); return -ENODEV; From d51e106240bc755cbe59634b70d567c192b045b2 Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:28 +0800 Subject: [PATCH 24/44] cpufreq: mediatek: fix KP caused by handler usage after regulator_put/clk_put Any kind of failure in mtk_cpu_dvfs_info_init() will lead to calling regulator_put() or clk_put() and the KP will occur since the regulator/clk handlers are used after released in mtk_cpu_dvfs_info_release(). To prevent the usage after regulator_put()/clk_put(), the regulator/clk handlers are addressed in a way of "Free the Last Thing Style". Signed-off-by: Jia-Wei Chang Fixes: 4b9ceb757bbb ("cpufreq: mediatek: Enable clocks and regulators") Suggested-by: AngeloGioacchino Del Regno Suggested-by: Dan Carpenter Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 62 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 01d949707c37..6dc225546a8d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -420,7 +420,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) ret = PTR_ERR(info->inter_clk); dev_err_probe(cpu_dev, ret, "cpu%d: failed to get intermediate clk\n", cpu); - goto out_free_resources; + goto out_free_mux_clock; } info->proc_reg = regulator_get_optional(cpu_dev, "proc"); @@ -428,13 +428,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) ret = PTR_ERR(info->proc_reg); dev_err_probe(cpu_dev, ret, "cpu%d: failed to get proc regulator\n", cpu); - goto out_free_resources; + goto out_free_inter_clock; } ret = regulator_enable(info->proc_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vproc\n", cpu); - goto out_free_resources; + goto out_free_proc_reg; } /* Both presence and absence of sram regulator are valid cases. */ @@ -442,14 +442,14 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) if (IS_ERR(info->sram_reg)) { ret = PTR_ERR(info->sram_reg); if (ret == -EPROBE_DEFER) - goto out_free_resources; + goto out_disable_proc_reg; info->sram_reg = NULL; } else { ret = regulator_enable(info->sram_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); - goto out_free_resources; + goto out_free_sram_reg; } } @@ -458,13 +458,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) if (ret) { dev_err(cpu_dev, "cpu%d: failed to get OPP-sharing information\n", cpu); - goto out_free_resources; + goto out_disable_sram_reg; } ret = dev_pm_opp_of_cpumask_add_table(&info->cpus); if (ret) { dev_warn(cpu_dev, "cpu%d: no OPP table\n", cpu); - goto out_free_resources; + goto out_disable_sram_reg; } ret = clk_prepare_enable(info->cpu_clk); @@ -533,43 +533,41 @@ out_disable_mux_clock: out_free_opp_table: dev_pm_opp_of_cpumask_remove_table(&info->cpus); -out_free_resources: - if (regulator_is_enabled(info->proc_reg)) - regulator_disable(info->proc_reg); - if (info->sram_reg && regulator_is_enabled(info->sram_reg)) +out_disable_sram_reg: + if (info->sram_reg) regulator_disable(info->sram_reg); - if (!IS_ERR(info->proc_reg)) - regulator_put(info->proc_reg); - if (!IS_ERR(info->sram_reg)) +out_free_sram_reg: + if (info->sram_reg) regulator_put(info->sram_reg); - if (!IS_ERR(info->cpu_clk)) - clk_put(info->cpu_clk); - if (!IS_ERR(info->inter_clk)) - clk_put(info->inter_clk); + +out_disable_proc_reg: + regulator_disable(info->proc_reg); + +out_free_proc_reg: + regulator_put(info->proc_reg); + +out_free_inter_clock: + clk_put(info->inter_clk); + +out_free_mux_clock: + clk_put(info->cpu_clk); return ret; } static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) { - if (!IS_ERR(info->proc_reg)) { - regulator_disable(info->proc_reg); - regulator_put(info->proc_reg); - } - if (!IS_ERR(info->sram_reg)) { + regulator_disable(info->proc_reg); + regulator_put(info->proc_reg); + if (info->sram_reg) { regulator_disable(info->sram_reg); regulator_put(info->sram_reg); } - if (!IS_ERR(info->cpu_clk)) { - clk_disable_unprepare(info->cpu_clk); - clk_put(info->cpu_clk); - } - if (!IS_ERR(info->inter_clk)) { - clk_disable_unprepare(info->inter_clk); - clk_put(info->inter_clk); - } - + clk_disable_unprepare(info->cpu_clk); + clk_put(info->cpu_clk); + clk_disable_unprepare(info->inter_clk); + clk_put(info->inter_clk); dev_pm_opp_of_cpumask_remove_table(&info->cpus); dev_pm_opp_unregister_notifier(info->cpu_dev, &info->opp_nb); } From d3296bb4cafd4bad4a5cf2eeab9d19cc94f9e30e Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:29 +0800 Subject: [PATCH 25/44] cpufreq: mediatek: raise proc/sram max voltage for MT8516 Since the upper boundary of proc/sram voltage of MT8516 is 1300 mV, which is greater than the value of MT2701 1150 mV, we fix it by adding the corresponding platform data and specify proc/sram_max_volt to support MT8516. Signed-off-by: Jia-Wei Chang Fixes: ead858bd128d ("cpufreq: mediatek: Move voltage limits to platform data") Fixes: 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") Reported-by: Nick Hainke Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 6dc225546a8d..764e4fbdd536 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -711,20 +711,29 @@ static const struct mtk_cpufreq_platform_data mt8186_platform_data = { .ccifreq_supported = true, }; +static const struct mtk_cpufreq_platform_data mt8516_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1310000, + .sram_min_volt = 0, + .sram_max_volt = 1310000, + .ccifreq_supported = false, +}; + /* List of machines supported by this driver */ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt8167", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8167", .data = &mt8516_platform_data }, { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8176", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8183", .data = &mt8183_platform_data }, { .compatible = "mediatek,mt8186", .data = &mt8186_platform_data }, { .compatible = "mediatek,mt8365", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt8516", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8516", .data = &mt8516_platform_data }, { } }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); From 0883426fd07e39355362e3f2eb9aee1a154dcaf6 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 24 Mar 2023 18:11:30 +0800 Subject: [PATCH 26/44] cpufreq: mediatek: Raise proc and sram max voltage for MT7622/7623 During the addition of SRAM voltage tracking for CCI scaling, this driver got some voltage limits set for the vtrack algorithm: these were moved to platform data first, then enforced in a later commit 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") using these as max values for the regulator_set_voltage() calls. In this case, the vsram/vproc constraints for MT7622 and MT7623 were supposed to be the same as MT2701 (and a number of other SoCs), but that turned out to be a mistake because the aforementioned two SoCs' maximum voltage for both VPROC and VPROC_SRAM is 1.36V. Fix that by adding new platform data for MT7622/7623 declaring the right {proc,sram}_max_volt parameter. Fixes: ead858bd128d ("cpufreq: mediatek: Move voltage limits to platform data") Fixes: 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Jia-Wei Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 764e4fbdd536..9a39a7ccfae9 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -693,6 +693,15 @@ static const struct mtk_cpufreq_platform_data mt2701_platform_data = { .ccifreq_supported = false, }; +static const struct mtk_cpufreq_platform_data mt7622_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1360000, + .sram_min_volt = 0, + .sram_max_volt = 1360000, + .ccifreq_supported = false, +}; + static const struct mtk_cpufreq_platform_data mt8183_platform_data = { .min_volt_shift = 100000, .max_volt_shift = 200000, @@ -724,8 +733,8 @@ static const struct mtk_cpufreq_platform_data mt8516_platform_data = { static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7622", .data = &mt7622_platform_data }, + { .compatible = "mediatek,mt7623", .data = &mt7622_platform_data }, { .compatible = "mediatek,mt8167", .data = &mt8516_platform_data }, { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, From ba5e770c9698782bc203bbf5cf3b36a77720bdbe Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 23 Mar 2023 18:40:26 +0100 Subject: [PATCH 27/44] cpufreq: qcom-cpufreq-hw: fix double IO unmap and resource release on exit Commit 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") moved getting memory resource and iomap from qcom_cpufreq_hw_cpu_init() to the probe function, however it left untouched cleanup in qcom_cpufreq_hw_cpu_exit(). During device unbind this will lead to doule release of resource and double iounmap(), first by qcom_cpufreq_hw_cpu_exit() and second via managed resources: resource: Trying to free nonexistent resource <0x0000000018593000-0x0000000018593fff> Trying to vunmap() nonexistent vm area (0000000088a7d4dc) ... vunmap (mm/vmalloc.c:2771 (discriminator 1)) iounmap (mm/ioremap.c:60) devm_ioremap_release (lib/devres.c:19) devres_release_all (drivers/base/devres.c:506 drivers/base/devres.c:535) device_unbind_cleanup (drivers/base/dd.c:523) device_release_driver_internal (drivers/base/dd.c:1248 drivers/base/dd.c:1263) device_driver_detach (drivers/base/dd.c:1300) unbind_store (drivers/base/bus.c:243) drv_attr_store (drivers/base/bus.c:127) sysfs_kf_write (fs/sysfs/file.c:137) kernfs_fop_write_iter (fs/kernfs/file.c:334) vfs_write (include/linux/fs.h:1851 fs/read_write.c:491 fs/read_write.c:584) ksys_write (fs/read_write.c:637) __arm64_sys_write (fs/read_write.c:646) invoke_syscall (arch/arm64/include/asm/current.h:19 arch/arm64/kernel/syscall.c:57) el0_svc_common.constprop.0 (arch/arm64/include/asm/daifflags.h:28 arch/arm64/kernel/syscall.c:150) do_el0_svc (arch/arm64/kernel/syscall.c:194) el0_svc (arch/arm64/include/asm/daifflags.h:28 arch/arm64/kernel/entry-common.c:133 arch/arm64/kernel/entry-common.c:142 arch/arm64/kernel/entry-common.c:638) el0t_64_sync_handler (arch/arm64/kernel/entry-common.c:656) el0t_64_sync (arch/arm64/kernel/entry.S:591) Fixes: 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") Cc: Cc: Manivannan Sadhasivam Signed-off-by: Krzysztof Kozlowski Reviewed-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 575a4461c25a..b75e6157dc81 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -45,7 +45,6 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; - struct resource *res; /* * Mutex to synchronize between de-init sequence and re-starting LMh @@ -592,16 +591,12 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct device *cpu_dev = get_cpu_device(policy->cpu); struct qcom_cpufreq_data *data = policy->driver_data; - struct resource *res = data->res; - void __iomem *base = data->base; dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); qcom_cpufreq_hw_lmh_exit(data); kfree(policy->freq_table); kfree(data); - iounmap(base); - release_mem_region(res->start, resource_size(res)); return 0; } @@ -704,17 +699,15 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) for (i = 0; i < num_domains; i++) { struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; struct clk_init_data clk_init = {}; - struct resource *res; void __iomem *base; - base = devm_platform_get_and_ioremap_resource(pdev, i, &res); + base = devm_platform_ioremap_resource(pdev, i); if (IS_ERR(base)) { - dev_err(dev, "Failed to map resource %pR\n", res); + dev_err(dev, "Failed to map resource index %d\n", i); return PTR_ERR(base); } data->base = base; - data->res = res; /* Register CPU clock for each frequency domain */ clk_init.name = kasprintf(GFP_KERNEL, "qcom_cpufreq%d", i); From 417598f998520b1484a7cef2ac0563de7d975937 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 24 Mar 2023 15:06:24 +0100 Subject: [PATCH 28/44] cpufreq: Add SM7225 to cpufreq-dt-platdev blocklist The Qualcomm SM7225 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Luca Weiss Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 5ac6b9e5270e..452181434735 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -152,6 +152,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm6375", }, + { .compatible = "qcom,sm7225", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, From 287143d886961194c23c98ebbde5c0fad39e12b8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:26:58 +0100 Subject: [PATCH 29/44] dt-bindings: cpufreq: cpufreq-qcom-hw: Allow just 1 frequency domain Some SoCs implementing CPUFREQ-HW only have a single frequency domain. Allow such case. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 92693c33edf9..276e2871fc99 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -37,14 +37,14 @@ properties: - const: qcom,cpufreq-epss reg: - minItems: 2 + minItems: 1 items: - description: Frequency domain 0 register region - description: Frequency domain 1 register region - description: Frequency domain 2 register region reg-names: - minItems: 2 + minItems: 1 items: - const: freq-domain0 - const: freq-domain1 From 7ae24e054f753ff40ca1adaa631edd7476590a5f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:26:59 +0100 Subject: [PATCH 30/44] dt-bindings: cpufreq: cpufreq-qcom-hw: Sanitize data per compatible Introduce per-SoC compatibles for OSM targets (read: pre-sm8250) and sanitize the number of interrupt{s,-names} and reg/-names per-compatible. Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 276e2871fc99..b152fbe8a668 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -20,6 +20,12 @@ properties: oneOf: - description: v1 of CPUFREQ HW items: + - enum: + - qcom,sc7180-cpufreq-hw + - qcom,sdm845-cpufreq-hw + - qcom,sm6115-cpufreq-hw + - qcom,sm6350-cpufreq-hw + - qcom,sm8150-cpufreq-hw - const: qcom,cpufreq-hw - description: v2 of CPUFREQ HW (EPSS) @@ -86,6 +92,88 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,qdu1000-cpufreq-epss + - qcom,sc7180-cpufreq-hw + - qcom,sc8280xp-cpufreq-epss + - qcom,sdm845-cpufreq-hw + - qcom,sm6115-cpufreq-hw + - qcom,sm6350-cpufreq-hw + - qcom,sm6375-cpufreq-epss + then: + properties: + reg: + minItems: 2 + maxItems: 2 + + reg-names: + minItems: 2 + maxItems: 2 + + interrupts: + minItems: 2 + maxItems: 2 + + interrupt-names: + minItems: 2 + + - if: + properties: + compatible: + contains: + enum: + - qcom,sc7280-cpufreq-epss + - qcom,sm8250-cpufreq-epss + - qcom,sm8350-cpufreq-epss + - qcom,sm8450-cpufreq-epss + - qcom,sm8550-cpufreq-epss + then: + properties: + reg: + minItems: 3 + maxItems: 3 + + reg-names: + minItems: 3 + maxItems: 3 + + interrupts: + minItems: 3 + maxItems: 3 + + interrupt-names: + minItems: 3 + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8150-cpufreq-hw + then: + properties: + reg: + minItems: 3 + maxItems: 3 + + reg-names: + minItems: 3 + maxItems: 3 + + # On some SoCs the Prime core shares the LMH irq with Big cores + interrupts: + minItems: 2 + maxItems: 2 + + interrupt-names: + minItems: 2 + + examples: - | #include @@ -236,7 +324,7 @@ examples: #size-cells = <1>; cpufreq@17d43000 { - compatible = "qcom,cpufreq-hw"; + compatible = "qcom,sdm845-cpufreq-hw", "qcom,cpufreq-hw"; reg = <0x17d43000 0x1400>, <0x17d45800 0x1400>; reg-names = "freq-domain0", "freq-domain1"; From e69003202434131510ab18acda7d251b1975ecb1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:27:00 +0100 Subject: [PATCH 31/44] dt-bindings: cpufreq: cpufreq-qcom-hw: Add QCM2290 Document the OSM CPUFREQ_HW present on QCM2290, featuring just one lonely frequency domain. Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index b152fbe8a668..a6b3bb8fdf33 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -21,6 +21,7 @@ properties: - description: v1 of CPUFREQ HW items: - enum: + - qcom,qcm2290-cpufreq-hw - qcom,sc7180-cpufreq-hw - qcom,sdm845-cpufreq-hw - qcom,sm6115-cpufreq-hw @@ -93,6 +94,29 @@ required: additionalProperties: false allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,qcm2290-cpufreq-hw + then: + properties: + reg: + minItems: 1 + maxItems: 1 + + reg-names: + minItems: 1 + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 1 + + interrupt-names: + minItems: 1 + - if: properties: compatible: From e2b47e585931a988c856fd4ba31e1296f749aee3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 23 Mar 2023 15:33:43 -0700 Subject: [PATCH 32/44] cpufreq: qcom-cpufreq-hw: Revert adding cpufreq qos The OSM/EPSS hardware controls the frequency of each CPU cluster based on requests from the OS and various throttling events in the system. While throttling is in effect the related dcvs interrupt will be kept high. The purpose of the code handling this interrupt is to continuously report the thermal pressure based on the throttled frequency. The reasoning for adding QoS control to this mechanism is not entirely clear, but the introduction of commit 'c4c0efb06f17 ("cpufreq: qcom-cpufreq-hw: Add cpufreq qos for LMh")' causes the scaling_max_frequncy to be set to the throttled frequency. On the next iteration of polling, the throttled frequency is above or equal to the newly requested frequency, so the polling is stopped. With cpufreq limiting the max frequency, the hardware no longer report a throttling state and no further updates to thermal pressure or qos state are made. The result of this is that scaling_max_frequency can only go down, and the system becomes slower and slower every time a thermal throttling event is reported by the hardware. Even if the logic could be improved, there is no reason for software to limit the max freqency in response to the hardware limiting the max frequency. At best software will follow the reported hardware state, but typically it will cause slower backoff of the throttling. This reverts commit c4c0efb06f17fa4a37ad99e7752b18a5405c76dc. Fixes: c4c0efb06f17 ("cpufreq: qcom-cpufreq-hw: Add cpufreq qos for LMh") Reported-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index b75e6157dc81..eb54f7f17ab5 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -59,8 +58,6 @@ struct qcom_cpufreq_data { struct clk_hw cpu_clk; bool per_core_dcvs; - - struct freq_qos_request throttle_freq_req; }; static struct { @@ -350,8 +347,6 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) throttled_freq = freq_hz / HZ_PER_KHZ; - freq_qos_update_request(&data->throttle_freq_req, throttled_freq); - /* Update thermal pressure (the boost frequencies are accepted) */ arch_update_thermal_pressure(policy->related_cpus, throttled_freq); @@ -444,14 +439,6 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) if (data->throttle_irq < 0) return data->throttle_irq; - ret = freq_qos_add_request(&policy->constraints, - &data->throttle_freq_req, FREQ_QOS_MAX, - FREQ_QOS_MAX_DEFAULT_VALUE); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to add freq constraint (%d)\n", ret); - return ret; - } - data->cancel_throttle = false; data->policy = policy; @@ -518,7 +505,6 @@ static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) if (data->throttle_irq <= 0) return; - freq_qos_remove_request(&data->throttle_freq_req); free_irq(data->throttle_irq, data); } From 29b1a92e5e953214388ac1f3656cb3af266ca74f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2023 16:36:37 +0530 Subject: [PATCH 33/44] OPP: Handle all genpd cases together in _set_required_opps() There is no real need of keeping separate code for single genpd case, it can be made to work with a simple change. Signed-off-by: Viresh Kumar Reviewed-by: Ulf Hansson --- drivers/opp/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e87567dbe99f..6d7016ce8c53 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -959,7 +959,8 @@ static int _set_required_opps(struct device *dev, struct dev_pm_opp *opp, bool up) { struct opp_table **required_opp_tables = opp_table->required_opp_tables; - struct device **genpd_virt_devs = opp_table->genpd_virt_devs; + struct device **genpd_virt_devs = + opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; int i, ret = 0; if (!required_opp_tables) @@ -979,12 +980,6 @@ static int _set_required_opps(struct device *dev, return -ENOENT; } - /* Single genpd case */ - if (!genpd_virt_devs) - return _set_required_opp(dev, dev, opp, 0); - - /* Multiple genpd case */ - /* * Acquire genpd_virt_dev_lock to make sure we don't use a genpd_dev * after it is freed from another thread. From 528f2d8d540a3c374d6b765c769620d91536b176 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2023 16:36:38 +0530 Subject: [PATCH 34/44] OPP: Move required opps configuration to specialized callback The required-opps configuration is closely tied to genpd and performance states at the moment and it is not very obvious that required-opps can live without genpds. Though we don't support configuring required-opps for non-genpd cases currently. This commit aims at separating these parts, where configuring genpds would be a special case of configuring the required-opps. Add a specialized callback, set_required_opps(), to the opp table and set it to different callbacks accordingly. This shouldn't result in any functional changes for now. Signed-off-by: Viresh Kumar Reviewed-by: Ulf Hansson --- drivers/opp/core.c | 69 ++++++++++++++++++++++++++++------------------ drivers/opp/of.c | 3 ++ drivers/opp/opp.h | 4 +++ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 6d7016ce8c53..954c94865cf5 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -935,8 +935,8 @@ static int _set_opp_bw(const struct opp_table *opp_table, return 0; } -static int _set_required_opp(struct device *dev, struct device *pd_dev, - struct dev_pm_opp *opp, int i) +static int _set_performance_state(struct device *dev, struct device *pd_dev, + struct dev_pm_opp *opp, int i) { unsigned int pstate = likely(opp) ? opp->required_opps[i]->pstate : 0; int ret; @@ -953,33 +953,20 @@ static int _set_required_opp(struct device *dev, struct device *pd_dev, return ret; } -/* This is only called for PM domain for now */ -static int _set_required_opps(struct device *dev, - struct opp_table *opp_table, - struct dev_pm_opp *opp, bool up) +static int _opp_set_required_opps_generic(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) +{ + dev_err(dev, "setting required-opps isn't supported for non-genpd devices\n"); + return -ENOENT; +} + +static int _opp_set_required_opps_genpd(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) { - struct opp_table **required_opp_tables = opp_table->required_opp_tables; struct device **genpd_virt_devs = opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; int i, ret = 0; - if (!required_opp_tables) - return 0; - - /* required-opps not fully initialized yet */ - if (lazy_linking_pending(opp_table)) - return -EBUSY; - - /* - * We only support genpd's OPPs in the "required-opps" for now, as we - * don't know much about other use cases. Error out if the required OPP - * doesn't belong to a genpd. - */ - if (unlikely(!required_opp_tables[0]->is_genpd)) { - dev_err(dev, "required-opps don't belong to a genpd\n"); - return -ENOENT; - } - /* * Acquire genpd_virt_dev_lock to make sure we don't use a genpd_dev * after it is freed from another thread. @@ -987,15 +974,15 @@ static int _set_required_opps(struct device *dev, mutex_lock(&opp_table->genpd_virt_dev_lock); /* Scaling up? Set required OPPs in normal order, else reverse */ - if (up) { + if (!scaling_down) { for (i = 0; i < opp_table->required_opp_count; i++) { - ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + ret = _set_performance_state(dev, genpd_virt_devs[i], opp, i); if (ret) break; } } else { for (i = opp_table->required_opp_count - 1; i >= 0; i--) { - ret = _set_required_opp(dev, genpd_virt_devs[i], opp, i); + ret = _set_performance_state(dev, genpd_virt_devs[i], opp, i); if (ret) break; } @@ -1006,6 +993,34 @@ static int _set_required_opps(struct device *dev, return ret; } +/* This is only called for PM domain for now */ +static int _set_required_opps(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, bool up) +{ + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return -EBUSY; + + if (opp_table->set_required_opps) + return opp_table->set_required_opps(dev, opp_table, opp, up); + + return 0; +} + +/* Update set_required_opps handler */ +void _update_set_required_opps(struct opp_table *opp_table) +{ + /* Already set */ + if (opp_table->set_required_opps) + return; + + /* All required OPPs will belong to genpd or none */ + if (opp_table->required_opp_tables[0]->is_genpd) + opp_table->set_required_opps = _opp_set_required_opps_genpd; + else + opp_table->set_required_opps = _opp_set_required_opps_generic; +} + static void _find_current_opp(struct device *dev, struct opp_table *opp_table) { struct dev_pm_opp *opp = ERR_PTR(-ENODEV); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index bed2b651deb0..233eab70e15e 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -196,6 +196,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, /* Let's do the linking later on */ if (lazy) list_add(&opp_table->lazy, &lazy_opp_tables); + else + _update_set_required_opps(opp_table); goto put_np; @@ -411,6 +413,7 @@ static void lazy_link_required_opp_table(struct opp_table *new_table) /* All required opp-tables found, remove from lazy list */ if (!lazy) { + _update_set_required_opps(opp_table); list_del_init(&opp_table->lazy); list_for_each_entry(opp, &opp_table->opp_list, node) diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 3a6e077df386..2a057c42ddf4 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -184,6 +184,7 @@ enum opp_table_access { * @enabled: Set to true if the device's resources are enabled/configured. * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. + * @set_required_opps: Helper responsible to set required OPPs. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -234,6 +235,8 @@ struct opp_table { bool enabled; bool genpd_performance_state; bool is_genpd; + int (*set_required_opps)(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; @@ -257,6 +260,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cp struct opp_table *_add_opp_table_indexed(struct device *dev, int index, bool getclk); void _put_opp_list_kref(struct opp_table *opp_table); void _required_opps_available(struct dev_pm_opp *opp, int count); +void _update_set_required_opps(struct opp_table *opp_table); static inline bool lazy_linking_pending(struct opp_table *opp_table) { From 376b1446153ca67e7028e6b9555d9b17477f568b Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 28 Mar 2023 13:38:53 +0800 Subject: [PATCH 35/44] PM / devfreq: exynos-ppmu: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Reviewed-by: Andi Shyti Signed-off-by: Chanwoo Choi --- drivers/devfreq/event/exynos-ppmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index a443e7c42daf..896a6cc93b00 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -621,8 +621,7 @@ static int exynos_ppmu_parse_dt(struct platform_device *pdev, } /* Maps the memory mapped IO to control PPMU register */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(dev, res); + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); From a038895e25b296ca1ef0254f92673ea64bc1a2ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Apr 2023 05:09:10 +0530 Subject: [PATCH 36/44] cpufreq: drivers with target_index() must set freq_table Since the cpufreq core directly uses freq_table, for cpufreq drivers that set their target_index() callback, make it mandatory for them to set the same. Since this is set per policy and normally from policy->init(), do this from cpufreq_table_validate_and_sort() which gets called right after ->init(). Reported-by: Yajun Deng Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++++ drivers/cpufreq/freq_table.c | 7 ++++++- include/linux/cpufreq.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d61f7308f63c..b24ed81ba8bd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -73,6 +73,11 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } +bool has_target_index(void) +{ + return !!cpufreq_driver->target_index; +} + /* internal prototypes */ static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_init_governor(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638ef..ddd4832bcc0d 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -355,8 +355,13 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) { int ret; - if (!policy->freq_table) + if (!policy->freq_table) { + /* Freq table must be passed by drivers with target_index() */ + if (has_target_index()) + return -EINVAL; + return 0; + } ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); if (ret) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 65623233ab2f..541013487a0e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -237,6 +237,7 @@ bool cpufreq_supports_freq_invariance(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); +bool has_target_index(void); #else static inline unsigned int cpufreq_get(unsigned int cpu) { From 73d73f5ee7fb0c42ff87091d105bee720a9565f1 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sun, 26 Mar 2023 06:19:35 +0800 Subject: [PATCH 37/44] PM: core: Remove unnecessary (void *) conversions Assignments from pointer variables of type (void *) do not require explicit type casts, so remove such type cases from the code in drivers/base/power/main.c where applicable. Signed-off-by: Li zeming [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c50139207794..f85f3515c258 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -679,7 +679,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_noirq(dev, pm_transition, true); @@ -816,7 +816,7 @@ Out: static void async_resume_early(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_early(dev, pm_transition, true); @@ -980,7 +980,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) static void async_resume(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume(dev, pm_transition, true); @@ -1269,7 +1269,7 @@ Complete: static void async_suspend_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_noirq(dev, pm_transition, true); @@ -1450,7 +1450,7 @@ Complete: static void async_suspend_late(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_late(dev, pm_transition, true); @@ -1727,7 +1727,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) static void async_suspend(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend(dev, pm_transition, true); From 11fa52fe619acfa945712d94a0bc27c0f5bc49de Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 3 Apr 2023 20:33:37 -0400 Subject: [PATCH 38/44] cpufreq: amd-pstate: Make varaiable mode_state_machine static smatch reports drivers/cpufreq/amd-pstate.c:907:25: warning: symbol 'mode_state_machine' was not declared. Should it be static? This variable is only used in one file so it should be static. Signed-off-by: Tom Rix Reviewed-by: Wyes Karny Tested-by: Wyes Karny Reviewed-by: Dhruva Gole [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 7955cfc91c31..fcb54a6f6598 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -905,7 +905,7 @@ static int amd_pstate_change_driver_mode(int mode) return 0; } -cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { +static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { [AMD_PSTATE_DISABLE] = { [AMD_PSTATE_DISABLE] = NULL, [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, From f41e1442ac5bd687389d6104d7b74766db821eb3 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Tue, 11 Apr 2023 16:29:59 +0530 Subject: [PATCH 39/44] cpufreq: tegra194: add OPP support and set bandwidth Add support to use OPP table from DT in Tegra194 cpufreq driver. Tegra SoC's receive the frequency lookup table (LUT) from BPMP-FW. Cross check the OPP's present in DT against the LUT from BPMP-FW and enable only those DT OPP's which are present in LUT also. The OPP table in DT has CPU Frequency to bandwidth mapping where the bandwidth value is per MC channel. DRAM bandwidth depends on the number of MC channels which can vary as per the boot configuration. This per channel bandwidth from OPP table will be later converted by MC driver to final bandwidth value by multiplying with number of channels before sending the request to BPMP-FW. If OPP table is not present in DT, then use the LUT from BPMP-FW directy as the CPU frequency table and not do the DRAM frequency scaling which is same as the current behavior. Now, as the CPU Frequency table is being controlling through OPP table in DT. Keeping fewer entries in the table will create less frequency steps and can help to scale fast to high frequencies when required. Signed-off-by: Sumit Gupta Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 156 ++++++++++++++++++++++++++--- 1 file changed, 143 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 5890e25d7f77..c8d03346068a 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -65,12 +66,36 @@ struct tegra_cpufreq_soc { struct tegra194_cpufreq_data { void __iomem *regs; - struct cpufreq_frequency_table **tables; + struct cpufreq_frequency_table **bpmp_luts; const struct tegra_cpufreq_soc *soc; + bool icc_dram_bw_scaling; }; static struct workqueue_struct *read_counters_wq; +static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + struct dev_pm_opp *opp; + struct device *dev; + int ret; + + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + opp = dev_pm_opp_find_freq_exact(dev, freq_khz * KHZ, true); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + ret = dev_pm_opp_set_opp(dev, opp); + if (ret) + data->icc_dram_bw_scaling = false; + + dev_pm_opp_put(opp); + return ret; +} + static void tegra_get_cpu_mpidr(void *mpidr) { *((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; @@ -354,7 +379,7 @@ static unsigned int tegra194_get_speed(u32 cpu) * to the last written ndiv value from freq_table. This is * done to return consistent value. */ - cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) { + cpufreq_for_each_valid_entry(pos, data->bpmp_luts[clusterid]) { if (pos->driver_data != ndiv) continue; @@ -369,16 +394,93 @@ static unsigned int tegra194_get_speed(u32 cpu) return rate; } +static int tegra_cpufreq_init_cpufreq_table(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *bpmp_lut, + struct cpufreq_frequency_table **opp_table) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + struct cpufreq_frequency_table *freq_table = NULL; + struct cpufreq_frequency_table *pos; + struct device *cpu_dev; + struct dev_pm_opp *opp; + unsigned long rate; + int ret, max_opps; + int j = 0; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu); + return -ENODEV; + } + + /* Initialize OPP table mentioned in operating-points-v2 property in DT */ + ret = dev_pm_opp_of_add_table_indexed(cpu_dev, 0); + if (!ret) { + max_opps = dev_pm_opp_get_opp_count(cpu_dev); + if (max_opps <= 0) { + dev_err(cpu_dev, "Failed to add OPPs\n"); + return max_opps; + } + + /* Disable all opps and cross-validate against LUT later */ + for (rate = 0; ; rate++) { + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + dev_pm_opp_disable(cpu_dev, rate); + } + } else { + dev_err(cpu_dev, "Invalid or empty opp table in device tree\n"); + data->icc_dram_bw_scaling = false; + return ret; + } + + freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL); + if (!freq_table) + return -ENOMEM; + + /* + * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT. + * Enable only those DT OPP's which are present in LUT also. + */ + cpufreq_for_each_valid_entry(pos, bpmp_lut) { + opp = dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * KHZ, false); + if (IS_ERR(opp)) + continue; + + ret = dev_pm_opp_enable(cpu_dev, pos->frequency * KHZ); + if (ret < 0) + return ret; + + freq_table[j].driver_data = pos->driver_data; + freq_table[j].frequency = pos->frequency; + j++; + } + + freq_table[j].driver_data = pos->driver_data; + freq_table[j].frequency = CPUFREQ_TABLE_END; + + *opp_table = &freq_table[0]; + + dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + + return ret; +} + static int tegra194_cpufreq_init(struct cpufreq_policy *policy) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_frequency_table *bpmp_lut; u32 start_cpu, cpu; u32 clusterid; + int ret; data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); - - if (clusterid >= data->soc->num_clusters || !data->tables[clusterid]) + if (clusterid >= data->soc->num_clusters || !data->bpmp_luts[clusterid]) return -EINVAL; start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); @@ -387,9 +489,22 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) if (cpu_possible(cpu)) cpumask_set_cpu(cpu, policy->cpus); } - policy->freq_table = data->tables[clusterid]; policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; + bpmp_lut = data->bpmp_luts[clusterid]; + + if (data->icc_dram_bw_scaling) { + ret = tegra_cpufreq_init_cpufreq_table(policy, bpmp_lut, &freq_table); + if (!ret) { + policy->freq_table = freq_table; + return 0; + } + } + + data->icc_dram_bw_scaling = false; + policy->freq_table = bpmp_lut; + pr_info("OPP tables missing from DT, EMC frequency scaling disabled\n"); + return 0; } @@ -406,6 +521,9 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, */ data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); + if (data->icc_dram_bw_scaling) + tegra_cpufreq_set_bw(policy, tbl->frequency); + return 0; } @@ -439,8 +557,8 @@ static void tegra194_cpufreq_free_resources(void) } static struct cpufreq_frequency_table * -init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp, - unsigned int cluster_id) +tegra_cpufreq_bpmp_read_lut(struct platform_device *pdev, struct tegra_bpmp *bpmp, + unsigned int cluster_id) { struct cpufreq_frequency_table *freq_table; struct mrq_cpu_ndiv_limits_response resp; @@ -515,6 +633,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) const struct tegra_cpufreq_soc *soc; struct tegra194_cpufreq_data *data; struct tegra_bpmp *bpmp; + struct device *cpu_dev; int err, i; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); @@ -530,9 +649,9 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) return -EINVAL; } - data->tables = devm_kcalloc(&pdev->dev, data->soc->num_clusters, - sizeof(*data->tables), GFP_KERNEL); - if (!data->tables) + data->bpmp_luts = devm_kcalloc(&pdev->dev, data->soc->num_clusters, + sizeof(*data->bpmp_luts), GFP_KERNEL); + if (!data->bpmp_luts) return -ENOMEM; if (soc->actmon_cntr_base) { @@ -556,15 +675,26 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) } for (i = 0; i < data->soc->num_clusters; i++) { - data->tables[i] = init_freq_table(pdev, bpmp, i); - if (IS_ERR(data->tables[i])) { - err = PTR_ERR(data->tables[i]); + data->bpmp_luts[i] = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, i); + if (IS_ERR(data->bpmp_luts[i])) { + err = PTR_ERR(data->bpmp_luts[i]); goto err_free_res; } } tegra194_cpufreq_driver.driver_data = data; + /* Check for optional OPPv2 and interconnect paths on CPU0 to enable ICC scaling */ + cpu_dev = get_cpu_device(0); + if (!cpu_dev) + return -EPROBE_DEFER; + + if (dev_pm_opp_of_get_opp_desc_node(cpu_dev)) { + err = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL); + if (!err) + data->icc_dram_bw_scaling = true; + } + err = cpufreq_register_driver(&tegra194_cpufreq_driver); if (!err) goto put_bpmp; From 44295af5019f1997d038ad2611086a2d1e2af167 Mon Sep 17 00:00:00 2001 From: Sanjay Chandrashekara Date: Tue, 18 Apr 2023 17:04:54 +0530 Subject: [PATCH 40/44] cpufreq: use correct unit when verify cur freq cpufreq_verify_current_freq checks() if the frequency returned by the hardware has a slight delta with the valid frequency value last set and returns "policy->cur" if the delta is within "1 MHz". In the comparison, "policy->cur" is in "kHz" but it's compared against HZ_PER_MHZ. So, the comparison range becomes "1 GHz". Fix this by comparing against KHZ_PER_MHZ instead of HZ_PER_MHZ. Fixes: f55ae08c8987 ("cpufreq: Avoid unnecessary frequency updates due to mismatch") Signed-off-by: Sanjay Chandrashekara [ sumit gupta: Commit message update ] Signed-off-by: Sumit Gupta Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b24ed81ba8bd..2ac905c637dd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1732,7 +1732,7 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b * MHz. In such cases it is better to avoid getting into * unnecessary frequency updates. */ - if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + if (abs(policy->cur - new_freq) < KHZ_PER_MHZ) return policy->cur; cpufreq_out_of_sync(policy, new_freq); From b52124a78ab34eb0754e32edc0c9996937779176 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:05 -0500 Subject: [PATCH 41/44] PM: Add sysfs files to represent time spent in hardware sleep state Userspace can't easily discover how much of a sleep cycle was spent in a hardware sleep state without using kernel tracing and vendor specific sysfs or debugfs files. To make this information more discoverable, introduce 3 new sysfs files: 1) The time spent in a hw sleep state for last cycle. 2) The time spent in a hw sleep state since the kernel booted 3) The maximum time that the hardware can report for a sleep cycle. All of these files will be present only if the system supports s2idle. Reviewed-by: Hans de Goede Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 29 +++++++++++++ include/linux/suspend.h | 8 ++++ kernel/power/main.c | 59 +++++++++++++++++++++------ 3 files changed, 84 insertions(+), 12 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index f99d433ff311..a3942b1036e2 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -413,6 +413,35 @@ Description: The /sys/power/suspend_stats/last_failed_step file contains the last failed step in the suspend/resume path. +What: /sys/power/suspend_stats/last_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/last_hw_sleep file + contains the duration of time spent in a hardware sleep + state in the most recent system suspend-resume cycle. + This number is measured in microseconds. + +What: /sys/power/suspend_stats/total_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/total_hw_sleep file + contains the aggregate of time spent in a hardware sleep + state since the kernel was booted. This number + is measured in microseconds. + +What: /sys/power/suspend_stats/max_hw_sleep +Date: June 2023 +Contact: Mario Limonciello +Description: + The /sys/power/suspend_stats/max_hw_sleep file + contains the maximum amount of time that the hardware can + report for time spent in a hardware sleep state. When sleep + cycles are longer than this time, the values for + 'total_hw_sleep' and 'last_hw_sleep' may not be accurate. + This number is measured in microseconds. + What: /sys/power/sync_on_suspend Date: October 2019 Contact: Jonas Meurer diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cfe19a028918..d0d4598a7b3f 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -68,6 +68,9 @@ struct suspend_stats { int last_failed_errno; int errno[REC_FAILED_NUM]; int last_failed_step; + u64 last_hw_sleep; + u64 total_hw_sleep; + u64 max_hw_sleep; enum suspend_stat_step failed_steps[REC_FAILED_NUM]; }; @@ -489,6 +492,8 @@ void restore_processor_state(void); extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); +extern void pm_report_hw_sleep_time(u64 t); +extern void pm_report_max_hw_sleep(u64 t); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -526,6 +531,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void pm_report_hw_sleep_time(u64 t) {}; +static inline void pm_report_max_hw_sleep(u64 t) {}; + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/kernel/power/main.c b/kernel/power/main.c index 31ec4a9b9d70..3113ec2f1db4 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -6,6 +6,7 @@ * Copyright (c) 2003 Open Source Development Lab */ +#include #include #include #include @@ -83,6 +84,19 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); +void pm_report_hw_sleep_time(u64 t) +{ + suspend_stats.last_hw_sleep = t; + suspend_stats.total_hw_sleep += t; +} +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); + +void pm_report_max_hw_sleep(u64 t) +{ + suspend_stats.max_hw_sleep = t; +} +EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); + int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; @@ -314,24 +328,27 @@ static char *suspend_step_name(enum suspend_stat_step step) } } -#define suspend_attr(_name) \ +#define suspend_attr(_name, format_str) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ - return sprintf(buf, "%d\n", suspend_stats._name); \ + return sprintf(buf, format_str, suspend_stats._name); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) -suspend_attr(success); -suspend_attr(fail); -suspend_attr(failed_freeze); -suspend_attr(failed_prepare); -suspend_attr(failed_suspend); -suspend_attr(failed_suspend_late); -suspend_attr(failed_suspend_noirq); -suspend_attr(failed_resume); -suspend_attr(failed_resume_early); -suspend_attr(failed_resume_noirq); +suspend_attr(success, "%d\n"); +suspend_attr(fail, "%d\n"); +suspend_attr(failed_freeze, "%d\n"); +suspend_attr(failed_prepare, "%d\n"); +suspend_attr(failed_suspend, "%d\n"); +suspend_attr(failed_suspend_late, "%d\n"); +suspend_attr(failed_suspend_noirq, "%d\n"); +suspend_attr(failed_resume, "%d\n"); +suspend_attr(failed_resume_early, "%d\n"); +suspend_attr(failed_resume_noirq, "%d\n"); +suspend_attr(last_hw_sleep, "%llu\n"); +suspend_attr(total_hw_sleep, "%llu\n"); +suspend_attr(max_hw_sleep, "%llu\n"); static ssize_t last_failed_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -391,12 +408,30 @@ static struct attribute *suspend_attrs[] = { &last_failed_dev.attr, &last_failed_errno.attr, &last_failed_step.attr, + &last_hw_sleep.attr, + &total_hw_sleep.attr, + &max_hw_sleep.attr, NULL, }; +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + if (attr != &last_hw_sleep.attr && + attr != &total_hw_sleep.attr && + attr != &max_hw_sleep.attr) + return 0444; + +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) + return 0444; +#endif + return 0; +} + static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, + .is_visible = suspend_attr_is_visible, }; #ifdef CONFIG_DEBUG_FS From 09f5df3fb82fd444296e248f90dd29288e82d3a3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:06 -0500 Subject: [PATCH 42/44] platform/x86/amd: pmc: Report duration of time in hw sleep state amd_pmc displays a warning when a suspend didn't get to the deepest state and a dynamic debugging message with the duration if it did. Rather than logging to dynamic debugging the duration spent in the deepest state, report this to the standard kernel reporting infrastructure so that userspace software can query after the suspend cycle is done. Reviewed-by: Hans de Goede Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/amd/pmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 2edaae04a691..e610457136e6 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -393,9 +393,8 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) if (!table.s0i3_last_entry_status) dev_warn(pdev->dev, "Last suspend didn't reach deepest state\n"); - else - dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n", - table.timein_s0i3_lastcapture); + pm_report_hw_sleep_time(table.s0i3_last_entry_status ? + table.timein_s0i3_lastcapture : 0); } static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev) @@ -1015,6 +1014,7 @@ static int amd_pmc_probe(struct platform_device *pdev) } amd_pmc_dbgfs_register(dev); + pm_report_max_hw_sleep(U64_MAX); return 0; err_pci_dev_put: From e2348afe702e9431f428e230be7f4ef8a1778b19 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:07 -0500 Subject: [PATCH 43/44] platform/x86/intel/pmc: core: Always capture counters on suspend Currently counters are only captured during suspend when the warn_on_s0ix_failures module parameter is set. In order to relay this counter information to the kernel reporting infrastructure adjust it so that the counters are always captured. warn_on_s0ix_failures will be utilized solely for messaging by the driver instead. Reviewed-by: Hans de Goede Reviewed-by: David E. Box Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/intel/pmc/core.c | 13 +++++-------- drivers/platform/x86/intel/pmc/core.h | 2 -- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index b9591969e0fa..925c5d676a43 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1179,12 +1179,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) { struct pmc_dev *pmcdev = dev_get_drvdata(dev); - pmcdev->check_counters = false; - - /* No warnings on S0ix failures */ - if (!warn_on_s0ix_failures) - return 0; - /* Check if the syspend will actually use S0ix */ if (pm_suspend_via_firmware()) return 0; @@ -1197,7 +1191,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) if (pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter)) return -EIO; - pmcdev->check_counters = true; return 0; } @@ -1233,12 +1226,16 @@ static __maybe_unused int pmc_core_resume(struct device *dev) const struct pmc_bit_map **maps = pmcdev->map->lpm_sts; int offset = pmcdev->map->lpm_status_offset; - if (!pmcdev->check_counters) + /* Check if the syspend used S0ix */ + if (pm_suspend_via_firmware()) return 0; if (!pmc_core_is_s0ix_failed(pmcdev)) return 0; + if (!warn_on_s0ix_failures) + return 0; + if (pmc_core_is_pc10_failed(pmcdev)) { /* S0ix failed because of PC10 entry failure */ dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n", diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 810204d758ab..51d73efceaf3 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -319,7 +319,6 @@ struct pmc_reg_map { * @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers * used to read MPHY PG and PLL status are available * @mutex_lock: mutex to complete one transcation - * @check_counters: On resume, check if counters are getting incremented * @pc10_counter: PC10 residency counter * @s0ix_counter: S0ix residency (step adjusted) * @num_lpm_modes: Count of enabled modes @@ -338,7 +337,6 @@ struct pmc_dev { int pmc_xram_read_bit; struct mutex lock; /* generic mutex lock for PMC Core */ - bool check_counters; /* Check for counter increments on resume */ u64 pc10_counter; u64 s0ix_counter; int num_lpm_modes; From ddd66d63473513bb013928245b6482969b97de05 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Apr 2023 10:27:08 -0500 Subject: [PATCH 44/44] platform/x86/intel/pmc: core: Report duration of time in HW sleep state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_pmc_core displays a warning when the module parameter `warn_on_s0ix_failures` is set and a suspend didn't get to a HW sleep state. Report this to the standard kernel reporting infrastructure so that userspace software can query after the suspend cycle is done. Reviewed-by: Hans de Goede Suggested-by: Ilpo Järvinen Reviewed-by: David E. Box Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/intel/pmc/core.c | 4 ++++ drivers/platform/x86/intel/pmc/core.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 925c5d676a43..298f27ba1e10 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1153,6 +1153,8 @@ static int pmc_core_probe(struct platform_device *pdev) pmc_core_do_dmi_quirks(pmcdev); pmc_core_dbgfs_register(pmcdev); + pm_report_max_hw_sleep(FIELD_MAX(SLP_S0_RES_COUNTER_MASK) * + pmc_core_adjust_slp_s0_step(pmcdev, 1)); device_initialized = true; dev_info(&pdev->dev, " initialized\n"); @@ -1214,6 +1216,8 @@ static inline bool pmc_core_is_s0ix_failed(struct pmc_dev *pmcdev) if (pmc_core_dev_state_get(pmcdev, &s0ix_counter)) return false; + pm_report_hw_sleep_time((u32)(s0ix_counter - pmcdev->s0ix_counter)); + if (s0ix_counter == pmcdev->s0ix_counter) return true; diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 51d73efceaf3..9ca9b9746719 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -16,6 +16,8 @@ #include #include +#define SLP_S0_RES_COUNTER_MASK GENMASK(31, 0) + #define PMC_BASE_ADDR_DEFAULT 0xFE000000 /* Sunrise Point Power Management Controller PCI Device ID */