From 8c3313e8556b8a817e7b4bc9c249b58855a7e49c Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 12 Dec 2022 12:44:23 +0000 Subject: [PATCH 001/186] arm64: dts: rockchip: use correct reset names for rk3399 crypto nodes The reset names does not follow the binding, use the correct ones. Fixes: 8c701fa6e38c ("arm64: dts: rockchip: rk3399: add crypto node") Signed-off-by: Corentin Labbe Link: https://lore.kernel.org/r/20221212124423.1239748-1-clabbe@baylibre.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 4391aea25984..834d16acb546 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -589,7 +589,7 @@ clocks = <&cru HCLK_M_CRYPTO0>, <&cru HCLK_S_CRYPTO0>, <&cru SCLK_CRYPTO0>; clock-names = "hclk_master", "hclk_slave", "sclk"; resets = <&cru SRST_CRYPTO0>, <&cru SRST_CRYPTO0_S>, <&cru SRST_CRYPTO0_M>; - reset-names = "master", "lave", "crypto"; + reset-names = "master", "slave", "crypto-rst"; }; crypto1: crypto@ff8b8000 { @@ -599,7 +599,7 @@ clocks = <&cru HCLK_M_CRYPTO1>, <&cru HCLK_S_CRYPTO1>, <&cru SCLK_CRYPTO1>; clock-names = "hclk_master", "hclk_slave", "sclk"; resets = <&cru SRST_CRYPTO1>, <&cru SRST_CRYPTO1_S>, <&cru SRST_CRYPTO1_M>; - reset-names = "master", "slave", "crypto"; + reset-names = "master", "slave", "crypto-rst"; }; i2c1: i2c@ff110000 { From 33e24f0738b922b6f5f4118dbdc26cac8400d7b9 Mon Sep 17 00:00:00 2001 From: Jarrah Gosbell Date: Wed, 7 Dec 2022 11:32:13 +0000 Subject: [PATCH 002/186] arm64: dts: rockchip: reduce thermal limits on rk3399-pinephone-pro While this device uses the rk3399 it is also enclosed in a tight package and cooled through the screen and back case. The default rk3399 thermal limits can result in a burnt screen. These lower limits have resulted in the existing burn not expanding and will hopefully result in future devices not experiencing the issue. Signed-off-by: Jarrah Gosbell Link: https://lore.kernel.org/r/20221207113212.8216-1-kernel@undef.tools Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts index 04403a76238b..a0795a2b1cb1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts @@ -104,6 +104,13 @@ }; }; +&cpu_alert0 { + temperature = <65000>; +}; +&cpu_alert1 { + temperature = <68000>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_l>; }; From 1692bffec674551163a7a4be32f59fdde04ecd27 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 25 Nov 2022 15:41:34 +0100 Subject: [PATCH 003/186] arm64: dts: rockchip: drop unused LED mode property from rk3328-roc-cc GPIO LEDs do not have a 'mode' property: rockchip/rk3328-roc-pc.dtb: leds: led-0: Unevaluated properties are not allowed ('mode' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221125144135.477144-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index aa22a0c22265..5d5d9574088c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -96,7 +96,6 @@ linux,default-trigger = "heartbeat"; gpios = <&rk805 1 GPIO_ACTIVE_LOW>; default-state = "on"; - mode = <0x23>; }; user_led: led-1 { @@ -104,7 +103,6 @@ linux,default-trigger = "mmc1"; gpios = <&rk805 0 GPIO_ACTIVE_LOW>; default-state = "off"; - mode = <0x05>; }; }; }; From 0b693c8f8b88d50114caaa4d2337932d4d172631 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Mon, 19 Dec 2022 18:10:52 +0800 Subject: [PATCH 004/186] arm64: dts: rockchip: remove unsupported property from sdmmc2 for rock-3a 'supports-sdio' is not part of the DT binding and not supported by the Linux driver. Signed-off-by: Chukun Pan Link: https://lore.kernel.org/r/20221219101052.7899-1-amadeus@jmu.edu.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index a1c5fdf7d68f..5af11acb5c16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -737,7 +737,6 @@ }; &sdmmc2 { - supports-sdio; bus-width = <4>; disable-wp; cap-sd-highspeed; From 9554f023385825be4b1e3557398c82e25be83da4 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 29 Dec 2022 19:50:43 +0800 Subject: [PATCH 005/186] arm64: dts: rockchip: add io domain setting to rk3566-box-demo Add the missing pmu_io_domains setting, the gmac can't work well without this. Fixes: 2e0537b16b25 ("arm64: dts: rockchip: Add dts for rockchip rk3566 box demo board") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20221229115043.3899733-1-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts index 4c7f9abd594f..d956496d5221 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts @@ -353,6 +353,17 @@ }; }; +&pmu_io_domains { + pmuio2-supply = <&vcc_3v3>; + vccio1-supply = <&vcc_3v3>; + vccio3-supply = <&vcc_3v3>; + vccio4-supply = <&vcca_1v8>; + vccio5-supply = <&vcc_3v3>; + vccio6-supply = <&vcca_1v8>; + vccio7-supply = <&vcc_3v3>; + status = "okay"; +}; + &pwm0 { status = "okay"; }; From 80422339a75088322b4d3884bd12fa0fe5d11050 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 28 Dec 2022 21:17:32 +0100 Subject: [PATCH 006/186] ARM: dts: rockchip: add power-domains property to dp node on rk3288 The clocks in the Rockchip rk3288 DisplayPort node are included in the power-domain@RK3288_PD_VIO logic, but the power-domains property in the dp node is missing, so fix it. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/dab85bfb-9f55-86a1-5cd5-7388c43e0ec5@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 487b0e03d4b4..2ca76b69add7 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1181,6 +1181,7 @@ clock-names = "dp", "pclk"; phys = <&edp_phy>; phy-names = "dp"; + power-domains = <&power RK3288_PD_VIO>; resets = <&cru SRST_EDP>; reset-names = "dp"; rockchip,grf = <&grf>; From 6f515b663d49a14fb63f8c5d0a2a4ae53d44790a Mon Sep 17 00:00:00 2001 From: Arnaud Ferraris Date: Thu, 15 Dec 2022 11:19:47 +0100 Subject: [PATCH 007/186] arm64: dts: rockchip: fix input enable pinconf on rk3399 When the input enable pinconf was introduced, a default drive-strength value of 2 was set for the pull up/down configs. However, this parameter is unneeded when configuring the pin as input, and having a single hardcoded value here is actually harmful: GPIOs on the RK3399 have various same drive-strength capabilities depending on the bank and port they belong to. As an example, trying to configure the GPIO4_PD3 pin as an input with pull-up enabled fails with the following output: [ 10.706542] rockchip-pinctrl pinctrl: unsupported driver strength 2 [ 10.713661] rockchip-pinctrl pinctrl: pin_config_set op failed for pin 155 (acceptable drive-strength values for this pin being 3, 6, 9 and 12) Let's drop the drive-strength property from all input pinconfs in order to solve this issue. Fixes: ec48c3e82ca3 ("arm64: dts: rockchip: add an input enable pinconf to rk3399") Signed-off-by: Arnaud Ferraris Reviewed-by: Caleb Connolly Link: https://lore.kernel.org/r/20221215101947.254896-1-arnaud.ferraris@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 834d16acb546..1881b4b71f91 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -2241,13 +2241,11 @@ pcfg_input_pull_up: pcfg-input-pull-up { input-enable; bias-pull-up; - drive-strength = <2>; }; pcfg_input_pull_down: pcfg-input-pull-down { input-enable; bias-pull-down; - drive-strength = <2>; }; clock { From f276aacf5d2f7fb57e400db44c807ea3b9525fd6 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 13 Jan 2023 19:23:08 +0900 Subject: [PATCH 008/186] spi: Use a 32-bit DT property for spi-cs-setup-delay-ns 65us is not a reasonable maximum for this property, as some devices might need a much longer setup time (e.g. those driven by firmware on the other end). Plus, device tree property values are in 32-bit cells and smaller widths should not be used without good reason. Also move the logic to a helper function, since this will later be used to parse other CS delay properties too. Fixes: 33a2fde5f77b ("spi: Introduce spi-cs-setup-ns property") Signed-off-by: Janne Grunau Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113102309.18308-2-marcan@marcan.st Signed-off-by: Mark Brown --- drivers/spi/spi.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 15f174f4e056..3f33934f5429 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2220,11 +2220,26 @@ void spi_flush_queue(struct spi_controller *ctlr) /*-------------------------------------------------------------------------*/ #if defined(CONFIG_OF) +static void of_spi_parse_dt_cs_delay(struct device_node *nc, + struct spi_delay *delay, const char *prop) +{ + u32 value; + + if (!of_property_read_u32(nc, prop, &value)) { + if (value > U16_MAX) { + delay->value = DIV_ROUND_UP(value, 1000); + delay->unit = SPI_DELAY_UNIT_USECS; + } else { + delay->value = value; + delay->unit = SPI_DELAY_UNIT_NSECS; + } + } +} + static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, struct device_node *nc) { u32 value; - u16 cs_setup; int rc; /* Mode (clock phase/polarity/etc.) */ @@ -2310,10 +2325,8 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; - if (!of_property_read_u16(nc, "spi-cs-setup-delay-ns", &cs_setup)) { - spi->cs_setup.value = cs_setup; - spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS; - } + /* Device CS delays */ + of_spi_parse_dt_cs_delay(nc, &spi->cs_setup, "spi-cs-setup-delay-ns"); return 0; } From a323e6b5737bb6e3d3946369b97099abb7dde695 Mon Sep 17 00:00:00 2001 From: Jensen Huang Date: Fri, 13 Jan 2023 14:44:57 +0800 Subject: [PATCH 009/186] arm64: dts: rockchip: add missing #interrupt-cells to rk356x pcie2x1 This fixes the following issue: pcieport 0000:00:00.0: of_irq_parse_pci: failed with rc=-22 Signed-off-by: Jensen Huang Link: https://lore.kernel.org/r/20230113064457.7105-1-jensenhuang@friendlyarm.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 5706c3e24f0a..c27f1c7f072d 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -966,6 +966,7 @@ clock-names = "aclk_mst", "aclk_slv", "aclk_dbi", "pclk", "aux"; device_type = "pci"; + #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, From 86d884f5287f4369c198811aaa4931a3a11f36d2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Jan 2023 17:45:30 +0100 Subject: [PATCH 010/186] clk: microchip: mpfs-ccc: Use devm_kasprintf() for allocating formatted strings In various places, string buffers of a fixed size are allocated, and filled using snprintf() with the same fixed size, which is error-prone. Replace this by calling devm_kasprintf() instead, which always uses the appropriate size. While at it, remove an unneeded intermediate variable, which allows us to drop a cast as a bonus. With the initial behavior it would have been possible to have a device tree with a node address that would make "ccc_pll" exceed 18 characters. If that happened, the would be cut off & both pll 0 & 1 would be named identically. If that happens, pll1 would fail to register. Thus, the fixes tag has been added to this commit. Fixes: d39fb172760e ("clk: microchip: add PolarFire SoC fabric clock support") Signed-off-by: Geert Uytterhoeven Reviewed-by: Conor Dooley Tested-by: Conor Dooley [claudiu.beznea: added the rationale behind fixes tag] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/f904fd28b2087d1463ea65f059924e3b1acc193c.1672764239.git.geert+renesas@glider.be --- drivers/clk/microchip/clk-mpfs-ccc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c index 32aae880a14f..0ddc73e07be4 100644 --- a/drivers/clk/microchip/clk-mpfs-ccc.c +++ b/drivers/clk/microchip/clk-mpfs-ccc.c @@ -164,12 +164,11 @@ static int mpfs_ccc_register_outputs(struct device *dev, struct mpfs_ccc_out_hw_ for (unsigned int i = 0; i < num_clks; i++) { struct mpfs_ccc_out_hw_clock *out_hw = &out_hws[i]; - char *name = devm_kzalloc(dev, 23, GFP_KERNEL); + char *name = devm_kasprintf(dev, GFP_KERNEL, "%s_out%u", parent->name, i); if (!name) return -ENOMEM; - snprintf(name, 23, "%s_out%u", parent->name, i); out_hw->divider.hw.init = CLK_HW_INIT_HW(name, &parent->hw, &clk_divider_ops, 0); out_hw->divider.reg = data->pll_base[i / MPFS_CCC_OUTPUTS_PER_PLL] + out_hw->reg_offset; @@ -201,14 +200,13 @@ static int mpfs_ccc_register_plls(struct device *dev, struct mpfs_ccc_pll_hw_clo for (unsigned int i = 0; i < num_clks; i++) { struct mpfs_ccc_pll_hw_clock *pll_hw = &pll_hws[i]; - char *name = devm_kzalloc(dev, 18, GFP_KERNEL); - if (!name) + pll_hw->name = devm_kasprintf(dev, GFP_KERNEL, "ccc%s_pll%u", + strchrnul(dev->of_node->full_name, '@'), i); + if (!pll_hw->name) return -ENOMEM; pll_hw->base = data->pll_base[i]; - snprintf(name, 18, "ccc%s_pll%u", strchrnul(dev->of_node->full_name, '@'), i); - pll_hw->name = (const char *)name; pll_hw->hw.init = CLK_HW_INIT_PARENTS_DATA_FIXED_SIZE(pll_hw->name, pll_hw->parents, &mpfs_ccc_pll_ops, 0); From 1104693cdfcd337e73ab585a225f05445ff7a864 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 15 Jan 2023 21:15:56 +0000 Subject: [PATCH 011/186] arm64: dts: rockchip: fix probe of analog sound card on rock-3a The following was observed on my Radxa ROCK 3 Model A board: rockchip-pinctrl pinctrl: pin gpio1-9 already requested by vcc-cam-regulator; cannot claim for fe410000.i2s ... platform rk809-sound: deferred probe pending Fix this by supplying a board specific pinctrl with the i2s1 pins used by pmic codec according to the schematic [1]. [1] https://dl.radxa.com/rock3/docs/hw/3a/ROCK-3A-V1.3-SCH.pdf Signed-off-by: Jonas Karlman Acked-by: Michael Riesch Link: https://lore.kernel.org/r/20230115211553.445007-1-jonas@kwiboo.se Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index 5af11acb5c16..7d5d16d0e000 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -571,6 +571,8 @@ }; &i2s1_8ch { + pinctrl-names = "default"; + pinctrl-0 = <&i2s1m0_sclktx &i2s1m0_lrcktx &i2s1m0_sdi0 &i2s1m0_sdo0>; rockchip,trcm-sync-tx-only; status = "okay"; }; From 6601fc0d15ffc20654e39486f9bef35567106d68 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 12 Jan 2023 13:16:02 -0500 Subject: [PATCH 012/186] IB/hfi1: Restore allocated resources on failed copyout Fix a resource leak if an error occurs. Fixes: f404ca4c7ea8 ("IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167354736291.2132367.10894218740150168180.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/file_ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f5f9269fdc16..7c5d487ec916 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1318,12 +1318,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) - return -EFAULT; + ret = -EFAULT; addr = arg + offsetof(struct hfi1_tid_info, length); - if (copy_to_user((void __user *)addr, &tinfo.length, + if (!ret && copy_to_user((void __user *)addr, &tinfo.length, sizeof(tinfo.length))) ret = -EFAULT; + + if (ret) + hfi1_user_exp_rcv_invalid(fd, &tinfo); } return ret; From 1c88b9ba6b126080a9c121abd943b25e20cd7ac1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Jan 2023 23:02:49 -0800 Subject: [PATCH 013/186] dax: super.c: fix kernel-doc bad line warning Convert an empty line to " *" to avoid a kernel-doc warning: drivers/dax/super.c:478: warning: bad line: Signed-off-by: Randy Dunlap Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: nvdimm@lists.linux.dev Link: https://lore.kernel.org/r/20230117070249.31934-1-rdunlap@infradead.org Signed-off-by: Dan Williams --- drivers/dax/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index da4438f3188c..c4c4728a36e4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -475,7 +475,7 @@ EXPORT_SYMBOL_GPL(put_dax); /** * dax_holder() - obtain the holder of a dax device * @dax_dev: a dax_device instance - + * * Return: the holder's data which represents the holder if registered, * otherwize NULL. */ From fb6df4366f86dd252bfa3049edffa52d17e7b895 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 25 Jan 2023 11:34:18 -0700 Subject: [PATCH 014/186] ACPI: NFIT: fix a potential deadlock during NFIT teardown Lockdep reports that acpi_nfit_shutdown() may deadlock against an opportune acpi_nfit_scrub(). acpi_nfit_scrub () is run from inside a 'work' and therefore has already acquired workqueue-internal locks. It also acquiires acpi_desc->init_mutex. acpi_nfit_shutdown() first acquires init_mutex, and was subsequently attempting to cancel any pending workqueue items. This reversed locking order causes a potential deadlock: ====================================================== WARNING: possible circular locking dependency detected 6.2.0-rc3 #116 Tainted: G O N ------------------------------------------------------ libndctl/1958 is trying to acquire lock: ffff888129b461c0 ((work_completion)(&(&acpi_desc->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0x43/0x450 but task is already holding lock: ffff888129b460e8 (&acpi_desc->init_mutex){+.+.}-{3:3}, at: acpi_nfit_shutdown+0x87/0xd0 [nfit] which lock already depends on the new lock. ... Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&acpi_desc->init_mutex); lock((work_completion)(&(&acpi_desc->dwork)->work)); lock(&acpi_desc->init_mutex); lock((work_completion)(&(&acpi_desc->dwork)->work)); *** DEADLOCK *** Since the workqueue manipulation is protected by its own internal locking, the cancellation of pending work doesn't need to be done under acpi_desc->init_mutex. Move cancel_delayed_work_sync() outside the init_mutex to fix the deadlock. Any work that starts after acpi_nfit_shutdown() drops the lock will see ARS_CANCEL, and the cancel_delayed_work_sync() will safely flush it out. Reported-by: Dan Williams Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20230112-acpi_nfit_lockdep-v1-1-660be4dd10be@intel.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f1cc5ec6a3b6..4e48d6db05eb 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3297,8 +3297,8 @@ void acpi_nfit_shutdown(void *data) mutex_lock(&acpi_desc->init_mutex); set_bit(ARS_CANCEL, &acpi_desc->scrub_flags); - cancel_delayed_work_sync(&acpi_desc->dwork); mutex_unlock(&acpi_desc->init_mutex); + cancel_delayed_work_sync(&acpi_desc->dwork); /* * Bounce the nvdimm bus lock to make sure any in-flight From ecfb9f404771dde909ce7743df954370933c3be2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 14 Dec 2022 13:37:04 +0100 Subject: [PATCH 015/186] clk: ingenic: jz4760: Update M/N/OD calculation algorithm The previous algorithm was pretty broken. - The inner loop had a '(m > m_max)' condition, and the value of 'm' would increase in each iteration; - Each iteration would actually multiply 'm' by two, so it is not needed to re-compute the whole equation at each iteration; - It would loop until (m & 1) == 0, which means it would loop at most once. - The outer loop would divide the 'n' value by two at the end of each iteration. This meant that for a 12 MHz parent clock and a 1.2 GHz requested clock, it would first try n=12, then n=6, then n=3, then n=1, none of which would work; the only valid value is n=2 in this case. Simplify this algorithm with a single for loop, which decrements 'n' after each iteration, addressing all of the above problems. Fixes: bdbfc029374f ("clk: ingenic: Add support for the JZ4760") Cc: Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221214123704.7305-1-paul@crapouillou.net Signed-off-by: Stephen Boyd --- drivers/clk/ingenic/jz4760-cgu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c index ecd395ac8a28..e407f00bd594 100644 --- a/drivers/clk/ingenic/jz4760-cgu.c +++ b/drivers/clk/ingenic/jz4760-cgu.c @@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, unsigned long rate, unsigned long parent_rate, unsigned int *pm, unsigned int *pn, unsigned int *pod) { - unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2; + unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1; /* The frequency after the N divider must be between 1 and 50 MHz. */ n = parent_rate / (1 * MHZ); @@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, /* The N divider must be >= 2. */ n = clamp_val(n, 2, 1 << pll_info->n_bits); - for (;; n >>= 1) { - od = (unsigned int)-1; + rate /= MHZ; + parent_rate /= MHZ; - do { - m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ); - } while ((m > m_max || m & 1) && (od < 4)); - - if (od < 4 && m >= 4 && m <= m_max) - break; + for (m = m_max; m >= m_max && n >= 2; n--) { + m = rate * n / parent_rate; + od = m & 1; + m <<= od; } *pm = m; - *pn = n; + *pn = n + 1; *pod = 1 << od; } From 5754a1c98b18009cb3030dc391aa37b77428a0bd Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 18 Jan 2023 14:21:16 +0800 Subject: [PATCH 016/186] pinctrl: mediatek: Fix the drive register definition of some Pins The drive adjustment register definition of gpio13 and gpio81 is wrong: "the start address for the range" of gpio18 is corrected to 0x000, "the start bit for the first register within the range" of gpio81 is corrected to 24. Fixes: 6cf5e9ef362a ("pinctrl: add pinctrl driver on mt8195") Signed-off-by: Guodong Liu Link: https://lore.kernel.org/r/20230118062116.26315-1-Guodong.Liu@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mt8195.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c index 89557c7ed2ab..09c4dcef9338 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -659,7 +659,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { PIN_FIELD_BASE(10, 10, 4, 0x010, 0x10, 9, 3), PIN_FIELD_BASE(11, 11, 4, 0x000, 0x10, 24, 3), PIN_FIELD_BASE(12, 12, 4, 0x010, 0x10, 12, 3), - PIN_FIELD_BASE(13, 13, 4, 0x010, 0x10, 27, 3), + PIN_FIELD_BASE(13, 13, 4, 0x000, 0x10, 27, 3), PIN_FIELD_BASE(14, 14, 4, 0x010, 0x10, 15, 3), PIN_FIELD_BASE(15, 15, 4, 0x010, 0x10, 0, 3), PIN_FIELD_BASE(16, 16, 4, 0x010, 0x10, 18, 3), @@ -708,7 +708,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { PIN_FIELD_BASE(78, 78, 3, 0x000, 0x10, 15, 3), PIN_FIELD_BASE(79, 79, 3, 0x000, 0x10, 18, 3), PIN_FIELD_BASE(80, 80, 3, 0x000, 0x10, 21, 3), - PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 28, 3), + PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 24, 3), PIN_FIELD_BASE(82, 82, 3, 0x000, 0x10, 27, 3), PIN_FIELD_BASE(83, 83, 3, 0x010, 0x10, 0, 3), PIN_FIELD_BASE(84, 84, 3, 0x010, 0x10, 3, 3), From e632291a2dbce45a24cddeb5fe28fe71d724ba43 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 24 Jan 2023 20:24:18 +0200 Subject: [PATCH 017/186] IB/IPoIB: Fix legacy IPoIB due to wrong number of queues The cited commit creates child PKEY interfaces over netlink will multiple tx and rx queues, but some devices doesn't support more than 1 tx and 1 rx queues. This causes to a crash when traffic is sent over the PKEY interface due to the parent having a single queue but the child having multiple queues. This patch fixes the number of queues to 1 for legacy IPoIB at the earliest possible point in time. BUG: kernel NULL pointer dereference, address: 000000000000036b PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 4 PID: 209665 Comm: python3 Not tainted 6.1.0_for_upstream_min_debug_2022_12_12_17_02 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:kmem_cache_alloc+0xcb/0x450 Code: ce 7e 49 8b 50 08 49 83 78 10 00 4d 8b 28 0f 84 cb 02 00 00 4d 85 ed 0f 84 c2 02 00 00 41 8b 44 24 28 48 8d 4a 01 49 8b 3c 24 <49> 8b 5c 05 00 4c 89 e8 65 48 0f c7 0f 0f 94 c0 84 c0 74 b8 41 8b RSP: 0018:ffff88822acbbab8 EFLAGS: 00010202 RAX: 0000000000000070 RBX: ffff8881c28e3e00 RCX: 00000000064f8dae RDX: 00000000064f8dad RSI: 0000000000000a20 RDI: 0000000000030d00 RBP: 0000000000000a20 R08: ffff8882f5d30d00 R09: ffff888104032f40 R10: ffff88810fade828 R11: 736f6d6570736575 R12: ffff88810081c000 R13: 00000000000002fb R14: ffffffff817fc865 R15: 0000000000000000 FS: 00007f9324ff9700(0000) GS:ffff8882f5d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000036b CR3: 00000001125af004 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_clone+0x55/0xd0 ip6_finish_output2+0x3fe/0x690 ip6_finish_output+0xfa/0x310 ip6_send_skb+0x1e/0x60 udp_v6_send_skb+0x1e5/0x420 udpv6_sendmsg+0xb3c/0xe60 ? ip_mc_finish_output+0x180/0x180 ? __switch_to_asm+0x3a/0x60 ? __switch_to_asm+0x34/0x60 sock_sendmsg+0x33/0x40 __sys_sendto+0x103/0x160 ? _copy_to_user+0x21/0x30 ? kvm_clock_get_cycles+0xd/0x10 ? ktime_get_ts64+0x49/0xe0 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f9374f1ed14 Code: 42 41 f8 ff 44 8b 4c 24 2c 4c 8b 44 24 20 89 c5 44 8b 54 24 28 48 8b 54 24 18 b8 2c 00 00 00 48 8b 74 24 10 8b 7c 24 08 0f 05 <48> 3d 00 f0 ff ff 77 34 89 ef 48 89 44 24 08 e8 68 41 f8 ff 48 8b RSP: 002b:00007f9324ff7bd0 EFLAGS: 00000293 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f9324ff7cc8 RCX: 00007f9374f1ed14 RDX: 00000000000002fb RSI: 00007f93000052f0 RDI: 0000000000000030 RBP: 0000000000000000 R08: 00007f9324ff7d40 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000 R13: 000000012a05f200 R14: 0000000000000001 R15: 00007f9374d57bdc Fixes: dbc94a0fb817 ("IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces") Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/r/95eb6b74c7cf49fa46281f9d056d685c9fa11d38.1674584576.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ac25fc80fb33..f10d4bcf87d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2200,6 +2200,14 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; rn->hca = hca; + + rc = netif_set_real_num_tx_queues(dev, 1); + if (rc) + goto out; + + rc = netif_set_real_num_rx_queues(dev, 1); + if (rc) + goto out; } priv->rn_ops = dev->netdev_ops; From ad34656db3b959421621c4e239d7e0dc710ec04c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 26 Jan 2023 15:10:03 -0600 Subject: [PATCH 018/186] MAINTAINERS: Move to shared PCI tree Move PCI subsystem maintenance to a shared git tree to make it easier for maintainers to collaborate. Update MAINTAINERS accordingly. No change to patch submission and patchwork tracking. Link: https://lore.kernel.org/r/20230126211003.1310916-1-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f61eb221415b..14c0b3e89c63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16115,7 +16115,7 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git F: Documentation/PCI/endpoint/* F: Documentation/misc-devices/pci-endpoint-test.rst F: drivers/misc/pci_endpoint_test.c @@ -16150,7 +16150,7 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci -T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git F: Documentation/driver-api/pci/p2pdma.rst F: drivers/pci/p2pdma.c F: include/linux/pci-p2pdma.h @@ -16179,7 +16179,7 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci -T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git F: Documentation/devicetree/bindings/pci/ F: drivers/pci/controller/ F: drivers/pci/pci-bridge-emul.c @@ -16192,7 +16192,7 @@ S: Supported Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci -T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git F: Documentation/PCI/ F: Documentation/devicetree/bindings/pci/ F: arch/x86/kernel/early-quirks.c From 287a344a11f1ebd31055cf9b22c88d7005f108d7 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 20 Jan 2023 09:48:56 +1030 Subject: [PATCH 019/186] pinctrl: aspeed: Fix confusing types in return value The function signature is int, but we return a bool. Instead return a negative errno as the kerneldoc suggests. Fixes: 4d3d0e4272d8 ("pinctrl: Add core support for Aspeed SoCs") Signed-off-by: Joel Stanley Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20230119231856.52014-1-joel@jms.id.au Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 3945612900e6..b5ca4501a605 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -114,7 +114,7 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx, int ret = 0; if (!exprs) - return true; + return -EINVAL; while (*exprs && !ret) { ret = aspeed_sig_expr_disable(ctx, *exprs); From c6e0679b8381bf03315e6660cf5370f916c1a1c6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 21 Jan 2023 07:48:10 -0600 Subject: [PATCH 020/186] pinctrl: amd: Fix debug output for debounce time If one GPIO has debounce enabled but future GPIOs in the list don't have debounce the time never gets reset and shows wrong value. Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230121134812.16637-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9bc6e3922e78..32c3edaf9038 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -365,6 +365,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) } else { debounce_enable = " ∅"; + time = 0; } snprintf(debounce_value, sizeof(debounce_value), "%u", time * unit); seq_printf(s, "debounce %s (🕑 %sus)| ", debounce_enable, debounce_value); From d2d73e6d4822140445ad4a7b1c6091e0f5fe703b Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Fri, 18 Nov 2022 13:43:32 +0300 Subject: [PATCH 021/186] pinctrl: single: fix potential NULL dereference Added checking of pointer "function" in pcs_set_mux(). pinmux_generic_get_function() can return NULL and the pointer "function" was dereferenced without checking against NULL. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 571aec4df5b7 ("pinctrl: single: Use generic pinmux helpers for managing functions") Signed-off-by: Maxim Korotkov Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20221118104332.943-1-korotkov.maxim.s@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 99c3745da456..190923757cda 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -372,6 +372,8 @@ static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, if (!pcs->fmask) return 0; function = pinmux_generic_get_function(pctldev, fselector); + if (!function) + return -EINVAL; func = function->data; if (!func) return -EINVAL; From c63b8fd14a7db719f8252038a790638728c4eb66 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 21:59:42 +0300 Subject: [PATCH 022/186] spi: dw: Fix wrong FIFO level setting for long xfers Due to using the u16 type in the min_t() macros the SPI transfer length will be cast to word before participating in the conditional statement implied by the macro. Thus if the transfer length is greater than 64KB the Tx/Rx FIFO threshold level value will be determined by the leftover of the truncated after the type-case length. In the worst case it will cause the dramatical performance drop due to the "Tx FIFO Empty" or "Rx FIFO Full" interrupts triggered on each xfer word sent/received to/from the bus. The problem can be easily fixed by specifying the unsigned int type in the min_t() macros thus preventing the possible data loss. Fixes: ea11370fffdf ("spi: dw: get TX level without an additional variable") Reported-by: Sergey Nazarov Signed-off-by: Serge Semin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230113185942.2516-1-Sergey.Semin@baikalelectronics.ru Signed-off-by: Mark Brown --- drivers/spi/spi-dw-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c index 99edddf9958b..c3bfb6c84cab 100644 --- a/drivers/spi/spi-dw-core.c +++ b/drivers/spi/spi-dw-core.c @@ -366,7 +366,7 @@ static void dw_spi_irq_setup(struct dw_spi *dws) * will be adjusted at the final stage of the IRQ-based SPI transfer * execution so not to lose the leftover of the incoming data. */ - level = min_t(u16, dws->fifo_len / 2, dws->tx_len); + level = min_t(unsigned int, dws->fifo_len / 2, dws->tx_len); dw_writel(dws, DW_SPI_TXFTLR, level); dw_writel(dws, DW_SPI_RXFTLR, level - 1); From eede42c9459b58b71edc99303dad65216a655810 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 16 Jan 2023 15:41:49 +0100 Subject: [PATCH 023/186] spi: spidev: fix a recursive locking error When calling spidev_message() from the one of the ioctl() callbacks, the spi_lock is already taken. When we then end up calling spidev_sync(), we get the following splat: [ 214.047619] [ 214.049198] ============================================ [ 214.054533] WARNING: possible recursive locking detected [ 214.059858] 6.2.0-rc3-0.0.0-devel+git.97ec4d559d93 #1 Not tainted [ 214.065969] -------------------------------------------- [ 214.071290] spidev_test/1454 is trying to acquire lock: [ 214.076530] c4925dbc (&spidev->spi_lock){+.+.}-{3:3}, at: spidev_ioctl+0x8e0/0xab8 [ 214.084164] [ 214.084164] but task is already holding lock: [ 214.090007] c4925dbc (&spidev->spi_lock){+.+.}-{3:3}, at: spidev_ioctl+0x44/0xab8 [ 214.097537] [ 214.097537] other info that might help us debug this: [ 214.104075] Possible unsafe locking scenario: [ 214.104075] [ 214.110004] CPU0 [ 214.112461] ---- [ 214.114916] lock(&spidev->spi_lock); [ 214.118687] lock(&spidev->spi_lock); [ 214.122457] [ 214.122457] *** DEADLOCK *** [ 214.122457] [ 214.128386] May be due to missing lock nesting notation [ 214.128386] [ 214.135183] 2 locks held by spidev_test/1454: [ 214.139553] #0: c4925dbc (&spidev->spi_lock){+.+.}-{3:3}, at: spidev_ioctl+0x44/0xab8 [ 214.147524] #1: c4925e14 (&spidev->buf_lock){+.+.}-{3:3}, at: spidev_ioctl+0x70/0xab8 [ 214.155493] [ 214.155493] stack backtrace: [ 214.159861] CPU: 0 PID: 1454 Comm: spidev_test Not tainted 6.2.0-rc3-0.0.0-devel+git.97ec4d559d93 #1 [ 214.169012] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 214.175555] unwind_backtrace from show_stack+0x10/0x14 [ 214.180819] show_stack from dump_stack_lvl+0x60/0x90 [ 214.185900] dump_stack_lvl from __lock_acquire+0x874/0x2858 [ 214.191584] __lock_acquire from lock_acquire+0xfc/0x378 [ 214.196918] lock_acquire from __mutex_lock+0x9c/0x8a8 [ 214.202083] __mutex_lock from mutex_lock_nested+0x1c/0x24 [ 214.207597] mutex_lock_nested from spidev_ioctl+0x8e0/0xab8 [ 214.213284] spidev_ioctl from sys_ioctl+0x4d0/0xe2c [ 214.218277] sys_ioctl from ret_fast_syscall+0x0/0x1c [ 214.223351] Exception stack(0xe75cdfa8 to 0xe75cdff0) [ 214.228422] dfa0: 00000000 00001000 00000003 40206b00 bee266e8 bee266e0 [ 214.236617] dfc0: 00000000 00001000 006a71a0 00000036 004c0040 004bfd18 00000000 00000003 [ 214.244809] dfe0: 00000036 bee266c8 b6f16dc5 b6e8e5f6 Fix it by introducing an unlocked variant of spidev_sync() and calling it from spidev_message() while other users who don't check the spidev->spi's existence keep on using the locking flavor. Reported-by: Francesco Dolcini Fixes: 1f4d2dd45b6e ("spi: spidev: fix a race condition when accessing spidev->spi") Signed-off-by: Bartosz Golaszewski Tested-by: Max Krummenacher Link: https://lore.kernel.org/r/20230116144149.305560-1-brgl@bgdev.pl Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 1935ca613447..a1ea093795cf 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -89,10 +89,22 @@ MODULE_PARM_DESC(bufsiz, "data bytes in biggest supported SPI message"); /*-------------------------------------------------------------------------*/ +static ssize_t +spidev_sync_unlocked(struct spi_device *spi, struct spi_message *message) +{ + ssize_t status; + + status = spi_sync(spi, message); + if (status == 0) + status = message->actual_length; + + return status; +} + static ssize_t spidev_sync(struct spidev_data *spidev, struct spi_message *message) { - int status; + ssize_t status; struct spi_device *spi; mutex_lock(&spidev->spi_lock); @@ -101,12 +113,10 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) if (spi == NULL) status = -ESHUTDOWN; else - status = spi_sync(spi, message); - - if (status == 0) - status = message->actual_length; + status = spidev_sync_unlocked(spi, message); mutex_unlock(&spidev->spi_lock); + return status; } @@ -294,7 +304,7 @@ static int spidev_message(struct spidev_data *spidev, spi_message_add_tail(k_tmp, &msg); } - status = spidev_sync(spidev, &msg); + status = spidev_sync_unlocked(spidev->spi, &msg); if (status < 0) goto done; From c91d713630848460de8669e6570307b7e559863b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 25 Jan 2023 12:23:46 -0800 Subject: [PATCH 024/186] nvdimm: Support sizeof(struct page) > MAX_STRUCT_PAGE_SIZE Commit 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") ...updated MAX_STRUCT_PAGE_SIZE to account for sizeof(struct page) potentially doubling in the case of CONFIG_KMSAN=y. Unfortunately this doubles the amount of capacity stolen from user addressable capacity for everyone, regardless of whether they are using the debug option. Revert that change, mandate that MAX_STRUCT_PAGE_SIZE never exceed 64, but allow for debug scenarios to proceed with creating debug sized page maps with a compile option to support debug scenarios. Note that this only applies to cases where the page map is permanent, i.e. stored in a reservation of the pmem itself ("--map=dev" in "ndctl create-namespace" terms). For the "--map=mem" case, since the allocation is ephemeral for the lifespan of the namespace, there are no explicit restriction. However, the implicit restriction, of having enough available "System RAM" to store the page map for the typically large pmem, still applies. Fixes: 6e9f05dc66f9 ("libnvdimm/pfn_dev: increase MAX_STRUCT_PAGE_SIZE") Cc: Cc: Alexander Potapenko Cc: Marco Elver Reported-by: Jeff Moyer Acked-by: Yu Zhao Link: https://lore.kernel.org/r/167467815773.463042.7022545814443036382.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 19 ++++++++++++++++++ drivers/nvdimm/nd.h | 2 +- drivers/nvdimm/pfn_devs.c | 42 +++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 79d93126453d..77b06d54cc62 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -102,6 +102,25 @@ config NVDIMM_KEYS depends on ENCRYPTED_KEYS depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m +config NVDIMM_KMSAN + bool + depends on KMSAN + help + KMSAN, and other memory debug facilities, increase the size of + 'struct page' to contain extra metadata. This collides with + the NVDIMM capability to store a potentially + larger-than-"System RAM" size 'struct page' array in a + reservation of persistent memory rather than limited / + precious DRAM. However, that reservation needs to persist for + the life of the given NVDIMM namespace. If you are using KMSAN + to debug an issue unrelated to NVDIMMs or DAX then say N to this + option. Otherwise, say Y but understand that any namespaces + (with the page array stored pmem) created with this build of + the kernel will permanently reserve and strand excess + capacity compared to the CONFIG_KMSAN=n case. + + Select N if unsure. + config NVDIMM_TEST_BUILD tristate "Build the unit test core" depends on m diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 85ca5b4da3cf..ec5219680092 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -652,7 +652,7 @@ void devm_namespace_disable(struct device *dev, struct nd_namespace_common *ndns); #if IS_ENABLED(CONFIG_ND_CLAIM) /* max struct page size independent of kernel config */ -#define MAX_STRUCT_PAGE_SIZE 128 +#define MAX_STRUCT_PAGE_SIZE 64 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); #else static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 61af072ac98f..af7d9301520c 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -13,6 +13,8 @@ #include "pfn.h" #include "nd.h" +static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN); + static void nd_pfn_release(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); @@ -758,12 +760,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENXIO; } - /* - * Note, we use 64 here for the standard size of struct page, - * debugging options may cause it to be larger in which case the - * implementation will limit the pfns advertised through - * ->direct_access() to those that are included in the memmap. - */ start = nsio->res.start; size = resource_size(&nsio->res); npfns = PHYS_PFN(size - SZ_8K); @@ -782,20 +778,33 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } end_trunc = start + size - ALIGN_DOWN(start + size, align); if (nd_pfn->mode == PFN_MODE_PMEM) { + unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns; + /* * The altmap should be padded out to the block size used * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. * - * Also make sure size of struct page is less than 128. We - * want to make sure we use large enough size here so that - * we don't have a dynamic reserve space depending on - * struct page size. But we also want to make sure we notice - * when we end up adding new elements to struct page. + * Also make sure size of struct page is less than + * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the + * face of production kernel configurations that reduce the + * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug + * kernel configurations that increase the 'struct page' size + * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows + * for continuing with the capacity that will be wasted when + * reverting to a production kernel configuration. Otherwise, + * those configurations are blocked by default. */ - BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); - offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) - - start; + if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) { + if (page_struct_override) + page_map_size = sizeof(struct page) * npfns; + else { + dev_err(&nd_pfn->dev, + "Memory debug options prevent using pmem for the page map\n"); + return -EINVAL; + } + } + offset = ALIGN(start + SZ_8K + page_map_size, align) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K, align) - start; else @@ -818,7 +827,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) pfn_sb->version_minor = cpu_to_le16(4); pfn_sb->end_trunc = cpu_to_le32(end_trunc); pfn_sb->align = cpu_to_le32(nd_pfn->align); - pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); + if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override) + pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page)); + else + pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); From 5d9745cead1f121974322b94ceadfb4d1e67960e Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 26 Jan 2023 10:52:30 -0800 Subject: [PATCH 025/186] RDMA/irdma: Fix potential NULL-ptr-dereference in_dev_get() can return NULL which will cause a failure once idev is dereferenced in in_dev_for_each_ifa_rtnl(). This patch adds a check for NULL value in idev beforehand. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20230126185230.62464-1-n.zhandarovich@fintech.ru Reviewed-by: Sindhu Devale Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 7b086fe63a24..195aa9ea18b6 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1722,6 +1722,9 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev, continue; idev = in_dev_get(ip_dev); + if (!idev) + continue; + in_dev_for_each_ifa_rtnl(ifa, idev) { ibdev_dbg(&iwdev->ibdev, "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", From bc121b707e816616567683e51fd9194c2309977a Mon Sep 17 00:00:00 2001 From: Dan Johansen Date: Sat, 28 Jan 2023 12:24:32 +0100 Subject: [PATCH 026/186] arm64: dts: rockchip: set sdmmc0 speed to sd-uhs-sdr50 on rock-3a As other rk336x based devices, the Rock 3 Model A has issues with high speed SD cards, so lower the speed to 50 instead of 104 in the same manor has the Quartz64 Model B has. Fixes: 22a442e6586c ("arm64: dts: rockchip: add basic dts for the radxa rock3 model a") Signed-off-by: Dan Johansen Link: https://lore.kernel.org/r/20230128112432.132302-1-strit@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index 7d5d16d0e000..3c9d85257cc9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -732,7 +732,7 @@ disable-wp; pinctrl-names = "default"; pinctrl-0 = <&sdmmc0_bus4 &sdmmc0_clk &sdmmc0_cmd &sdmmc0_det>; - sd-uhs-sdr104; + sd-uhs-sdr50; vmmc-supply = <&vcc3v3_sd>; vqmmc-supply = <&vccio_sd>; status = "okay"; From b67b09733d8a41eec33d5d37be2f8cff8af82a5e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 19 Jan 2023 13:46:31 +0100 Subject: [PATCH 027/186] arm64: dts: rockchip: align rk3399 DMC OPP table with bindings Bindings expect certain pattern for OPP table node name and underscores are not allowed: rk3399-rock-pi-4a-plus.dtb: dmc_opp_table: $nodename:0: 'dmc_opp_table' does not match '^opp-table(-[a-z0-9]+)?$' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230119124631.91080-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi index 6e29e74f6fc6..783120e9cebe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi @@ -111,7 +111,7 @@ }; }; - dmc_opp_table: dmc_opp_table { + dmc_opp_table: opp-table-3 { compatible = "operating-points-v2"; opp00 { From b7e08a5a63a11627601915473c3b569c1f6c6c06 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sun, 29 Jan 2023 17:37:57 +0800 Subject: [PATCH 028/186] RDMA/usnic: use iommu_map_atomic() under spin_lock() usnic_uiom_map_sorted_intervals() is called under spin_lock(), iommu_map() might sleep, use iommu_map_atomic() to avoid potential sleep in atomic context. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20230129093757.637354-1-yangyingliang@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/usnic/usnic_uiom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c301b3be9f30..a2857accc427 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -276,8 +276,8 @@ iter_chunk: size = pa_end - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", va_start, &pa_start, size, flags); - err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + err = iommu_map_atomic(pd->domain, va_start, + pa_start, size, flags); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); @@ -293,8 +293,8 @@ iter_chunk: size = pa - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", va_start, &pa_start, size, flags); - err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + err = iommu_map_atomic(pd->domain, va_start, + pa_start, size, flags); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); From 2036890282d56bcbf7f915ba9e04bf77967ab231 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 30 Jan 2023 02:43:25 +0000 Subject: [PATCH 029/186] ASoC: rt715-sdca: fix clock stop prepare timeout issue Modify clock_stop_timeout value for rt715-sdca according to the requirement of internal clock trimming. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/574b6586267a458cac78c5ac4d5b10bd@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt715-sdca-sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c index 3f981a9e7fb6..c54ecf3e6987 100644 --- a/sound/soc/codecs/rt715-sdca-sdw.c +++ b/sound/soc/codecs/rt715-sdca-sdw.c @@ -167,7 +167,7 @@ static int rt715_sdca_read_prop(struct sdw_slave *slave) } /* set the timeout values */ - prop->clk_stop_timeout = 20; + prop->clk_stop_timeout = 200; return 0; } From 581e43ef5de6cc3369b2496362348e38bc85ded0 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 30 Jan 2023 11:08:50 +0100 Subject: [PATCH 030/186] MAINTAINERS: Promote Krzysztof to PCI controller maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Krzysztof has contributed significantly to the PCI controller subsystem recently through reviews, tooling and submissions. Update the MAINTAINERS file to grant him the role he deserves. Link: https://lore.kernel.org/r/20230130100850.24994-1-lpieralisi@kernel.org Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Krzysztof WilczyƄski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 14c0b3e89c63..87e7a5e5a666 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16107,7 +16107,7 @@ F: drivers/pci/controller/pci-v3-semi.c PCI ENDPOINT SUBSYSTEM M: Lorenzo Pieralisi -R: Krzysztof WilczyƄski +M: Krzysztof WilczyƄski R: Manivannan Sadhasivam R: Kishon Vijay Abraham I L: linux-pci@vger.kernel.org @@ -16172,8 +16172,8 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi +M: Krzysztof WilczyƄski R: Rob Herring -R: Krzysztof WilczyƄski L: linux-pci@vger.kernel.org S: Supported Q: https://patchwork.kernel.org/project/linux-pci/list/ From 606d4ef4922662ded34aa7218288c3043ce0a41a Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 31 Jan 2023 08:38:45 +1030 Subject: [PATCH 031/186] pinctrl: aspeed: Revert "Force to disable the function's signal" This reverts commit cf517fef601b9dde151f0afc27164d13bf1fd907. The commit cf517fef601b ("pinctrl: aspeed: Force to disable the function's signal") exposed a problem with fetching the regmap for reading the GFX register. The Romulus machine the device tree contains a gpio hog for GPIO S7. With the patch applied: Muxing pin 151 for GPIO Disabling signal VPOB9 for VPO aspeed-g5-pinctrl 1e6e2080.pinctrl: Failed to acquire regmap for IP block 1 aspeed-g5-pinctrl 1e6e2080.pinctrl: request() failed for pin 151 The code path is aspeed-gpio -> pinmux-g5 -> regmap -> clk, and the of_clock code returns an error as it doesn't have a valid struct clk_hw pointer. The regmap call happens because pinmux wants to check the GFX node (IP block 1) to query bits there. For reference, before the offending patch: Muxing pin 151 for GPIO Disabling signal VPOB9 for VPO Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Disabling signal VPOB9 for VPOOFF1 Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Disabling signal VPOB9 for VPOOFF2 Want SCU8C[0x00000080]=0x1, got 0x0 from 0x00000000 Enabling signal GPIOS7 for GPIOS7 Muxed pin 151 as GPIOS7 gpio-943 (seq_cont): hogged as output/low We can't skip the clock check to allow pinmux to proceed, because the write to disable VPOB9 will try to set a bit in the GFX register space which will not stick when the IP is in reset. However, we do not want to enable the IP just so pinmux can do a disable-enable dance for the pin. For now, revert the offending patch while a correct solution is found. Fixes: cf517fef601b ("pinctrl: aspeed: Force to disable the function's signal") Link: https://github.com/openbmc/linux/issues/218 Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20230130220845.917985-1-joel@jms.id.au Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index b5ca4501a605..9c6ee46ac7a0 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -93,10 +93,19 @@ static int aspeed_sig_expr_enable(struct aspeed_pinmux_data *ctx, static int aspeed_sig_expr_disable(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr) { + int ret; + pr_debug("Disabling signal %s for %s\n", expr->signal, expr->function); - return aspeed_sig_expr_set(ctx, expr, false); + ret = aspeed_sig_expr_eval(ctx, expr, true); + if (ret < 0) + return ret; + + if (ret) + return aspeed_sig_expr_set(ctx, expr, false); + + return 0; } /** From c956940a4ab73a87d0165e911c001dbdd2c8200f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 30 Jan 2023 16:25:50 +0200 Subject: [PATCH 032/186] RDMA/umem: Use dma-buf locked API to solve deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cited commit moves umem to call the unlocked versions of dmabuf unmap/map attachment, but the lock is held while calling to these functions, hence move back to the locked versions of these APIs. Fixes: 21c9c5c0784f ("RDMA/umem: Prepare to dynamic dma-buf locking specification") Link: https://lore.kernel.org/r/311c2cb791f8af75486df446819071357353db1b.1675088709.git.leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Christian König Signed-off-by: Leon Romanovsky Reviewed-by: Dmitry Osipenko Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_dmabuf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index 43b26bc12288..39357dc2d229 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -26,8 +26,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) if (umem_dmabuf->sgt) goto wait_fence; - sgt = dma_buf_map_attachment_unlocked(umem_dmabuf->attach, - DMA_BIDIRECTIONAL); + sgt = dma_buf_map_attachment(umem_dmabuf->attach, + DMA_BIDIRECTIONAL); if (IS_ERR(sgt)) return PTR_ERR(sgt); @@ -103,8 +103,8 @@ void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) umem_dmabuf->last_sg_trim = 0; } - dma_buf_unmap_attachment_unlocked(umem_dmabuf->attach, umem_dmabuf->sgt, - DMA_BIDIRECTIONAL); + dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt, + DMA_BIDIRECTIONAL); umem_dmabuf->sgt = NULL; } From f9c47b2caa7ffc903ec950b454b59c209afe3182 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 14:04:08 -0500 Subject: [PATCH 033/186] IB/hfi1: Assign npages earlier Improve code clarity and enable earlier use of tidbuf->npages by moving its assignment to structure creation time. Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167329104884.1472990.4639750192433251493.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b02f2f0809c8..350884d5f089 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -160,16 +160,11 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) { int pinned; - unsigned int npages; + unsigned int npages = tidbuf->npages; unsigned long vaddr = tidbuf->vaddr; struct page **pages = NULL; struct hfi1_devdata *dd = fd->uctxt->dd; - /* Get the number of pages the user buffer spans */ - npages = num_user_pages(vaddr, tidbuf->length); - if (!npages) - return -EINVAL; - if (npages > fd->uctxt->expected_count) { dd_dev_err(dd, "Expected buffer too big\n"); return -EINVAL; @@ -196,7 +191,6 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) return pinned; } tidbuf->pages = pages; - tidbuf->npages = npages; fd->tid_n_pinned += pinned; return pinned; } @@ -274,6 +268,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, mutex_init(&tidbuf->cover_mutex); tidbuf->vaddr = tinfo->vaddr; tidbuf->length = tinfo->length; + tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), GFP_KERNEL); if (!tidbuf->psets) { From 5dac9f8dc25fefd9d928b98f6477ff3daefd73e3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Jan 2023 13:02:13 +0300 Subject: [PATCH 034/186] ALSA: pci: lx6464es: fix a debug loop This loop accidentally reuses the "i" iterator for both the inside and the outside loop. The value of MAX_STREAM_BUFFER is 5. I believe that chip->rmh.stat_len is in the 2-12 range. If the value of .stat_len is 4 or more then it will loop exactly one time, but if it's less then it is a forever loop. It looks like it was supposed to combined into one loop where conditions are checked. Fixes: 8e6320064c33 ("ALSA: lx_core: Remove useless #if 0 .. #endif") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y9jnJTis/mRFJAQp@kili Signed-off-by: Takashi Iwai --- sound/pci/lx6464es/lx_core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c index d3f58a3d17fb..b5b0d43bb8dc 100644 --- a/sound/pci/lx6464es/lx_core.c +++ b/sound/pci/lx6464es/lx_core.c @@ -493,12 +493,11 @@ int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, dev_dbg(chip->card->dev, "CMD_08_ASK_BUFFERS: needed %d, freed %d\n", *r_needed, *r_freed); - for (i = 0; i < MAX_STREAM_BUFFER; ++i) { - for (i = 0; i != chip->rmh.stat_len; ++i) - dev_dbg(chip->card->dev, - " stat[%d]: %x, %x\n", i, - chip->rmh.stat[i], - chip->rmh.stat[i] & MASK_DATA_SIZE); + for (i = 0; i < MAX_STREAM_BUFFER && i < chip->rmh.stat_len; + ++i) { + dev_dbg(chip->card->dev, " stat[%d]: %x, %x\n", i, + chip->rmh.stat[i], + chip->rmh.stat[i] & MASK_DATA_SIZE); } } From bd401fd730cbcb0717bbc5438f15084db10f9259 Mon Sep 17 00:00:00 2001 From: Guillaume Pinot Date: Sun, 29 Jan 2023 18:13:38 +0100 Subject: [PATCH 035/186] ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book2 Pro 360 Samsung Galaxy Book2 Pro 360 (13" 2022 NP930QED-KA1FR) with codec SSID 144d:ca03 requires the same workaround for enabling the speaker amp like other Samsung models with ALC298 codec. Cc: Signed-off-by: Guillaume Pinot Link: https://lore.kernel.org/r/20230129171338.17249-1-texitoi@texitoi.eu Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index db9518de9343..4055a8f5880a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9523,6 +9523,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), From f0293cd1f4fcc4fbdcd65a5a7b3b318a6d471f78 Mon Sep 17 00:00:00 2001 From: Mayuresh Chitale Date: Mon, 30 Jan 2023 13:18:15 +0530 Subject: [PATCH 036/186] riscv: mm: Implement pmdp_collapse_flush for THP When THP is enabled, 4K pages are collapsed into a single huge page using the generic pmdp_collapse_flush() which will further use flush_tlb_range() to shoot-down stale TLB entries. Unfortunately, the generic pmdp_collapse_flush() only invalidates cached leaf PTEs using address specific SFENCEs which results in repetitive (or unpredictable) page faults on RISC-V implementations which cache non-leaf PTEs. Provide a RISC-V specific pmdp_collapse_flush() which ensures both cached leaf and non-leaf PTEs are invalidated by using non-address specific SFENCEs as recommended by the RISC-V privileged specification. Fixes: e88b333142e4 ("riscv: mm: add THP support on 64-bit") Signed-off-by: Mayuresh Chitale Link: https://lore.kernel.org/r/20230130074815.1694055-1-mchitale@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 4 ++++ arch/riscv/mm/pgtable.c | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 4eba9a98d0e3..3e01f4f3ab08 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -721,6 +721,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd))); } + +#define pmdp_collapse_flush pmdp_collapse_flush +extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 6645ead1a7c1..fef4e7328e49 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -81,3 +81,23 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(pmd_trans_huge(*pmdp)); + /* + * When leaf PTE entries (regular pages) are collapsed into a leaf + * PMD entry (huge page), a valid non-leaf PTE is converted into a + * valid leaf PTE at the level 1 page table. Since the sfence.vma + * forms that specify an address only apply to leaf PTEs, we need a + * global flush here. collapse_huge_page() assumes these flushes are + * eager, so just do the fence here. + */ + flush_tlb_mm(vma->vm_mm); + return pmd; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ From e33416fca8a2313b8650bd5807aaf34354d39a4c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 2 Feb 2023 21:27:35 +1100 Subject: [PATCH 037/186] powerpc: Don't select ARCH_WANTS_NO_INSTR Commit 41b7a347bf14 ("powerpc: Book3S 64-bit outline-only KASAN support") added a select of ARCH_WANTS_NO_INSTR, because it also added some uses of noinstr. However noinstr is always defined, regardless of ARCH_WANTS_NO_INSTR, so there's no need to select it just for that. As PeterZ says [1]: Note that by selecting ARCH_WANTS_NO_INSTR you effectively state to abide by its rules. As of now the powerpc code does not abide by those rules, and trips some new warnings added by Peter in linux-next. So until the code can be fixed to avoid those warnings, disable ARCH_WANTS_NO_INSTR. Note that ARCH_WANTS_NO_INSTR is also used to gate building KCOV and parts of KCSAN. However none of the noinstr annotations in powerpc were added for KCOV or KCSAN, instead instrumentation is blocked at the file level using KCOV_INSTRUMENT_foo.o := n. [1]: https://lore.kernel.org/linuxppc-dev/Y9t6yoafrO5YqVgM@hirez.programming.kicks-ass.net Reported-by: Sachin Sant Suggested-by: Peter Zijlstra Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b8c4ac56bddc..7a5f8dbfbdd0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -163,7 +163,6 @@ config PPC select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx - select ARCH_WANTS_NO_INSTR select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT From 2e7c6652f9b86c01cbd4e988057a746a3a461969 Mon Sep 17 00:00:00 2001 From: V sujith kumar Reddy Date: Fri, 3 Feb 2023 18:02:52 +0530 Subject: [PATCH 038/186] ASoC: SOF: amd: Fix for handling spurious interrupts from DSP As interrupts are Level-triggered,unless and until we deassert the register the interrupts are generated which causes spurious interrupts unhandled. Now we deasserted the interrupt at top half which solved the below "nobody cared" warning. warning reported in dmesg: irq 80: nobody cared (try booting with the "irqpoll" option) CPU: 5 PID: 2735 Comm: irq/80-AudioDSP Not tainted 5.15.86-15817-g4c19f3e06d49 #1 1bd3fd932cf58caacc95b0504d6ea1e3eab22289 Hardware name: Google Skyrim/Skyrim, BIOS Google_Skyrim.15303.0.0 01/03/2023 Call Trace: dump_stack_lvl+0x69/0x97 __report_bad_irq+0x3a/0xae note_interrupt+0x1a9/0x1e3 handle_irq_event_percpu+0x4b/0x6e handle_irq_event+0x36/0x5b handle_fasteoi_irq+0xae/0x171 __common_interrupt+0x48/0xc4 handlers: acp_irq_handler [snd_sof_amd_acp] threaded [<000000007e089f34>] acp_irq_thread [snd_sof_amd_acp] Disabling IRQ #80 Signed-off-by: V sujith kumar Reddy Link: https://lore.kernel.org/r/20230203123254.1898794-1-Vsujithkumar.Reddy@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 6bd2888fbb66..d5ccd4d09278 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -318,7 +318,6 @@ static irqreturn_t acp_irq_thread(int irq, void *context) { struct snd_sof_dev *sdev = context; const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata); - unsigned int base = desc->dsp_intr_base; unsigned int val, count = ACP_HW_SEM_RETRY_COUNT; val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->ext_intr_stat); @@ -328,28 +327,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context) return IRQ_HANDLED; } - val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET); - if (val & ACP_DSP_TO_HOST_IRQ) { - while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) { - /* Wait until acquired HW Semaphore lock or timeout */ - count--; - if (!count) { - dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); - return IRQ_NONE; - } + while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) { + /* Wait until acquired HW Semaphore lock or timeout */ + count--; + if (!count) { + dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); + return IRQ_NONE; } - - sof_ops(sdev)->irq_thread(irq, sdev); - val |= ACP_DSP_TO_HOST_IRQ; - snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET, val); - - /* Unlock or Release HW Semaphore */ - snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0); - - return IRQ_HANDLED; } - return IRQ_NONE; + sof_ops(sdev)->irq_thread(irq, sdev); + /* Unlock or Release HW Semaphore */ + snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0); + + return IRQ_HANDLED; }; static irqreturn_t acp_irq_handler(int irq, void *dev_id) @@ -360,8 +351,11 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id) unsigned int val; val = snd_sof_dsp_read(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET); - if (val) + if (val) { + val |= ACP_DSP_TO_HOST_IRQ; + snd_sof_dsp_write(sdev, ACP_DSP_BAR, base + DSP_SW_INTR_STAT_OFFSET, val); return IRQ_WAKE_THREAD; + } return IRQ_NONE; } From cb80242cc679d6397e77d8a964deeb3ff218d2b5 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 7 Dec 2022 10:50:38 +0800 Subject: [PATCH 039/186] riscv: stacktrace: Fix missing the first frame When running kfence_test, I found some testcases failed like this: # test_out_of_bounds_read: EXPECTATION FAILED at mm/kfence/kfence_test.c:346 Expected report_matches(&expect) to be true, but is false not ok 1 - test_out_of_bounds_read The corresponding call-trace is: BUG: KFENCE: out-of-bounds read in kunit_try_run_case+0x38/0x84 Out-of-bounds read at 0x(____ptrval____) (32B right of kfence-#10): kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc The kfence_test using the first frame of call trace to check whether the testcase is succeed or not. Commit 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") skip first frame for all case, which results the kfence_test failed. Indeed, we only need to skip the first frame for case (task==NULL || task==current). With this patch, the call-trace will be: BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0x88/0x19e Out-of-bounds read at 0x(____ptrval____) (1B left of kfence-#7): test_out_of_bounds_read+0x88/0x19e kunit_try_run_case+0x38/0x84 kunit_generic_run_threadfn_adapter+0x12/0x1e kthread+0xc8/0xde ret_from_exception+0x0/0xc Fixes: 6a00ef449370 ("riscv: eliminate unreliable __builtin_frame_address(1)") Signed-off-by: Liu Shixin Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20221207025038.1022045-1-liushixin2@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 75c8dd64fc48..f9a5a7c90ff0 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = (unsigned long)__builtin_frame_address(0); sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; + level = -1; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -43,7 +44,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; /* Validate frame pointer */ From 97801cfcf9565247bcc53b67ea47fa87b1704375 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 2 Feb 2023 18:40:14 +0800 Subject: [PATCH 040/186] arm64: dts: mediatek: mt8195: Fix vdosys* compatible strings When vdosys1 was initially added, it was incorrectly assumed to be compatible with vdosys0, and thus both had the same mt8195-mmsys compatible attached. This has since been corrected in commit b237efd47df7 ("dt-bindings: arm: mediatek: mmsys: change compatible for MT8195") and commit 82219cfbef18 ("dt-bindings: arm: mediatek: mmsys: add vdosys1 compatible for MT8195"). The device tree needs to be fixed as well, otherwise the vdosys1 block fails to work, and causes its dependent power domain controller to not work either. Change the compatible string of vdosys1 to "mediatek,mt8195-vdosys1". While at it, also add the new "mediatek,mt8195-vdosys0" compatible to vdosys0. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Signed-off-by: Chen-Yu Tsai Tested-by: AngeloGioacchino Del Regno Reviewed-by: AngeloGioacchino Del Regno Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20230202104014.2931517-1-wenst@chromium.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 5d31536f4c48..c10cfeb1214d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2146,7 +2146,7 @@ }; vdosys0: syscon@1c01a000 { - compatible = "mediatek,mt8195-mmsys", "syscon"; + compatible = "mediatek,mt8195-vdosys0", "mediatek,mt8195-mmsys", "syscon"; reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; @@ -2292,7 +2292,7 @@ }; vdosys1: syscon@1c100000 { - compatible = "mediatek,mt8195-mmsys", "syscon"; + compatible = "mediatek,mt8195-vdosys1", "syscon"; reg = <0 0x1c100000 0 0x1000>; #clock-cells = <1>; }; From 4722dd4029c63f10414ffd8d3ffdd6c748391cd7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 30 Sep 2022 09:00:41 -0700 Subject: [PATCH 041/186] ARM: dts: stihxxx-b2120: fix polarity of reset line of tsin0 port According to c8sectpfe driver code we first drive reset line low and then high to reset the port, therefore the reset line is supposed to be annotated as "active low". This will be important when we convert the driver to gpiod API. Reviewed-by: Patrice Chotard Signed-off-by: Dmitry Torokhov Signed-off-by: Patrice Chotard --- arch/arm/boot/dts/stihxxx-b2120.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index 920a0bad7494..8d9a2dfa76f1 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -178,7 +178,7 @@ tsin-num = <0>; serial-not-parallel; i2c-bus = <&ssc2>; - reset-gpios = <&pio15 4 GPIO_ACTIVE_HIGH>; + reset-gpios = <&pio15 4 GPIO_ACTIVE_LOW>; dvb-card = ; }; }; From 9f8b3706eb23bed26f7898af3c6f7fe3858564a1 Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Thu, 2 Feb 2023 20:02:36 +0100 Subject: [PATCH 042/186] drm/amdgpu: fix memory leak in amdgpu_cs_sync_rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_sync_get_fence deletes the returned fence from the syncobj, so the refcount of fence needs to lowered to avoid a memory leak. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2360 Reviewed-by: Alex Deucher Tested-by: Mikhail Gavrilov Reviewed-by: Christian König Signed-off-by: Bert Karwatzki Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/3b590ba0f11d24b8c6c39c3d38250129c1116af4.camel@web.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7b5ce00f0602..7af3041ccd0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1220,10 +1220,13 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * next job actually sees the results from the previous one * before we start executing on the same scheduler ring. */ - if (!s_fence || s_fence->sched != sched) + if (!s_fence || s_fence->sched != sched) { + dma_fence_put(fence); continue; + } r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + dma_fence_put(fence); if (r) return r; } From 3770e52fd4ec40ebee16ba19ad6c09dc0b52739b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Jan 2023 14:07:26 +0100 Subject: [PATCH 043/186] mm: extend max struct page size for kmsan After x86 enabled support for KMSAN, it has become possible to have larger 'struct page' than was expected when commit 5470dea49f53 ("mm: use mm_zero_struct_page from SPARC on all 64b architectures") was merged: include/linux/mm.h:156:10: warning: no case matching constant switch condition '96' switch (sizeof(struct page)) { Extend the maximum accordingly. Link: https://lkml.kernel.org/r/20230130130739.563628-1-arnd@kernel.org Fixes: 5470dea49f53 ("mm: use mm_zero_struct_page from SPARC on all 64b architectures") Fixes: 4ca8cc8d1bbe ("x86: kmsan: enable KMSAN builds for x86") Fixes: f80be4571b19 ("kmsan: add KMSAN runtime core") Signed-off-by: Arnd Bergmann Acked-by: Michal Hocko Reviewed-by: Pasha Tatashin Cc: Alexander Duyck Cc: Alexander Potapenko Cc: Alex Sierra Cc: David Hildenbrand Cc: Hugh Dickins Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f857163ac89..f13f20258ce9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,7 @@ extern int mmap_rnd_compat_bits __read_mostly; * define their own version of this macro in */ #if BITS_PER_LONG == 64 -/* This function must be updated when the size of struct page grows above 80 +/* This function must be updated when the size of struct page grows above 96 * or reduces below 56. The idea that compiler optimizes out switch() * statement, and only leaves move/store instructions. Also the compiler can * combine write statements if they are both assignments and can be reordered, @@ -148,12 +148,18 @@ static inline void __mm_zero_struct_page(struct page *page) { unsigned long *_pp = (void *)page; - /* Check that struct page is either 56, 64, 72, or 80 bytes */ + /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */ BUILD_BUG_ON(sizeof(struct page) & 7); BUILD_BUG_ON(sizeof(struct page) < 56); - BUILD_BUG_ON(sizeof(struct page) > 80); + BUILD_BUG_ON(sizeof(struct page) > 96); switch (sizeof(struct page)) { + case 96: + _pp[11] = 0; + fallthrough; + case 88: + _pp[10] = 0; + fallthrough; case 80: _pp[9] = 0; fallthrough; From ca2b1a5cd107d451e71e7ef463c2a2141ec078d2 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 31 Jan 2023 13:34:56 +0100 Subject: [PATCH 044/186] mailmap: add entry for Alexander Mikhalitsyn My old email isn't working anymore. Link: https://lkml.kernel.org/r/20230131123456.192657-1-aleksandr.mikhalitsyn@canonical.com Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 79e0d680c748..a17b3c8f69d5 100644 --- a/.mailmap +++ b/.mailmap @@ -25,6 +25,8 @@ Aleksey Gorelov Alexander Lobakin Alexander Lobakin Alexander Lobakin +Alexander Mikhalitsyn +Alexander Mikhalitsyn Alexandre Belloni Alexei Starovoitov Alexei Starovoitov From 81e9d6f8647650a7bead74c5f926e29970e834d1 Mon Sep 17 00:00:00 2001 From: Seth Jenkins Date: Tue, 31 Jan 2023 12:25:55 -0500 Subject: [PATCH 045/186] aio: fix mremap after fork null-deref Commit e4a0d3e720e7 ("aio: Make it possible to remap aio ring") introduced a null-deref if mremap is called on an old aio mapping after fork as mm->ioctx_table will be set to NULL. [jmoyer@redhat.com: fix 80 column issue] Link: https://lkml.kernel.org/r/x49sffq4nvg.fsf@segfault.boston.devel.redhat.com Fixes: e4a0d3e720e7 ("aio: Make it possible to remap aio ring") Signed-off-by: Seth Jenkins Signed-off-by: Jeff Moyer Cc: Alexander Viro Cc: Benjamin LaHaise Cc: Jann Horn Cc: Pavel Emelyanov Cc: Signed-off-by: Andrew Morton --- fs/aio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 562916d85cba..e85ba0b77f59 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) spin_lock(&mm->ioctx_lock); rcu_read_lock(); table = rcu_dereference(mm->ioctx_table); + if (!table) + goto out_unlock; + for (i = 0; i < table->nr; i++) { struct kioctx *ctx; @@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) } } +out_unlock: rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); return res; From aa1e6a932ca652a50a5df458399724a80459f521 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Tue, 31 Jan 2023 14:32:06 +0800 Subject: [PATCH 046/186] mm/gup: add folio to list when folio_isolate_lru() succeed If we call folio_isolate_lru() successfully, we will get return value 0. We need to add this folio to the movable_pages_list. Link: https://lkml.kernel.org/r/20230131063206.28820-1-Kuan-Ying.Lee@mediatek.com Fixes: 67e139b02d99 ("mm/gup.c: refactor check_and_migrate_movable_pages()") Signed-off-by: Kuan-Ying Lee Reviewed-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Baolin Wang Cc: Andrew Yang Cc: Chinwen Chang Cc: John Hubbard Cc: Matthias Brugger Signed-off-by: Andrew Morton --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index f45a3a5be53a..7c034514ddd8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1914,7 +1914,7 @@ static unsigned long collect_longterm_unpinnable_pages( drain_allow = false; } - if (!folio_isolate_lru(folio)) + if (folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); From 388bc034d91d480efa88abc5c8d6e6c8a878b1ab Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Thu, 2 Feb 2023 12:33:47 +0000 Subject: [PATCH 047/186] fsdax: dax_unshare_iter() should return a valid length The copy_mc_to_kernel() will return 0 if it executed successfully. Then the return value should be set to the length it copied. [akpm@linux-foundation.org: don't mess up `ret', per Matthew] Link: https://lkml.kernel.org/r/1675341227-14-1-git-send-email-ruansy.fnst@fujitsu.com Fixes: d984648e428b ("fsdax,xfs: port unshare to fsdax") Signed-off-by: Shiyang Ruan Cc: Darrick J. Wong Cc: Alistair Popple Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: John Hubbard Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- fs/dax.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index c48a3a93ab29..3e457a16c7d1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) if (ret < 0) goto out_unlock; - ret = copy_mc_to_kernel(daddr, saddr, length); - if (ret) + if (copy_mc_to_kernel(daddr, saddr, length) == 0) + ret = length; + else ret = -EIO; out_unlock: From a5b21d8d791cd4db609d0bbcaa9e0c7e019888d1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 2 Feb 2023 18:07:35 -0800 Subject: [PATCH 048/186] revert "squashfs: harden sanity check in squashfs_read_xattr_id_table" This fix was nacked by Philip, for reasons identified in the email linked below. Link: https://lkml.kernel.org/r/68f15d67-8945-2728-1f17-5b53a80ec52d@squashfs.org.uk Fixes: 72e544b1b28325 ("squashfs: harden sanity check in squashfs_read_xattr_id_table") Cc: Alexey Khoroshilov Cc: Fedor Pchelkin Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/xattr_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index b88d19e9581e..c8469c656e0d 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, /* Sanity check values */ /* there is always at least one xattr id */ - if (*xattr_ids <= 0) + if (*xattr_ids == 0) return ERR_PTR(-EINVAL); len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); From 51be2fffd65d9f9cb427030ab0ee85d791b4437d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 2 Feb 2023 14:00:23 -0800 Subject: [PATCH 049/186] cpufreq: qcom-hw: Fix cpufreq_driver->get() for non-LMH systems On a sc7180-based Chromebook, when I go to /sys/devices/system/cpu/cpu0/cpufreq I can see: cpuinfo_cur_freq:2995200 cpuinfo_max_freq:1804800 scaling_available_frequencies:300000 576000 ... 1708800 1804800 scaling_cur_freq:1804800 scaling_max_freq:1804800 As you can see the `cpuinfo_cur_freq` is bogus. It turns out that this bogus info started showing up as of commit c72cf0cb1d77 ("cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get()"). That commit seems to assume that everyone is on the LMH bandwagon, but sc7180 isn't. Let's go back to the old code in the case where LMH isn't used. Fixes: c72cf0cb1d77 ("cpufreq: qcom-hw: Fix the frequency returned by cpufreq_driver->get()") Signed-off-by: Douglas Anderson Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam [ Viresh: Fixed the 'fixes' tag ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 32 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 340fed35e45d..39a87020c4c6 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -143,21 +143,6 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) return lval * xo_rate; } -/* Get the current frequency of the CPU (after throttling) */ -static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) -{ - struct qcom_cpufreq_data *data; - struct cpufreq_policy *policy; - - policy = cpufreq_cpu_get_raw(cpu); - if (!policy) - return 0; - - data = policy->driver_data; - - return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; -} - /* Get the frequency requested by the cpufreq core for the CPU */ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) { @@ -179,6 +164,23 @@ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu) return policy->freq_table[index].frequency; } +static unsigned int qcom_cpufreq_hw_get(unsigned int cpu) +{ + struct qcom_cpufreq_data *data; + struct cpufreq_policy *policy; + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) + return 0; + + data = policy->driver_data; + + if (data->throttle_irq >= 0) + return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ; + + return qcom_cpufreq_get_freq(cpu); +} + static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { From 9a6804aa1c92cd28e89e746ace44d5ba101db76c Mon Sep 17 00:00:00 2001 From: Elvis Angelaccio Date: Sun, 5 Feb 2023 19:56:18 +0100 Subject: [PATCH 050/186] ALSA: hda/realtek: Enable mute/micmute LEDs on HP Elitebook, 645 G9 The HP Elitebook 645 G9 laptop (with motherboard model 89D2) uses the ALC236 codec and requires the alc236_fixup_hp_mute_led_micmute_vref fixup in order to enable mute/micmute LEDs. Note: the alc236_fixup_hp_gpio_led fixup, which is used by the Elitebook 640 G9, does not work with the 645 G9. [ rearranged the entry in SSID order -- tiwai ] Signed-off-by: Elvis Angelaccio Cc: Link: https://lore.kernel.org/r/4055cb48-e228-8a13-524d-afbb7aaafebe@kde.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4055a8f5880a..d621fe4d2a6f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9423,6 +9423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), From 563ca0e9eab8acc8a1309e8b440108ff8d23e951 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 24 Jan 2023 18:20:54 +0300 Subject: [PATCH 051/186] RDMA/mana_ib: Prevent array underflow in mana_ib_create_qp_raw() The "port" comes from the user and if it is zero then the: ndev = mc->ports[port - 1]; assignment does an out of bounds read. I have changed the if statement to fix this and to mirror how it is done in mana_ib_create_qp_rss(). Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y8/3Vn8qx00kE9Kk@kili Acked-by: Long Li Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ea15ec77e321..54b61930a7fd 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -289,7 +289,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, /* IB ports start with 1, MANA Ethernet ports start with 0 */ port = ucmd.port; - if (ucmd.port > mc->num_ports) + if (port < 1 || port > mc->num_ports) return -EINVAL; if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { From 97e45d469eb180a7bd2809e4e079331552c73e42 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 4 Feb 2023 09:22:06 -0800 Subject: [PATCH 052/186] powerpc/kexec_file: fix implicit decl error kexec (PPC64) code calls memory_hotplug_max(). Add the header declaration for it from . Using does not work since the #include for depends on CONFIG_NUMA=y, which is not always set. Fixes this build error/warning: arch/powerpc/kexec/file_load_64.c: In function 'kexec_extra_fdt_size_ppc64': arch/powerpc/kexec/file_load_64.c:993:33: error: implicit declaration of function 'memory_hotplug_max' 993 | usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) + | ^~~~~~~~~~~~~~~~~~ Fixes: fc546faa5595 ("powerpc/kexec_file: Count hot-pluggable memory in FDT estimate") Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230204172206.7662-1-rdunlap@infradead.org --- arch/powerpc/kexec/file_load_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 52085751f5f4..9be3e818a240 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include struct umem_info { From 5921b250f43870e7d8044ca14e402292ceb3e3a8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 3 Feb 2023 17:50:54 +0100 Subject: [PATCH 053/186] pinctrl: qcom: sm8450-lpass-lpi: correct swr_rx_data group According to hardware programming guide, the swr_rx_data pin group has only two pins (GPIO5 and GPIO6). This is also visible in "struct sm8450_groups" in the driver - GPIO15 does not have swr_rx_data function. Fixes: ec1652fc4d56 ("pinctrl: qcom: Add sm8450 lpass lpi pinctrl driver") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230203165054.390762-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c index c3c8c34148f1..e22d03ce292e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8450-lpass-lpi.c @@ -105,7 +105,7 @@ static const struct pinctrl_pin_desc sm8450_lpi_pins[] = { static const char * const swr_tx_clk_groups[] = { "gpio0" }; static const char * const swr_tx_data_groups[] = { "gpio1", "gpio2", "gpio14" }; static const char * const swr_rx_clk_groups[] = { "gpio3" }; -static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5", "gpio15" }; +static const char * const swr_rx_data_groups[] = { "gpio4", "gpio5" }; static const char * const dmic1_clk_groups[] = { "gpio6" }; static const char * const dmic1_data_groups[] = { "gpio7" }; static const char * const dmic2_clk_groups[] = { "gpio8" }; From 147323792693bf013f60dca160be1d32bd4d180a Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Thu, 27 Oct 2022 21:28:31 +1300 Subject: [PATCH 054/186] ASoC: tas5805m: rework to avoid scheduling while atomic. There's some setup we need to do in order to get the DSP initialized, and this can't be done until a bit-clock is ready. In an earlier version of this driver, this work was done in a DAPM callback. The DAPM callback doesn't guarantee that the bit-clock is running, so the work was moved instead to the trigger callback. Unfortunately this callback runs in atomic context, and the setup code needs to do I2C transactions. Here we use a work_struct to kick off the setup in a thread instead. Fixes: ec45268467f4 ("ASoC: add support for TAS5805M digital amplifier") Signed-off-by: Daniel Beer Link: https://lore.kernel.org/r/85d8ba405cb009a7a3249b556dc8f3bdb1754fdf.1675497326.git.daniel.beer@igorinstitute.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas5805m.c | 132 ++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 43 deletions(-) diff --git a/sound/soc/codecs/tas5805m.c b/sound/soc/codecs/tas5805m.c index beb4ec629a03..6e2edf045446 100644 --- a/sound/soc/codecs/tas5805m.c +++ b/sound/soc/codecs/tas5805m.c @@ -154,6 +154,7 @@ static const uint32_t tas5805m_volume[] = { #define TAS5805M_VOLUME_MIN 0 struct tas5805m_priv { + struct i2c_client *i2c; struct regulator *pvdd; struct gpio_desc *gpio_pdn_n; @@ -165,6 +166,9 @@ struct tas5805m_priv { int vol[2]; bool is_powered; bool is_muted; + + struct work_struct work; + struct mutex lock; }; static void set_dsp_scale(struct regmap *rm, int offset, int vol) @@ -181,13 +185,11 @@ static void set_dsp_scale(struct regmap *rm, int offset, int vol) regmap_bulk_write(rm, offset, v, ARRAY_SIZE(v)); } -static void tas5805m_refresh(struct snd_soc_component *component) +static void tas5805m_refresh(struct tas5805m_priv *tas5805m) { - struct tas5805m_priv *tas5805m = - snd_soc_component_get_drvdata(component); struct regmap *rm = tas5805m->regmap; - dev_dbg(component->dev, "refresh: is_muted=%d, vol=%d/%d\n", + dev_dbg(&tas5805m->i2c->dev, "refresh: is_muted=%d, vol=%d/%d\n", tas5805m->is_muted, tas5805m->vol[0], tas5805m->vol[1]); regmap_write(rm, REG_PAGE, 0x00); @@ -226,8 +228,11 @@ static int tas5805m_vol_get(struct snd_kcontrol *kcontrol, struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + mutex_lock(&tas5805m->lock); ucontrol->value.integer.value[0] = tas5805m->vol[0]; ucontrol->value.integer.value[1] = tas5805m->vol[1]; + mutex_unlock(&tas5805m->lock); + return 0; } @@ -243,11 +248,13 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + int ret = 0; if (!(volume_is_valid(ucontrol->value.integer.value[0]) && volume_is_valid(ucontrol->value.integer.value[1]))) return -EINVAL; + mutex_lock(&tas5805m->lock); if (tas5805m->vol[0] != ucontrol->value.integer.value[0] || tas5805m->vol[1] != ucontrol->value.integer.value[1]) { tas5805m->vol[0] = ucontrol->value.integer.value[0]; @@ -256,11 +263,12 @@ static int tas5805m_vol_put(struct snd_kcontrol *kcontrol, tas5805m->vol[0], tas5805m->vol[1], tas5805m->is_powered); if (tas5805m->is_powered) - tas5805m_refresh(component); - return 1; + tas5805m_refresh(tas5805m); + ret = 1; } + mutex_unlock(&tas5805m->lock); - return 0; + return ret; } static const struct snd_kcontrol_new tas5805m_snd_controls[] = { @@ -294,50 +302,18 @@ static int tas5805m_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_component *component = dai->component; struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); - struct regmap *rm = tas5805m->regmap; - unsigned int chan, global1, global2; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - dev_dbg(component->dev, "DSP startup\n"); - - /* We mustn't issue any I2C transactions until the I2S - * clock is stable. Furthermore, we must allow a 5ms - * delay after the first set of register writes to - * allow the DSP to boot before configuring it. - */ - usleep_range(5000, 10000); - send_cfg(rm, dsp_cfg_preboot, - ARRAY_SIZE(dsp_cfg_preboot)); - usleep_range(5000, 15000); - send_cfg(rm, tas5805m->dsp_cfg_data, - tas5805m->dsp_cfg_len); - - tas5805m->is_powered = true; - tas5805m_refresh(component); + dev_dbg(component->dev, "clock start\n"); + schedule_work(&tas5805m->work); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - dev_dbg(component->dev, "DSP shutdown\n"); - - tas5805m->is_powered = false; - - regmap_write(rm, REG_PAGE, 0x00); - regmap_write(rm, REG_BOOK, 0x00); - - regmap_read(rm, REG_CHAN_FAULT, &chan); - regmap_read(rm, REG_GLOBAL_FAULT1, &global1); - regmap_read(rm, REG_GLOBAL_FAULT2, &global2); - - dev_dbg(component->dev, - "fault regs: CHAN=%02x, GLOBAL1=%02x, GLOBAL2=%02x\n", - chan, global1, global2); - - regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ); break; default: @@ -347,6 +323,67 @@ static int tas5805m_trigger(struct snd_pcm_substream *substream, int cmd, return 0; } +static void do_work(struct work_struct *work) +{ + struct tas5805m_priv *tas5805m = + container_of(work, struct tas5805m_priv, work); + struct regmap *rm = tas5805m->regmap; + + dev_dbg(&tas5805m->i2c->dev, "DSP startup\n"); + + mutex_lock(&tas5805m->lock); + /* We mustn't issue any I2C transactions until the I2S + * clock is stable. Furthermore, we must allow a 5ms + * delay after the first set of register writes to + * allow the DSP to boot before configuring it. + */ + usleep_range(5000, 10000); + send_cfg(rm, dsp_cfg_preboot, ARRAY_SIZE(dsp_cfg_preboot)); + usleep_range(5000, 15000); + send_cfg(rm, tas5805m->dsp_cfg_data, tas5805m->dsp_cfg_len); + + tas5805m->is_powered = true; + tas5805m_refresh(tas5805m); + mutex_unlock(&tas5805m->lock); +} + +static int tas5805m_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct tas5805m_priv *tas5805m = + snd_soc_component_get_drvdata(component); + struct regmap *rm = tas5805m->regmap; + + if (event & SND_SOC_DAPM_PRE_PMD) { + unsigned int chan, global1, global2; + + dev_dbg(component->dev, "DSP shutdown\n"); + cancel_work_sync(&tas5805m->work); + + mutex_lock(&tas5805m->lock); + if (tas5805m->is_powered) { + tas5805m->is_powered = false; + + regmap_write(rm, REG_PAGE, 0x00); + regmap_write(rm, REG_BOOK, 0x00); + + regmap_read(rm, REG_CHAN_FAULT, &chan); + regmap_read(rm, REG_GLOBAL_FAULT1, &global1); + regmap_read(rm, REG_GLOBAL_FAULT2, &global2); + + dev_dbg(component->dev, "fault regs: CHAN=%02x, " + "GLOBAL1=%02x, GLOBAL2=%02x\n", + chan, global1, global2); + + regmap_write(rm, REG_DEVICE_CTRL_2, DCTRL2_MODE_HIZ); + } + mutex_unlock(&tas5805m->lock); + } + + return 0; +} + static const struct snd_soc_dapm_route tas5805m_audio_map[] = { { "DAC", NULL, "DAC IN" }, { "OUT", NULL, "DAC" }, @@ -354,7 +391,8 @@ static const struct snd_soc_dapm_route tas5805m_audio_map[] = { static const struct snd_soc_dapm_widget tas5805m_dapm_widgets[] = { SND_SOC_DAPM_AIF_IN("DAC IN", "Playback", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_DAC("DAC", NULL, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_DAC_E("DAC", NULL, SND_SOC_NOPM, 0, 0, + tas5805m_dac_event, SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_OUTPUT("OUT") }; @@ -375,11 +413,14 @@ static int tas5805m_mute(struct snd_soc_dai *dai, int mute, int direction) struct tas5805m_priv *tas5805m = snd_soc_component_get_drvdata(component); + mutex_lock(&tas5805m->lock); dev_dbg(component->dev, "set mute=%d (is_powered=%d)\n", mute, tas5805m->is_powered); + tas5805m->is_muted = mute; if (tas5805m->is_powered) - tas5805m_refresh(component); + tas5805m_refresh(tas5805m); + mutex_unlock(&tas5805m->lock); return 0; } @@ -434,6 +475,7 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c) if (!tas5805m) return -ENOMEM; + tas5805m->i2c = i2c; tas5805m->pvdd = devm_regulator_get(dev, "pvdd"); if (IS_ERR(tas5805m->pvdd)) { dev_err(dev, "failed to get pvdd supply: %ld\n", @@ -507,6 +549,9 @@ static int tas5805m_i2c_probe(struct i2c_client *i2c) gpiod_set_value(tas5805m->gpio_pdn_n, 1); usleep_range(10000, 15000); + INIT_WORK(&tas5805m->work, do_work); + mutex_init(&tas5805m->lock); + /* Don't register through devm. We need to be able to unregister * the component prior to deasserting PDN# */ @@ -527,6 +572,7 @@ static void tas5805m_i2c_remove(struct i2c_client *i2c) struct device *dev = &i2c->dev; struct tas5805m_priv *tas5805m = dev_get_drvdata(dev); + cancel_work_sync(&tas5805m->work); snd_soc_unregister_component(dev); gpiod_set_value(tas5805m->gpio_pdn_n, 0); usleep_range(10000, 15000); From e0576cd642ced1ac65370b4516b7be9f536a0498 Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Thu, 27 Oct 2022 21:38:38 +1300 Subject: [PATCH 055/186] ASoC: tas5805m: add missing page switch. In tas5805m_refresh, we switch pages to update the DSP volume control, but we need to switch back to page 0 before trying to alter the soft-mute control. This latter page-switch was missing. Fixes: ec45268467f4 ("ASoC: add support for TAS5805M digital amplifier") Signed-off-by: Daniel Beer Link: https://lore.kernel.org/r/1fea38a71ea6ab0225d19ab28d1fa12828d762d0.1675497326.git.daniel.beer@igorinstitute.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas5805m.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tas5805m.c b/sound/soc/codecs/tas5805m.c index 6e2edf045446..4e38eb7acea1 100644 --- a/sound/soc/codecs/tas5805m.c +++ b/sound/soc/codecs/tas5805m.c @@ -203,6 +203,9 @@ static void tas5805m_refresh(struct tas5805m_priv *tas5805m) set_dsp_scale(rm, 0x24, tas5805m->vol[0]); set_dsp_scale(rm, 0x28, tas5805m->vol[1]); + regmap_write(rm, REG_PAGE, 0x00); + regmap_write(rm, REG_BOOK, 0x00); + /* Set/clear digital soft-mute */ regmap_write(rm, REG_DEVICE_CTRL_2, (tas5805m->is_muted ? DCTRL2_MUTE : 0) | From fe1e7e8ce2c47bd8fd9885eab63fca0a522e94c9 Mon Sep 17 00:00:00 2001 From: Alexey Firago Date: Sat, 4 Feb 2023 22:51:06 +0300 Subject: [PATCH 056/186] ASoC: codecs: es8326: Fix DTS properties reading Seems like properties parsing and reading was copy-pasted, so "everest,interrupt-src" and "everest,interrupt-clk" are saved into the es8326->jack_pol variable. This might lead to wrong settings being saved into the reg 57 (ES8326_HP_DET). Fix this by using proper variables while reading properties. Signed-off-by: Alexey Firago Reviewed-by: Yang Yingliang --- sound/soc/codecs/es8326.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 9ddf6a35e91c..28a0565c2a95 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -729,14 +729,16 @@ static int es8326_probe(struct snd_soc_component *component) } dev_dbg(component->dev, "jack-pol %x", es8326->jack_pol); - ret = device_property_read_u8(component->dev, "everest,interrupt-src", &es8326->jack_pol); + ret = device_property_read_u8(component->dev, "everest,interrupt-src", + &es8326->interrupt_src); if (ret != 0) { dev_dbg(component->dev, "interrupt-src return %d", ret); es8326->interrupt_src = ES8326_HP_DET_SRC_PIN9; } dev_dbg(component->dev, "interrupt-src %x", es8326->interrupt_src); - ret = device_property_read_u8(component->dev, "everest,interrupt-clk", &es8326->jack_pol); + ret = device_property_read_u8(component->dev, "everest,interrupt-clk", + &es8326->interrupt_clk); if (ret != 0) { dev_dbg(component->dev, "interrupt-clk return %d", ret); es8326->interrupt_clk = 0x45; From 90d5e8301ac24550be80d193aa5582cab56c29fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Jan 2023 02:21:24 +0200 Subject: [PATCH 057/186] drm/i915: Don't do the WM0->WM1 copy w/a if WM1 is already enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to a workaround we have to make sure the WM1 watermarks block/lines values are sensible even when WM1 is disabled. To that end we copy those values from WM0. However since we now keep each wm level enabled on a per-plane basis it doesn't seem necessary to do that copy when we already have an enabled WM1 on the current plane. That is, we might be in a situation where another plane can only do WM0 (and thus needs the copy) but the current plane's WM1 is still perfectly valid (ie. fits into the current DDB allocation). Skipping the copy could avoid reprogramming the plane's registers needlessly in some cases. Fixes: a301cb0fca2d ("drm/i915: Keep plane watermarks enabled more aggressively") Signed-off-by: Ville SyrjĂ€lĂ€ Link: https://patchwork.freedesktop.org/patch/msgid/20230131002127.29305-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit c580c2d27ac8754cc6f01da1d715b7272f5f9cbb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/skl_watermark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index e0766d1be966..11554645e6ee 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -1587,7 +1587,8 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state, skl_check_wm_level(&wm->wm[level], ddb); if (icl_need_wm1_wa(i915, plane_id) && - level == 1 && wm->wm[0].enable) { + level == 1 && !wm->wm[level].enable && + wm->wm[0].enable) { wm->wm[level].blocks = wm->wm[0].blocks; wm->wm[level].lines = wm->wm[0].lines; wm->wm[level].ignore_lines = wm->wm[0].ignore_lines; From 039a72ce7e570c56713fe7758a0658034dc48c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 23 Jan 2023 09:44:37 +0200 Subject: [PATCH 058/186] drm/i915/fbdev: Implement fb_dirty for intel custom fb helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After disconnecting damage worker from update logic it's left to fbdev emulation implementation to have fb_dirty function. Currently intel fbdev doesn't have it. This is causing problems to features (PSR, FBC, DRRS) relying on dirty callback. Implement simple fb_dirty callback to deliver notifications about updates in fb console. v4: Add proper Fixes tag and modify commit message v3: Check damage clip v2: Improved commit message and added Fixes tag Fixes: f231af498c29 ("drm/fb-helper: Disconnect damage worker from update logic") Cc: Ville SyrjĂ€lĂ€ Cc: Thomas Zimmermann Cc: Jani Nikula Signed-off-by: Jouni Högander Signed-off-by: Ville SyrjĂ€lĂ€ Link: https://patchwork.freedesktop.org/patch/msgid/20230123074437.475103-1-jouni.hogander@intel.com (cherry picked from commit 1af546c2cec6e28b6bbe01a4ad0c38e96e54fcb4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbdev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 5575d7abdc09..f76c06b7f1d4 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -328,8 +328,20 @@ out_unlock: return ret; } +static int intelfb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) +{ + if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) + return 0; + + if (helper->fb->funcs->dirty) + return helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); + + return 0; +} + static const struct drm_fb_helper_funcs intel_fb_helper_funcs = { .fb_probe = intelfb_create, + .fb_dirty = intelfb_dirty, }; static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) From 251e8c5b1b1fadcc387a8e618c7437d330bdac3e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 3 Feb 2023 08:49:20 -0800 Subject: [PATCH 059/186] drm/i915: Move fd_install after last use of fence Because eb_composite_fence_create() drops the fence_array reference after creation of the sync_file, only the sync_file holds a ref to the fence. But fd_install() makes that reference visable to userspace, so it must be the last thing we do with the fence. Signed-off-by: Rob Clark Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: # v5.15+ [tursulin: Added stable tag.] Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230203164937.4035503-1-robdclark@gmail.com (cherry picked from commit 960dafa30455450d318756a9896a02727f2639e0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f266b68cf012..0f2e056c02dd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3483,6 +3483,13 @@ err_request: eb.composite_fence : &eb.requests[0]->fence); + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + eb.composite_fence ? + eb.composite_fence : + &eb.requests[0]->fence); + } + if (out_fence) { if (err == 0) { fd_install(out_fence_fd, out_fence->file); @@ -3494,13 +3501,6 @@ err_request: } } - if (unlikely(eb.gem_context->syncobj)) { - drm_syncobj_replace_fence(eb.gem_context->syncobj, - eb.composite_fence ? - eb.composite_fence : - &eb.requests[0]->fence); - } - if (!out_fence && eb.composite_fence) dma_fence_put(eb.composite_fence); From 44e4c5684fcc82d8f099656c4ea39d9571e2a8ac Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Fri, 3 Feb 2023 19:22:05 +0530 Subject: [PATCH 060/186] drm/i915: Initialize the obj flags for shmem objects Obj flags for shmem objects is not being set correctly. Fixes in setting BO_ALLOC_USER flag which applies to shmem objs as well. v2: Add fixes tag (Tvrtko, Matt A) Fixes: 13d29c823738 ("drm/i915/ehl: unconditionally flush the pages on acquire") Cc: # v5.15+ Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Signed-off-by: Aravind Iddamsetty Reviewed-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin [tursulin: Grouped all tags together.] Link: https://patchwork.freedesktop.org/patch/msgid/20230203135205.4051149-1-aravind.iddamsetty@intel.com (cherry picked from commit bca0d1d3ceeb07be45a51c0fa4d57a0ce31b6aed) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 9c759df700ca..937728840428 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -579,7 +579,7 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; From eb4b55f2f26fc8a7b7dc6f06f1de91480d53485b Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Thu, 2 Feb 2023 17:17:16 -0800 Subject: [PATCH 061/186] platform/x86/intel/vsec: Add support for Meteor Lake Add Meteor Lake PMT telemetry support. Signed-off-by: Gayatri Kammela Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20230203011716.1078003-1-david.e.box@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/vsec.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index bb81b8b1f7e9..89c5374e33b3 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -408,14 +408,23 @@ static const struct intel_vsec_platform_info dg1_info = { .quirks = VSEC_QUIRK_NO_DVSEC | VSEC_QUIRK_EARLY_HW, }; +/* MTL info */ +static const struct intel_vsec_platform_info mtl_info = { + .quirks = VSEC_QUIRK_NO_WATCHER | VSEC_QUIRK_NO_CRASHLOG, +}; + #define PCI_DEVICE_ID_INTEL_VSEC_ADL 0x467d #define PCI_DEVICE_ID_INTEL_VSEC_DG1 0x490e +#define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d +#define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d #define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &(struct intel_vsec_platform_info) {}) }, { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, From db370a8b9f67ae5f17e3d5482493294467784504 Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Thu, 2 Feb 2023 09:30:20 -0300 Subject: [PATCH 062/186] rtmutex: Ensure that the top waiter is always woken up Let L1 and L2 be two spinlocks. Let T1 be a task holding L1 and blocked on L2. T1, currently, is the top waiter of L2. Let T2 be the task holding L2. Let T3 be a task trying to acquire L1. The following events will lead to a state in which the wait queue of L2 isn't empty, but no task actually holds the lock. T1 T2 T3 == == == spin_lock(L1) | raw_spin_lock(L1->wait_lock) | rtlock_slowlock_locked(L1) | | task_blocks_on_rt_mutex(L1, T3) | | | orig_waiter->lock = L1 | | | orig_waiter->task = T3 | | | raw_spin_unlock(L1->wait_lock) | | | rt_mutex_adjust_prio_chain(T1, L1, L2, orig_waiter, T3) spin_unlock(L2) | | | | | rt_mutex_slowunlock(L2) | | | | | | raw_spin_lock(L2->wait_lock) | | | | | | wakeup(T1) | | | | | | raw_spin_unlock(L2->wait_lock) | | | | | | | | waiter = T1->pi_blocked_on | | | | waiter == rt_mutex_top_waiter(L2) | | | | waiter->task == T1 | | | | raw_spin_lock(L2->wait_lock) | | | | dequeue(L2, waiter) | | | | update_prio(waiter, T1) | | | | enqueue(L2, waiter) | | | | waiter != rt_mutex_top_waiter(L2) | | | | L2->owner == NULL | | | | wakeup(T1) | | | | raw_spin_unlock(L2->wait_lock) T1 wakes up T1 != top_waiter(L2) schedule_rtlock() If the deadline of T1 is updated before the call to update_prio(), and the new deadline is greater than the deadline of the second top waiter, then after the requeue, T1 is no longer the top waiter, and the wrong task is woken up which will then go back to sleep because it is not the top waiter. This can be reproduced in PREEMPT_RT with stress-ng: while true; do stress-ng --sched deadline --sched-period 1000000000 \ --sched-runtime 800000000 --sched-deadline \ 1000000000 --mmapfork 23 -t 20 done A similar issue was pointed out by Thomas versus the cases where the top waiter drops out early due to a signal or timeout, which is a general issue for all regular rtmutex use cases, e.g. futex. The problematic code is in rt_mutex_adjust_prio_chain(): // Save the top waiter before dequeue/enqueue prerequeue_top_waiter = rt_mutex_top_waiter(lock); rt_mutex_dequeue(lock, waiter); waiter_update_prio(waiter, task); rt_mutex_enqueue(lock, waiter); // Lock has no owner? if (!rt_mutex_owner(lock)) { // Top waiter changed ----> if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) ----> wake_up_state(waiter->task, waiter->wake_state); This only takes the case into account where @waiter is the new top waiter due to the requeue operation. But it fails to handle the case where @waiter is not longer the top waiter due to the requeue operation. Ensure that the new top waiter is woken up so in all cases so it can take over the ownerless lock. [ tglx: Amend changelog, add Fixes tag ] Fixes: c014ef69b3ac ("locking/rtmutex: Add wake_state to rt_mutex_waiter") Signed-off-by: Wander Lairson Costa Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230117172649.52465-1-wander@redhat.com Link: https://lore.kernel.org/r/20230202123020.14844-1-wander@redhat.com --- kernel/locking/rtmutex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 010cf4e6d0b8..728f434de2bb 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -901,8 +901,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, * then we need to wake the new top waiter up to try * to get the lock. */ - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) - wake_up_state(waiter->task, waiter->wake_state); + top_waiter = rt_mutex_top_waiter(lock); + if (prerequeue_top_waiter != top_waiter) + wake_up_state(top_waiter->task, top_waiter->wake_state); raw_spin_unlock_irq(&lock->wait_lock); return 0; } From 7a17e8423a133a6ac238462126d7f88faaccc681 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 6 Feb 2023 15:00:19 +0000 Subject: [PATCH 063/186] ALSA: hda/realtek: Add quirk for ASUS UM3402 using CS35L41 This Asus Zenbook laptop use Realtek HDA codec combined with 2xCS35L41 Amplifiers using I2C with External Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20230206150019.3825120-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d621fe4d2a6f..f1d829cb66a3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9481,6 +9481,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), + SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), From 8f20660f053cefd4693e69cfff9cf58f4f7c4929 Mon Sep 17 00:00:00 2001 From: Ryan Neph Date: Fri, 3 Feb 2023 15:33:44 -0800 Subject: [PATCH 064/186] drm/virtio: exbuf->fence_fd unmodified on interrupted wait An interrupted dma_fence_wait() becomes an -ERESTARTSYS returned to userspace ioctl(DRM_IOCTL_VIRTGPU_EXECBUFFER) calls, prompting to retry the ioctl(), but the passed exbuf->fence_fd has been reset to -1, making the retry attempt fail at sync_file_get_fence(). The uapi for DRM_IOCTL_VIRTGPU_EXECBUFFER is changed to retain the passed value for exbuf->fence_fd when returning anything besides a successful result from the ioctl. Fixes: 2cd7b6f08bc4 ("drm/virtio: add in/out fence support for explicit synchronization") Signed-off-by: Ryan Neph Reviewed-by: Rob Clark Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20230203233345.2477767-1-ryanneph@chromium.org --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 +---- include/uapi/drm/virtgpu_drm.h | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 9f4a90493aea..da45215a933d 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -126,7 +126,6 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, void __user *user_bo_handles = NULL; struct virtio_gpu_object_array *buflist = NULL; struct sync_file *sync_file; - int in_fence_fd = exbuf->fence_fd; int out_fence_fd = -1; void *buf; uint64_t fence_ctx; @@ -152,13 +151,11 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, ring_idx = exbuf->ring_idx; } - exbuf->fence_fd = -1; - virtio_gpu_create_context(dev, file); if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_IN) { struct dma_fence *in_fence; - in_fence = sync_file_get_fence(in_fence_fd); + in_fence = sync_file_get_fence(exbuf->fence_fd); if (!in_fence) return -EINVAL; diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 0512fde5e697..7b158fcb02b4 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -64,6 +64,7 @@ struct drm_virtgpu_map { __u32 pad; }; +/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */ struct drm_virtgpu_execbuffer { __u32 flags; __u32 size; From 6afaed53cc9adde69d8a76ff5b4d740d5efbc54c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:39 +0000 Subject: [PATCH 065/186] btrfs: simplify update of last_dir_index_offset when logging a directory When logging a directory, we always set the inode's last_dir_index_offset to the offset of the last dir index item we found. This is using an extra field in the log context structure, and it makes more sense to update it only after we insert dir index items, and we could directly update the inode's last_dir_index_offset field instead. So make this simpler by updating the inode's last_dir_index_offset only when we actually insert dir index keys in the log tree, and getting rid of the last_dir_item_offset field in the log context structure. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ Reported-by: Maxim Mikityanskiy Link: https://lore.kernel.org/linux-btrfs/Y8voyTXdnPDz8xwY@mail.gmail.com/ Reported-by: Hunter Wardlaw Link: https://bugzilla.suse.com/show_bug.cgi?id=1207231 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216851 CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 23 +++++++++++++++++------ fs/btrfs/tree-log.h | 2 -- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d43261545264..58599189bd18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3576,17 +3576,19 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, } static int flush_dir_items_batch(struct btrfs_trans_handle *trans, - struct btrfs_root *log, + struct btrfs_inode *inode, struct extent_buffer *src, struct btrfs_path *dst_path, int start_slot, int count) { + struct btrfs_root *log = inode->root->log_root; char *ins_data = NULL; struct btrfs_item_batch batch; struct extent_buffer *dst; unsigned long src_offset; unsigned long dst_offset; + u64 last_index; struct btrfs_key key; u32 item_size; int ret; @@ -3644,6 +3646,19 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1); copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size); btrfs_release_path(dst_path); + + last_index = batch.keys[count - 1].offset; + ASSERT(last_index > inode->last_dir_index_offset); + + /* + * If for some unexpected reason the last item's index is not greater + * than the last index we logged, warn and return an error to fallback + * to a transaction commit. + */ + if (WARN_ON(last_index <= inode->last_dir_index_offset)) + ret = -EUCLEAN; + else + inode->last_dir_index_offset = last_index; out: kfree(ins_data); @@ -3693,7 +3708,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, } di = btrfs_item_ptr(src, i, struct btrfs_dir_item); - ctx->last_dir_item_offset = key.offset; /* * Skip ranges of items that consist only of dir item keys created @@ -3756,7 +3770,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, if (batch_size > 0) { int ret; - ret = flush_dir_items_batch(trans, log, src, dst_path, + ret = flush_dir_items_batch(trans, inode, src, dst_path, batch_start, batch_size); if (ret < 0) return ret; @@ -4044,7 +4058,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = BTRFS_DIR_START_INDEX; max_key = 0; - ctx->last_dir_item_offset = inode->last_dir_index_offset; while (1) { ret = log_dir_items(trans, inode, path, dst_path, @@ -4056,8 +4069,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = max_key + 1; } - inode->last_dir_index_offset = ctx->last_dir_item_offset; - return 0; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 85b43075ac58..85cd24cb0540 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -24,8 +24,6 @@ struct btrfs_log_ctx { bool logging_new_delayed_dentries; /* Indicate if the inode being logged was logged before. */ bool logged_before; - /* Tracks the last logged dir item/index key offset. */ - u64 last_dir_item_offset; struct inode *inode; struct list_head list; /* Only used for fast fsyncs. */ From 04119ab1a49fc41cb70f0472be5455af268fa260 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 6 Feb 2023 07:05:28 +1000 Subject: [PATCH 066/186] nvidiafb: detect the hardware support before removing console. This driver removed the console, but hasn't yet decided if it could take over the console yet. Instead of doing that, probe the hw for support and then remove the console afterwards. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216859 Fixes: 145eed48de27 ("fbdev: Remove conflicting devices on PCI bus") Reported-by: Zeno Davatz Tested-by: Zeno Davatz Tested-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230205210751.3842103-1-airlied@gmail.com --- drivers/video/fbdev/nvidia/nvidia.c | 81 +++++++++++++++-------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c index 1960916098d4..e60a276b4855 100644 --- a/drivers/video/fbdev/nvidia/nvidia.c +++ b/drivers/video/fbdev/nvidia/nvidia.c @@ -1197,17 +1197,17 @@ static int nvidia_set_fbinfo(struct fb_info *info) return nvidiafb_check_var(&info->var, info); } -static u32 nvidia_get_chipset(struct fb_info *info) +static u32 nvidia_get_chipset(struct pci_dev *pci_dev, + volatile u32 __iomem *REGS) { - struct nvidia_par *par = info->par; - u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device; + u32 id = (pci_dev->vendor << 16) | pci_dev->device; printk(KERN_INFO PFX "Device ID: %x \n", id); if ((id & 0xfff0) == 0x00f0 || (id & 0xfff0) == 0x02e0) { /* pci-e */ - id = NV_RD32(par->REGS, 0x1800); + id = NV_RD32(REGS, 0x1800); if ((id & 0x0000ffff) == 0x000010DE) id = 0x10DE0000 | (id >> 16); @@ -1220,12 +1220,11 @@ static u32 nvidia_get_chipset(struct fb_info *info) return id; } -static u32 nvidia_get_arch(struct fb_info *info) +static u32 nvidia_get_arch(u32 Chipset) { - struct nvidia_par *par = info->par; u32 arch = 0; - switch (par->Chipset & 0x0ff0) { + switch (Chipset & 0x0ff0) { case 0x0100: /* GeForce 256 */ case 0x0110: /* GeForce2 MX */ case 0x0150: /* GeForce2 */ @@ -1278,16 +1277,44 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) struct fb_info *info; unsigned short cmd; int ret; + volatile u32 __iomem *REGS; + int Chipset; + u32 Architecture; NVTRACE_ENTER(); assert(pd != NULL); + if (pci_enable_device(pd)) { + printk(KERN_ERR PFX "cannot enable PCI device\n"); + return -ENODEV; + } + + /* enable IO and mem if not already done */ + pci_read_config_word(pd, PCI_COMMAND, &cmd); + cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(pd, PCI_COMMAND, cmd); + + nvidiafb_fix.mmio_start = pci_resource_start(pd, 0); + nvidiafb_fix.mmio_len = pci_resource_len(pd, 0); + + REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len); + if (!REGS) { + printk(KERN_ERR PFX "cannot ioremap MMIO base\n"); + return -ENODEV; + } + + Chipset = nvidia_get_chipset(pd, REGS); + Architecture = nvidia_get_arch(Chipset); + if (Architecture == 0) { + printk(KERN_ERR PFX "unknown NV_ARCH\n"); + goto err_out; + } + ret = aperture_remove_conflicting_pci_devices(pd, "nvidiafb"); if (ret) - return ret; + goto err_out; info = framebuffer_alloc(sizeof(struct nvidia_par), &pd->dev); - if (!info) goto err_out; @@ -1298,11 +1325,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) if (info->pixmap.addr == NULL) goto err_out_kfree; - if (pci_enable_device(pd)) { - printk(KERN_ERR PFX "cannot enable PCI device\n"); - goto err_out_enable; - } - if (pci_request_regions(pd, "nvidiafb")) { printk(KERN_ERR PFX "cannot request PCI regions\n"); goto err_out_enable; @@ -1318,34 +1340,17 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) par->paneltweak = paneltweak; par->reverse_i2c = reverse_i2c; - /* enable IO and mem if not already done */ - pci_read_config_word(pd, PCI_COMMAND, &cmd); - cmd |= (PCI_COMMAND_IO | PCI_COMMAND_MEMORY); - pci_write_config_word(pd, PCI_COMMAND, cmd); - - nvidiafb_fix.mmio_start = pci_resource_start(pd, 0); nvidiafb_fix.smem_start = pci_resource_start(pd, 1); - nvidiafb_fix.mmio_len = pci_resource_len(pd, 0); - par->REGS = ioremap(nvidiafb_fix.mmio_start, nvidiafb_fix.mmio_len); + par->REGS = REGS; - if (!par->REGS) { - printk(KERN_ERR PFX "cannot ioremap MMIO base\n"); - goto err_out_free_base0; - } - - par->Chipset = nvidia_get_chipset(info); - par->Architecture = nvidia_get_arch(info); - - if (par->Architecture == 0) { - printk(KERN_ERR PFX "unknown NV_ARCH\n"); - goto err_out_arch; - } + par->Chipset = Chipset; + par->Architecture = Architecture; sprintf(nvidiafb_fix.id, "NV%x", (pd->device & 0x0ff0) >> 4); if (NVCommonSetup(info)) - goto err_out_arch; + goto err_out_free_base0; par->FbAddress = nvidiafb_fix.smem_start; par->FbMapSize = par->RamAmountKBytes * 1024; @@ -1401,7 +1406,6 @@ static int nvidiafb_probe(struct pci_dev *pd, const struct pci_device_id *ent) goto err_out_iounmap_fb; } - printk(KERN_INFO PFX "PCI nVidia %s framebuffer (%dMB @ 0x%lX)\n", info->fix.id, @@ -1415,15 +1419,14 @@ err_out_iounmap_fb: err_out_free_base1: fb_destroy_modedb(info->monspecs.modedb); nvidia_delete_i2c_busses(par); -err_out_arch: - iounmap(par->REGS); - err_out_free_base0: +err_out_free_base0: pci_release_regions(pd); err_out_enable: kfree(info->pixmap.addr); err_out_kfree: framebuffer_release(info); err_out: + iounmap(REGS); return -ENODEV; } From 2ea31e2e62bbc4d11c411eeb36f1b02841dbcab1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 6 Feb 2023 14:22:40 +1000 Subject: [PATCH 067/186] powerpc/64s/interrupt: Fix interrupt exit race with security mitigation switch The RFI and STF security mitigation options can flip the interrupt_exit_not_reentrant static branch condition concurrently with the interrupt exit code which tests that branch. Interrupt exit tests this condition to set MSR[EE|RI] for exit, then again in the case a soft-masked interrupt is found pending, to recover the MSR so the interrupt can be replayed before attempting to exit again. If the condition changes between these two tests, the MSR and irq soft-mask state will become corrupted, leading to warnings and possible crashes. For example, if the branch is initially true then false, MSR[EE] will be 0 but PACA_IRQ_HARD_DIS clear and EE may not get enabled, leading to warnings in irq_64.c. Fixes: 13799748b957 ("powerpc/64: use interrupt restart table to speed up return from interrupt") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Sachin Sant Tested-by: Sachin Sant Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206042240.92103-1-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index fc6631a80527..0ec1581619db 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -50,16 +50,18 @@ static inline bool exit_must_hard_disable(void) */ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { + bool must_hard_disable = (exit_must_hard_disable() || !restartable); + /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); - if (exit_must_hard_disable() || !restartable) + if (must_hard_disable) __hard_EE_RI_disable(); #ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (must_hard_disable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } From 4fa4302d6dc7de7e8e74dc7405611a2efb4bf54b Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 15 Dec 2022 17:09:14 +0000 Subject: [PATCH 068/186] cxl/region: Fix null pointer dereference for resetting decoder Not all decoders have a reset callback. The CXL specification allows a host bridge with a single root port to have no explicit HDM decoders. Currently the region driver assumes there are none. As such the CXL core creates a special pass through decoder instance without a commit/reset callback. Prior to this patch, the ->reset() callback was called unconditionally when calling cxl_region_decode_reset. Thus a configuration with 1 Host Bridge, 1 Root Port, and one directly attached CXL type 3 device or multiple CXL type 3 devices attached to downstream ports of a switch can cause a null pointer dereference. Before the fix, a kernel crash was observed when we destroy the region, and a pass through decoder is reset. The issue can be reproduced as below, 1) create a region with a CXL setup which includes a HB with a single root port under which a memdev is attached directly. 2) destroy the region with cxl destroy-region regionX -f. Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") Cc: Signed-off-by: Fan Ni Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Tested-by: Gregory Price Reviewed-by: Gregory Price Link: https://lore.kernel.org/r/20221215170909.2650271-1-fan.ni@samsung.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 02f28da519e3..02275e6b621b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -131,7 +131,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *iter = cxled_to_port(cxled); struct cxl_ep *ep; - int rc; + int rc = 0; while (!is_cxl_root(to_cxl_port(iter->dev.parent))) iter = to_cxl_port(iter->dev.parent); @@ -143,7 +143,8 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->reset(cxld); + if (cxld->reset) + rc = cxld->reset(cxld); if (rc) return rc; } @@ -186,7 +187,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - cxld->reset(cxld); + if (cxld->reset) + cxld->reset(cxld); } cxled->cxld.reset(&cxled->cxld); From 85e26dd5100a182bf8448050427539c0a66ab793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 26 Jan 2023 10:24:26 +0100 Subject: [PATCH 069/186] drm/client: fix circular reference counting issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We reference dump buffers both by their handle as well as their object. The problem is now that when anybody iterates over the DRM framebuffers and exports the underlying GEM objects through DMA-buf we run into a circular reference count situation. The result is that the fbdev handling holds the GEM handle preventing the DMA-buf in the GEM object to be released. This DMA-buf in turn holds a reference to the driver module which on unload would release the fbdev. Break that loop by releasing the handle as soon as the DRM framebuffer object is created. The DRM framebuffer and the DRM client buffer structure still hold a reference to the underlying GEM object preventing its destruction. Signed-off-by: Christian König Fixes: c76f0f7cb546 ("drm: Begin an API for in-kernel clients") Cc: Reviewed-by: Thomas Zimmermann Tested-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230126102814.8722-1-christian.koenig@amd.com --- drivers/gpu/drm/drm_client.c | 33 ++++++++++++++++++++------------- include/drm/drm_client.h | 5 ----- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index fd67efe37c63..056ab9d5f313 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -233,21 +233,17 @@ void drm_client_dev_restore(struct drm_device *dev) static void drm_client_buffer_delete(struct drm_client_buffer *buffer) { - struct drm_device *dev = buffer->client->dev; - if (buffer->gem) { drm_gem_vunmap_unlocked(buffer->gem, &buffer->map); drm_gem_object_put(buffer->gem); } - if (buffer->handle) - drm_mode_destroy_dumb(dev, buffer->handle, buffer->client->file); - kfree(buffer); } static struct drm_client_buffer * -drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format) +drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, + u32 format, u32 *handle) { const struct drm_format_info *info = drm_format_info(format); struct drm_mode_create_dumb dumb_args = { }; @@ -269,16 +265,15 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u if (ret) goto err_delete; - buffer->handle = dumb_args.handle; - buffer->pitch = dumb_args.pitch; - obj = drm_gem_object_lookup(client->file, dumb_args.handle); if (!obj) { ret = -ENOENT; goto err_delete; } + buffer->pitch = dumb_args.pitch; buffer->gem = obj; + *handle = dumb_args.handle; return buffer; @@ -365,7 +360,8 @@ static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer) } static int drm_client_buffer_addfb(struct drm_client_buffer *buffer, - u32 width, u32 height, u32 format) + u32 width, u32 height, u32 format, + u32 handle) { struct drm_client_dev *client = buffer->client; struct drm_mode_fb_cmd fb_req = { }; @@ -377,7 +373,7 @@ static int drm_client_buffer_addfb(struct drm_client_buffer *buffer, fb_req.depth = info->depth; fb_req.width = width; fb_req.height = height; - fb_req.handle = buffer->handle; + fb_req.handle = handle; fb_req.pitch = buffer->pitch; ret = drm_mode_addfb(client->dev, &fb_req, client->file); @@ -414,13 +410,24 @@ struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format) { struct drm_client_buffer *buffer; + u32 handle; int ret; - buffer = drm_client_buffer_create(client, width, height, format); + buffer = drm_client_buffer_create(client, width, height, format, + &handle); if (IS_ERR(buffer)) return buffer; - ret = drm_client_buffer_addfb(buffer, width, height, format); + ret = drm_client_buffer_addfb(buffer, width, height, format, handle); + + /* + * The handle is only needed for creating the framebuffer, destroy it + * again to solve a circular dependency should anybody export the GEM + * object as DMA-buf. The framebuffer and our buffer structure are still + * holding references to the GEM object to prevent its destruction. + */ + drm_mode_destroy_dumb(client->dev, handle, client->file); + if (ret) { drm_client_buffer_delete(buffer); return ERR_PTR(ret); diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 4fc8018eddda..1220d185c776 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -126,11 +126,6 @@ struct drm_client_buffer { */ struct drm_client_dev *client; - /** - * @handle: Buffer handle - */ - u32 handle; - /** * @pitch: Buffer pitch */ From a8520be3ffef3d25b53bf171a7ebe17ee0154175 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Feb 2023 16:15:59 +0200 Subject: [PATCH 070/186] pinctrl: intel: Restore the pins that used to be in Direct IRQ mode If the firmware mangled the register contents too much, check the saved value for the Direct IRQ mode. If it matches, we will restore the pin state. Reported-by: Jim Minter Fixes: 6989ea4881c8 ("pinctrl: intel: Save and restore pins in "direct IRQ" mode") Tested-by: Jim Minter Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20230206141558.20916-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index cc3aaba24188..e49f271de936 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1709,6 +1709,12 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); #ifdef CONFIG_PM_SLEEP +static bool __intel_gpio_is_direct_irq(u32 value) +{ + return (value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO); +} + static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); @@ -1742,8 +1748,7 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. */ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); - if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && - (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + if (__intel_gpio_is_direct_irq(value)) return true; return false; @@ -1873,7 +1878,12 @@ int intel_pinctrl_resume_noirq(struct device *dev) for (i = 0; i < pctrl->soc->npins; i++) { const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i]; - if (!intel_pinctrl_should_save(pctrl, desc->number)) + if (!(intel_pinctrl_should_save(pctrl, desc->number) || + /* + * If the firmware mangled the register contents too much, + * check the saved value for the Direct IRQ mode. + */ + __intel_gpio_is_direct_irq(pads[i].padcfg0))) continue; intel_restore_padcfg(pctrl, desc->number, PADCFG0, pads[i].padcfg0); From 6c4715aa5b0ab1c0d35780b7c552e952dbb5515d Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Tue, 7 Feb 2023 16:30:09 +0800 Subject: [PATCH 071/186] ALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform. There is a HP platform needs ALC236_FIXUP_HP_GPIO_LED quirk to make mic-mute/audio-mute working. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20230207083011.100189-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f1d829cb66a3..4ed0afae9202 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9434,6 +9434,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From 2de49fb1c9bb8bfe283070fef2e9304d9842a30c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 2 Feb 2023 10:12:01 +0000 Subject: [PATCH 072/186] RDMA/rtrs: Don't call kobject_del for srv_path->kobj As the mention in commmit f7452a7e96c1 ("RDMA/rtrs-srv: fix memory leak by missing kobject free"), it was intended to remove the kobject_del for srv_path->kobj. f7452a7e96c1 said: >This patch moves kobject_del() into free_sess() so that the kobject of > rtrs_srv_sess can be freed. This patch also move rtrs_srv_destroy_once_sysfs_root_folders back to 'if (srv_path->kobj.state_in_sysfs)' block to avoid a 'held lock freed!' A kernel panic will be triggered by following script ----------------------- $ while true do echo "sessname=foo path=ip: device_path=/dev/nvme0n1" > /sys/devices/virtual/rnbd-client/ctl/map_device echo "normal" > /sys/block/rnbd0/rnbd/unmap_device done ----------------------- The bisection pointed to commit 6af4609c18b3 ("RDMA/rtrs-srv: Fix several issues in rtrs_srv_destroy_path_files") at last. rnbd_server L777: : Opened device 'nvme0n1' general protection fault, probably for non-canonical address 0x765f766564753aea: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 3558 Comm: systemd-udevd Kdump: loaded Not tainted 6.1.0-rc3-roce-flush+ #51 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:kernfs_dop_revalidate+0x36/0x180 Code: 00 00 41 55 41 54 55 53 48 8b 47 68 48 89 fb 48 85 c0 0f 84 db 00 00 00 48 8b a8 60 04 00 00 48 8b 45 30 48 85 c0 48 0f 44 c5 <4c> 8b 60 78 49 81 c4 d8 00 00 00 4c 89 e7 e8 b7 78 7b 00 8b 05 3d RSP: 0018:ffffaf1700b67c78 EFLAGS: 00010206 RAX: 765f766564753a72 RBX: ffff89e2830849c0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff89e2830849c0 RBP: ffff89e280361bd0 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000065 R11: 0000000000000000 R12: ffff89e2830849c0 R13: ffff89e283084888 R14: d0d0d0d0d0d0d0d0 R15: 2f2f2f2f2f2f2f2f FS: 00007f13fbce7b40(0000) GS:ffff89e2bbc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f93e055d340 CR3: 0000000104664002 CR4: 00000000001706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lookup_fast+0x7b/0x100 walk_component+0x21/0x160 link_path_walk.part.0+0x24d/0x390 path_openat+0xad/0x9a0 do_filp_open+0xa9/0x150 ? lock_release+0x13c/0x2e0 ? _raw_spin_unlock+0x29/0x50 ? alloc_fd+0x124/0x1f0 do_sys_openat2+0x9b/0x160 __x64_sys_openat+0x54/0xa0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f13fc9d701b Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 54 24 28 64 48 2b 14 25 RSP: 002b:00007ffddf242640 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f13fc9d701b RDX: 0000000000080000 RSI: 00007ffddf2427c0 RDI: 00000000ffffff9c RBP: 00007ffddf2427c0 R08: 00007f13fcc5b440 R09: 21b2131aa64b1ef2 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000080000 R13: 00007ffddf2427c0 R14: 000055ed13be8db0 R15: 0000000000000000 Fixes: 6af4609c18b3 ("RDMA/rtrs-srv: Fix several issues in rtrs_srv_destroy_path_files") Acked-by: Guoqing Jiang Signed-off-by: Li Zhijian Link: https://lore.kernel.org/r/1675332721-2-1-git-send-email-lizhijian@fujitsu.com Acked-by: Jack Wang Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index c76ba29da1e2..5adba0f754b6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -312,9 +312,8 @@ void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path) if (srv_path->kobj.state_in_sysfs) { sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); - kobject_del(&srv_path->kobj); kobject_put(&srv_path->kobj); + rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } - rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } From 647037adcad00f2bab8828d3d41cd0553d41f3bd Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Tue, 7 Feb 2023 08:21:51 +0000 Subject: [PATCH 073/186] Revert "mm: Always release pages to the buddy allocator in memblock_free_late()." This reverts commit 115d9d77bb0f9152c60b6e8646369fa7f6167593. The pages being freed by memblock_free_late() have already been initialized, but if they are in the deferred init range, __free_one_page() might access nearby uninitialized pages when trying to coalesce buddies. This can, for example, trigger this BUG: BUG: unable to handle page fault for address: ffffe964c02580c8 RIP: 0010:__list_del_entry_valid+0x3f/0x70 __free_one_page+0x139/0x410 __free_pages_ok+0x21d/0x450 memblock_free_late+0x8c/0xb9 efi_free_boot_services+0x16b/0x25c efi_enter_virtual_mode+0x403/0x446 start_kernel+0x678/0x714 secondary_startup_64_no_verify+0xd2/0xdb A proper fix will be more involved so revert this change for the time being. Fixes: 115d9d77bb0f ("mm: Always release pages to the buddy allocator in memblock_free_late().") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/20230207082151.1303-1-dev@aaront.org Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 8 +------- tools/testing/memblock/internal.h | 4 ---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 685e30e6d27c..d036c7861310 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,13 +1640,7 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - /* - * Reserved pages are always initialized by the end of - * memblock_free_all() (by memmap_init() and, if deferred - * initialization is enabled, memmap_init_reserved_pages()), so - * these pages can be released directly to the buddy allocator. - */ - __free_pages_core(pfn_to_page(cursor), 0); + memblock_free_pages(pfn_to_page(cursor), cursor, 0); totalram_pages_inc(); } } diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 85973e55489e..fdb7f5db7308 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,10 +15,6 @@ bool mirrored_kernelcore = false; struct page {}; -void __free_pages_core(struct page *page, unsigned int order) -{ -} - void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { From 29aab38823b61e482995c24644bd2d8acfe56185 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 7 Feb 2023 17:04:24 +0800 Subject: [PATCH 074/186] ASoC: fsl_sai: fix getting version from VERID The version information is at the bit31 ~ bit16 in the VERID register, so need to right shift 16bit to get it, otherwise the result of comparison "sai->verid.version >= 0x0301" is wrong. Fixes: 99c1e74f25d4 ("ASoC: fsl_sai: store full version instead of major/minor") Signed-off-by: Shengjiu Wang Reviewed-by: Iuliana Prodan Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/1675760664-25193-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 1c9be8a5dcb1..35a52c3a020d 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1141,6 +1141,7 @@ static int fsl_sai_check_version(struct device *dev) sai->verid.version = val & (FSL_SAI_VERID_MAJOR_MASK | FSL_SAI_VERID_MINOR_MASK); + sai->verid.version >>= FSL_SAI_VERID_MINOR_SHIFT; sai->verid.feature = val & FSL_SAI_VERID_FEATURE_MASK; ret = regmap_read(sai->regmap, FSL_SAI_PARAM, &val); From 6a32425f953b955b4ff82f339d01df0b713caa5d Mon Sep 17 00:00:00 2001 From: Artemii Karasev Date: Tue, 7 Feb 2023 18:20:26 +0500 Subject: [PATCH 075/186] ALSA: emux: Avoid potential array out-of-bound in snd_emux_xg_control() snd_emux_xg_control() can be called with an argument 'param' greater than size of 'control' array. It may lead to accessing 'control' array at a wrong index. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Artemii Karasev Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Link: https://lore.kernel.org/r/20230207132026.2870-1-karasev@ispras.ru Signed-off-by: Takashi Iwai --- sound/synth/emux/emux_nrpn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/synth/emux/emux_nrpn.c b/sound/synth/emux/emux_nrpn.c index 8056422ed7c5..0d6b82ae2955 100644 --- a/sound/synth/emux/emux_nrpn.c +++ b/sound/synth/emux/emux_nrpn.c @@ -349,6 +349,9 @@ int snd_emux_xg_control(struct snd_emux_port *port, struct snd_midi_channel *chan, int param) { + if (param >= ARRAY_SIZE(chan->control)) + return -EINVAL; + return send_converted_effect(xg_effects, ARRAY_SIZE(xg_effects), port, chan, param, chan->control[param], From c173ee5b2fa6195066674d66d1d7e191010fb1ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Tue, 7 Feb 2023 22:04:28 +0100 Subject: [PATCH 076/186] ASoC: topology: Return -ENOMEM on memory allocation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When handling error path, ret needs to be set to correct value. Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: d29d41e28eea ("ASoC: topology: Add support for multiple kcontrol types to a widget") Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz SƂawiƄski Link: https://lore.kernel.org/r/20230207210428.2076354-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index c3be24b2fac5..a79a2fb260b8 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1401,13 +1401,17 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, template.num_kcontrols = le32_to_cpu(w->num_kcontrols); kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL); - if (!kc) + if (!kc) { + ret = -ENOMEM; goto hdr_err; + } kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int), GFP_KERNEL); - if (!kcontrol_type) + if (!kcontrol_type) { + ret = -ENOMEM; goto hdr_err; + } for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) { control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos; From e7d84c6a1296d059389f7342d9b4b7defb518d3a Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 7 Feb 2023 13:04:52 +0800 Subject: [PATCH 077/186] ceph: flush cap releases when the session is flushed MDS expects the completed cap release prior to responding to the session flush for cache drop. Cc: stable@vger.kernel.org Link: http://tracker.ceph.com/issues/38009 Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index e163f58ff3c5..27a245d959c0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3685,6 +3685,12 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FLUSHMSG: + /* flush cap releases */ + spin_lock(&session->s_cap_lock); + if (session->s_num_cap_releases) + ceph_flush_cap_releases(mdsc, session); + spin_unlock(&session->s_cap_lock); + send_flushmsg_ack(mdsc, session, seq); break; From 711442e29f16f0d39dd0e2460c9baacfccb9d5a7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 7 Feb 2023 11:04:30 -0800 Subject: [PATCH 078/186] cxl/region: Fix passthrough-decoder detection A passthrough decoder is a decoder that maps only 1 target. It is a special case because it does not impose any constraints on the interleave-math as compared to a decoder with multiple targets. Extend the passthrough case to multi-target-capable decoders that only have one target selected. I.e. the current code was only considering passthrough *ports* which are only a subset of the potential passthrough decoder scenarios. Fixes: e4f6dfa9ef75 ("cxl/region: Fix 'distance' calculation with passthrough ports") Cc: Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167564540422.847146.13816934143225777888.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 02275e6b621b..940f805b1534 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -993,10 +993,10 @@ static int cxl_port_setup_targets(struct cxl_port *port, int i, distance; /* - * Passthrough ports impose no distance requirements between + * Passthrough decoders impose no distance requirements between * peers */ - if (port->nr_dports == 1) + if (cxl_rr->nr_targets == 1) distance = 0; else distance = p->nr_targets / cxl_rr->nr_targets; From 70daa5c8f001e351af174c40ac21eb0a25600483 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 6 Feb 2023 06:57:00 -0800 Subject: [PATCH 079/186] nvme-auth: mark nvme_auth_wq static Fix a smatch report for the newly added nvme_auth_wq. Signed-off-by: Tom Rix Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index b57630d1d3b8..bdb97496ba2d 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -45,7 +45,7 @@ struct nvme_dhchap_queue_context { int sess_key_len; }; -struct workqueue_struct *nvme_auth_wq; +static struct workqueue_struct *nvme_auth_wq; #define nvme_auth_flags_from_qid(qid) \ (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED From 88d18b8896bd98e636b632f805b7e84e61458255 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Tue, 7 Feb 2023 15:37:20 -0300 Subject: [PATCH 080/186] ALSA: hda/realtek: Add Positivo N14KP6-TG Positivo N14KP6-TG (1c6c:1251) require quirk for enabling headset-mic Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20230207183720.2519-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4ed0afae9202..1134a493d225 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9709,6 +9709,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), From 6a7ff131f17f44c593173c5ee30e2c03ef211685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2023 08:43:35 +0200 Subject: [PATCH 081/186] drm/i915: Fix VBT DSI DVO port handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out modern (icl+) VBTs still declare their DSI ports as MIPI-A and MIPI-C despite the PHYs now being A and B. Remap appropriately to allow the panels declared as MIPI-C to work. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8016 Signed-off-by: Ville SyrjĂ€lĂ€ Link: https://patchwork.freedesktop.org/patch/msgid/20230207064337.18697-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 118b5c136c04da705b274b0d39982bb8b7430fc5) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 33 ++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 572a4e3769f3..a491e6c38875 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2466,6 +2466,22 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, dvo_port); } +static enum port +dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) +{ + switch (dvo_port) { + case DVO_PORT_MIPIA: + return PORT_A; + case DVO_PORT_MIPIC: + if (DISPLAY_VER(i915) >= 11) + return PORT_B; + else + return PORT_C; + default: + return PORT_NONE; + } +} + static int parse_bdb_230_dp_max_link_rate(const int vbt_max_link_rate) { switch (vbt_max_link_rate) { @@ -3414,19 +3430,16 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *i915, dvo_port = child->dvo_port; - if (dvo_port == DVO_PORT_MIPIA || - (dvo_port == DVO_PORT_MIPIB && DISPLAY_VER(i915) >= 11) || - (dvo_port == DVO_PORT_MIPIC && DISPLAY_VER(i915) < 11)) { - if (port) - *port = dvo_port - DVO_PORT_MIPIA; - return true; - } else if (dvo_port == DVO_PORT_MIPIB || - dvo_port == DVO_PORT_MIPIC || - dvo_port == DVO_PORT_MIPID) { + if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) { drm_dbg_kms(&i915->drm, "VBT has unsupported DSI port %c\n", port_name(dvo_port - DVO_PORT_MIPIA)); + continue; } + + if (port) + *port = dsi_dvo_port_to_port(i915, dvo_port); + return true; } return false; @@ -3511,7 +3524,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) { + if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) { if (!devdata->dsc) return false; From ae052e3ae09572194d7e574906db7272041577d3 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 8 Feb 2023 07:17:08 +0000 Subject: [PATCH 082/186] x86/kprobes: Fix 1 byte conditional jump target Commit 3bc753c06dd0 ("kbuild: treat char as always unsigned") broke kprobes. Setting a probe-point on 1 byte conditional jump can cause the kernel to crash when the (signed) relative jump offset gets treated as unsigned. Fix by replacing the unsigned 'immediate.bytes' (plus a cast) with the signed 'immediate.value' when assigning to the relative jump offset. [ dhansen: clarified changelog ] Fixes: 3bc753c06dd0 ("kbuild: treat char as always unsigned") Suggested-by: Masami Hiramatsu (Google) Suggested-by: Dave Hansen Signed-off-by: Nadav Amit Signed-off-by: Dave Hansen Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20230208071708.4048-1-namit%40vmware.com --- arch/x86/kernel/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index b36f3c367cb2..695873c0f50b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -625,7 +625,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn) /* 1 byte conditional jump */ p->ainsn.emulate_op = kprobe_emulate_jcc; p->ainsn.jcc.type = opcode & 0xf; - p->ainsn.rel32 = *(char *)insn->immediate.bytes; + p->ainsn.rel32 = insn->immediate.value; break; case 0x0f: opcode = insn->opcode.bytes[1]; From f545e8831e70065e127f903fc7aca09aa50422c7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 8 Feb 2023 09:23:40 -0800 Subject: [PATCH 083/186] x86/cpu: Add Lunar Lake M Intel confirmed the existence of this CPU in Q4'2022 earnings presentation. Add the CPU model number. [ dhansen: Merging these as soon as possible makes it easier on all the folks developing model-specific features. ] Signed-off-by: Kan Liang Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230208172340.158548-1-tony.luck%40intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 347707d459c6..cbaf174d8efd 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -123,6 +123,8 @@ #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_FAM6_LUNARLAKE_M 0xBD + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ From 877f26bf3ca65447e923e86305a7f2a20d059e21 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jan 2023 10:15:37 -0500 Subject: [PATCH 084/186] drm/amd/display: disable S/G display on DCN 2.1.0 Causes flickering or white screens in some configurations. Disable it for now until we can fix the issue. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2352 Cc: roman.li@amd.com Cc: yifan1.zhang@amd.com Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 31bce529f685..da2ec41a0477 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1499,7 +1499,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) (adev->apu_flags & AMD_APU_IS_PICASSO)) init_data.flags.gpu_vm_support = true; break; - case IP_VERSION(2, 1, 0): case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): From 077e9659581acab70f2dcc04b5bc799aca3a056b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 Jan 2023 09:57:27 -0500 Subject: [PATCH 085/186] drm/amd/display: disable S/G display on DCN 3.1.2/3 Causes flickering or white screens in some configurations. Disable it for now until we can fix the issue. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2352 Cc: roman.li@amd.com Cc: yifan1.zhang@amd.com Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index da2ec41a0477..78e047e1429c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1500,8 +1500,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.gpu_vm_support = true; break; case IP_VERSION(3, 0, 1): - case IP_VERSION(3, 1, 2): - case IP_VERSION(3, 1, 3): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; break; From 5c4e8c71d1202cd84d870e7e5cb8d6b52f9c3507 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 31 Jan 2023 09:56:46 -0500 Subject: [PATCH 086/186] drm/amd/display: properly handling AGP aperture in vm setup Take into account whether or not the AGP aperture is enabled or not when calculating the system aperture. Fixes white screens with DCN 3.1.4. Based on a patch from Yifan Zhang Cc: Yifan Zhang Acked-by: Harry Wentland Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 78e047e1429c..7f6e27561899 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1184,24 +1184,38 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ memset(pa_config, 0, sizeof(*pa_config)); - logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; - pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - - if (adev->apu_flags & AMD_APU_IS_RAVEN2) - /* - * Raven2 has a HW issue that it is unable to use the vram which - * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the - * workaround that increase system aperture high address (add 1) - * to get rid of the VM fault and hardware hang. - */ - logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18); - else - logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18; - agp_base = 0; agp_bot = adev->gmc.agp_start >> 24; agp_top = adev->gmc.agp_end >> 24; + /* AGP aperture is disabled */ + if (agp_bot == agp_top) { + logical_addr_low = adev->gmc.vram_start >> 18; + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + logical_addr_high = (adev->gmc.fb_end >> 18) + 0x1; + else + logical_addr_high = adev->gmc.vram_end >> 18; + } else { + logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + logical_addr_high = max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18); + else + logical_addr_high = max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18; + } + + pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF; page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12); From 7ece674cd9468ce740494f6108c39831cfc7eb4e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 31 Jan 2023 13:10:55 -0500 Subject: [PATCH 087/186] Revert "drm/amd/display: disable S/G display on DCN 3.1.4" This reverts commit 9aa15370819294beb7eb67c9dcbf654d79ff8790. This is fixed now so we can re-enable S/G display on DCN 3.1.4. Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7f6e27561899..78452856b2a3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1514,6 +1514,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.gpu_vm_support = true; break; case IP_VERSION(3, 0, 1): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; break; From 5630a35024d12432827bd7e7986dd60cfe2f983b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 3 Feb 2023 14:19:37 +0800 Subject: [PATCH 088/186] drm/amd/amdgpu: enable athub cg 11.0.3 enable athub cg on gc 11.0.3 Signed-off-by: Kenneth Feng Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 9eff5f41df9d..7050238c4c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -641,7 +641,9 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_MGCG | - AMD_CG_SUPPORT_HDP_SD; + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; From 49d0555976f0972af68397ed996375c135b38ba7 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 31 Jan 2023 15:05:46 -0100 Subject: [PATCH 089/186] drm/amd/display: fix cursor offset on rotation 180 Cursor gets clipped off in the middle of the screen with hw rotation 180. Fix a miscalculation of cursor offset when it's placed near the edges in the pipe split case. Cursor bugs with hw rotation were reported on AMD issue tracker: https://gitlab.freedesktop.org/drm/amd/-/issues/2247 The issues on rotation 270 was fixed by: https://lore.kernel.org/amd-gfx/20221118125935.4013669-22-Brian.Chang@amd.com/ that partially addressed the rotation 180 too. So, this patch is the final bits for rotation 180. Reported-by: Xaver Hugl Reviewed-by: Harry Wentland Fixes: 9d84c7ef8a87 ("drm/amd/display: Correct cursor position on horizontal mirror") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index fe2023f18b7d..8f894c1d1d1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3626,7 +3626,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { From 0e763afcb50814e256ecb780fcc0f3bade2e1a0c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 3 Feb 2023 15:33:59 +0800 Subject: [PATCH 090/186] drm/amd/pm: add SMU 13.0.7 missing GetPptLimit message mapping Add missing GetPptLimit message mapping. Signed-off-by: Evan Quan Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index e87db7e02e8a..9e1967d8049e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -124,6 +124,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), + MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { From 9874cc2df4e892c8744aa0472866cbf7c3cf1862 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 31 Jan 2023 10:40:09 +0800 Subject: [PATCH 091/186] drm/amd/pm: bump SMU 13.0.0 driver_if header version This can suppress the warning caused by version mismatch. Signed-off-by: Evan Quan Acked-by: Alex Deucher Acked-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 5 +++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index d6b964cf73bd..4bc7aee4d44f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -123,7 +123,8 @@ (1 << FEATURE_DS_FCLK_BIT) | \ (1 << FEATURE_DS_LCLK_BIT) | \ (1 << FEATURE_DS_DCFCLK_BIT) | \ - (1 << FEATURE_DS_UCLK_BIT)) + (1 << FEATURE_DS_UCLK_BIT) | \ + (1ULL << FEATURE_DS_VCN_BIT)) //For use with feature control messages typedef enum { @@ -522,9 +523,9 @@ typedef enum { TEMP_HOTSPOT_M, TEMP_MEM, TEMP_VR_GFX, - TEMP_VR_SOC, TEMP_VR_MEM0, TEMP_VR_MEM1, + TEMP_VR_SOC, TEMP_VR_U, TEMP_LIQUID0, TEMP_LIQUID1, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index e8c6febb8b64..d9c4821bcfc8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,7 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 From dc38b996db968f51f0fe45845a519c5cd7f6bd04 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 7 Feb 2023 10:42:31 +0800 Subject: [PATCH 092/186] drm/amd/pm: bump SMU 13.0.7 driver_if header version This can suppress the warning caused by version mismatch. Signed-off-by: Evan Quan Acked-by: Alex Deucher Acked-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 29 ++++++++++--------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index d6b13933a98f..48a3a3952ceb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -113,20 +113,21 @@ #define NUM_FEATURES 64 #define ALLOWED_FEATURE_CTRL_DEFAULT 0xFFFFFFFFFFFFFFFFULL -#define ALLOWED_FEATURE_CTRL_SCPM (1 << FEATURE_DPM_GFXCLK_BIT) | \ - (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \ - (1 << FEATURE_DPM_UCLK_BIT) | \ - (1 << FEATURE_DPM_FCLK_BIT) | \ - (1 << FEATURE_DPM_SOCCLK_BIT) | \ - (1 << FEATURE_DPM_MP0CLK_BIT) | \ - (1 << FEATURE_DPM_LINK_BIT) | \ - (1 << FEATURE_DPM_DCN_BIT) | \ - (1 << FEATURE_DS_GFXCLK_BIT) | \ - (1 << FEATURE_DS_SOCCLK_BIT) | \ - (1 << FEATURE_DS_FCLK_BIT) | \ - (1 << FEATURE_DS_LCLK_BIT) | \ - (1 << FEATURE_DS_DCFCLK_BIT) | \ - (1 << FEATURE_DS_UCLK_BIT) +#define ALLOWED_FEATURE_CTRL_SCPM ((1 << FEATURE_DPM_GFXCLK_BIT) | \ + (1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \ + (1 << FEATURE_DPM_UCLK_BIT) | \ + (1 << FEATURE_DPM_FCLK_BIT) | \ + (1 << FEATURE_DPM_SOCCLK_BIT) | \ + (1 << FEATURE_DPM_MP0CLK_BIT) | \ + (1 << FEATURE_DPM_LINK_BIT) | \ + (1 << FEATURE_DPM_DCN_BIT) | \ + (1 << FEATURE_DS_GFXCLK_BIT) | \ + (1 << FEATURE_DS_SOCCLK_BIT) | \ + (1 << FEATURE_DS_FCLK_BIT) | \ + (1 << FEATURE_DS_LCLK_BIT) | \ + (1 << FEATURE_DS_DCFCLK_BIT) | \ + (1 << FEATURE_DS_UCLK_BIT) | \ + (1ULL << FEATURE_DS_VCN_BIT)) //For use with feature control messages typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index d9c4821bcfc8..992163e66f7b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -32,7 +32,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From c108a18462949fe709ebd6b0be68398d643bc285 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 6 Feb 2023 12:21:42 -0500 Subject: [PATCH 093/186] drm/amdgpu: Add unique_id support for GC 11.0.1/2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These can support unique_id, so create the sysfs file for them Signed-off-by: Kent Russell Reviewed-by: Harish Kasiviswanathan Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index a9170360d7e8..2f3e239e623d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1991,6 +1991,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 2): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): *states = ATTR_STATE_SUPPORTED; break; default: From e53448e0a1efa5133c7db78f1df1f4caf177676b Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Thu, 2 Feb 2023 17:21:03 +0100 Subject: [PATCH 094/186] drm/amdgpu: Use the TGID for trace_amdgpu_vm_update_ptes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pid field corresponds to the result of gettid() in userspace. However, userspace cannot reliably attribute PTE events to processes with just the thread id. This patch allows userspace to easily attribute PTE update events to specific processes by comparing this field with the result of getpid(). For attributing events to specific threads, the thread id is also contained in the common fields of each trace event. Reviewed-by: Christian König Signed-off-by: Friedrich Vock Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index b5f3bba851db..01e42bdd8e4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -974,7 +974,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.pid, + vm->task_info.tgid, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, From 5ad7bbf3dba5c4a684338df1f285080f2588b535 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 2 Feb 2023 10:48:56 -0300 Subject: [PATCH 095/186] drm/amdgpu/fence: Fix oops due to non-matching drm_sched init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently amdgpu calls drm_sched_fini() from the fence driver sw fini routine - such function is expected to be called only after the respective init function - drm_sched_init() - was executed successfully. Happens that we faced a driver probe failure in the Steam Deck recently, and the function drm_sched_fini() was called even without its counter-part had been previously called, causing the following oops: amdgpu: probe of 0000:04:00.0 failed with error -110 BUG: kernel NULL pointer dereference, address: 0000000000000090 PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 609 Comm: systemd-udevd Not tainted 6.2.0-rc3-gpiccoli #338 Hardware name: Valve Jupiter/Jupiter, BIOS F7A0113 11/04/2022 RIP: 0010:drm_sched_fini+0x84/0xa0 [gpu_sched] [...] Call Trace: amdgpu_fence_driver_sw_fini+0xc8/0xd0 [amdgpu] amdgpu_device_fini_sw+0x2b/0x3b0 [amdgpu] amdgpu_driver_release_kms+0x16/0x30 [amdgpu] devm_drm_dev_init_release+0x49/0x70 [...] To prevent that, check if the drm_sched was properly initialized for a given ring before calling its fini counter-part. Notice ideally we'd use sched.ready for that; such field is set as the latest thing on drm_sched_init(). But amdgpu seems to "override" the meaning of such field - in the above oops for example, it was a GFX ring causing the crash, and the sched.ready field was set to true in the ring init routine, regardless of the state of the DRM scheduler. Hence, we ended-up using sched.ops as per Christian's suggestion [0], and also removed the no_scheduler check [1]. [0] https://lore.kernel.org/amd-gfx/984ee981-2906-0eaf-ccec-9f80975cb136@amd.com/ [1] https://lore.kernel.org/amd-gfx/cd0e2994-f85f-d837-609f-7056d5fb7231@amd.com/ Fixes: 067f44c8b459 ("drm/amdgpu: avoid over-handle of fence driver fini in s3 test (v2)") Suggested-by: Christian König Cc: Guchun Chen Cc: Luben Tuikov Cc: Mario Limonciello Reviewed-by: Luben Tuikov Signed-off-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 00444203220d..faff4a3f96e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -618,7 +618,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) + /* + * Notice we check for sched.ops since there's some + * override on the meaning of sched.ready by amdgpu. + * The natural check would be sched.ready, which is + * set as drm_sched_init() finishes... + */ + if (ring->sched.ops) drm_sched_fini(&ring->sched); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) From 6c1a6d0b64e1a15016ba7450cce8629f94de56c7 Mon Sep 17 00:00:00 2001 From: JesseZhang Date: Wed, 8 Feb 2023 10:07:18 +0800 Subject: [PATCH 096/186] amd/amdgpu: remove test ib on hw ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test ib function is not necessary on hw ring, so remove it. v2: squash in NULL check fix Signed-off-by: JesseZhang Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index f752c7ae7f60..3989e755a5b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -295,7 +295,7 @@ struct amdgpu_ring { #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) #define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) -#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) +#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f202b45c413c..5dde6f82a1ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6877,7 +6877,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, - .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, From c6ac406cd8ff610a2d5da298b1d3071acfcde7f0 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Fri, 13 Jan 2023 18:53:45 +0800 Subject: [PATCH 097/186] drm/amdgpu/smu: skip pptable init under sriov sriov does not need to init pptable from amdgpu driver we finish it from PF Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index cf96c3f2affe..508e392547d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -407,6 +407,9 @@ static int smu_v13_0_0_setup_pptable(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + ret = smu_v13_0_0_get_pptable_from_pmfw(smu, &smu_table->power_play_table, &smu_table->power_play_table_size); @@ -1257,6 +1260,9 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu, table_context->power_play_table; PPTable_t *pptable = smu->smu_table.driver_pptable; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + if (!range) return -EINVAL; From 6b77b16de75a6efc0870b1fa467209387cbee8f3 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Fri, 27 Jan 2023 16:57:08 +0100 Subject: [PATCH 098/186] drm/vc4: Fix YUV plane handling when planes are in different buffers YUV images can either be presented as one allocation with offsets for the different planes, or multiple allocations with 0 offsets. The driver only ever calls drm_fb_[dma|cma]_get_gem_obj with plane index 0, therefore any application using the second approach was incorrectly rendered. Correctly determine the address for each plane, removing the assumption that the base address is the same for each. Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Signed-off-by: Dave Stevenson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230127155708.454704-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_plane.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 8b92a45a3c89..bd5acc4a8687 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -340,7 +340,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0); + struct drm_gem_dma_object *bo; int num_planes = fb->format->num_planes; struct drm_crtc_state *crtc_state; u32 h_subsample = fb->format->hsub; @@ -359,8 +359,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) if (ret) return ret; - for (i = 0; i < num_planes; i++) + for (i = 0; i < num_planes; i++) { + bo = drm_fb_dma_get_gem_obj(fb, i); vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i]; + } /* * We don't support subpixel source positioning for scaling, From 7fa846b95ce8fd4cb80f32516e2ad3f4ea4d6742 Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Fri, 27 Jan 2023 17:12:19 +0100 Subject: [PATCH 099/186] drm/vc4: hdmi: Always enable GCP with AVMUTE cleared Issue is some displays go blank at the point of firmware to kms handover. Plugging/unplugging hdmi cable, power cycling display, or switching standby off/on typically resolve this case. Finally managed to find a display that suffers from this, and track down the issue. The firmware uses AVMUTE in normal operation. It will set AVMUTE before disabling hdmi clocks and phy. It will clear AVMUTE after clocks and phy are set up for a new hdmi mode. But with the hdmi handover from firmware to kms, AVMUTE will be set by firmware. kms driver typically has no GCP packet (except for deep colour modes). The spec isn't clear on whether to consider the AVMUTE as continuing indefinitely in the absence of a GCP packet, or to consider that state to have ended. Most displays behave as we want, but there are a number (from multiple manufacturers) which need to see AVMUTE cleared before displaying a picture. Lets just always enable GCP packet with AVMUTE cleared. That resolves the issue on problematic displays. From HDMI 1.4 spec: A CD field of zero (Color Depth not indicated) shall be used whenever the Sink does not indicate support for Deep Color. This value may also be used in Deep Color mode to transmit a GCP indicating only non-Deep Color information (e.g. AVMUTE). So use CD=0 where we were previously not enabling a GCP. Link: https://forum.libreelec.tv/thread/24780-le-10-0-1-rpi4-no-picture-after-update-from-le-10-0-0 Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230127161219.457058-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 55744216392b..7546103f1499 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -97,6 +97,10 @@ #define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_SHIFT 8 #define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK VC4_MASK(15, 8) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK VC4_MASK(7, 0) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_SET_AVMUTE BIT(0) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE BIT(4) + # define VC4_HD_M_SW_RST BIT(2) # define VC4_HD_M_ENABLE BIT(0) @@ -1306,7 +1310,6 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, VC4_HDMI_VERTB_VBP)); unsigned long flags; unsigned char gcp; - bool gcp_en; u32 reg; int idx; @@ -1341,16 +1344,13 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, switch (vc4_state->output_bpc) { case 12: gcp = 6; - gcp_en = true; break; case 10: gcp = 5; - gcp_en = true; break; case 8: default: - gcp = 4; - gcp_en = false; + gcp = 0; break; } @@ -1359,8 +1359,7 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, * doesn't signal in GCP. */ if (vc4_state->output_format == VC4_HDMI_OUTPUT_YUV422) { - gcp = 4; - gcp_en = false; + gcp = 0; } reg = HDMI_READ(HDMI_DEEP_COLOR_CONFIG_1); @@ -1373,11 +1372,12 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, reg = HDMI_READ(HDMI_GCP_WORD_1); reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK; reg |= VC4_SET_FIELD(gcp, VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1); + reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK; + reg |= VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE; HDMI_WRITE(HDMI_GCP_WORD_1, reg); reg = HDMI_READ(HDMI_GCP_CONFIG); - reg &= ~VC5_HDMI_GCP_CONFIG_GCP_ENABLE; - reg |= gcp_en ? VC5_HDMI_GCP_CONFIG_GCP_ENABLE : 0; + reg |= VC5_HDMI_GCP_CONFIG_GCP_ENABLE; HDMI_WRITE(HDMI_GCP_CONFIG, reg); reg = HDMI_READ(HDMI_MISC_CONTROL); From 247a631f9c0ffb37ed0786a94cb4c5f2b6fc7ab1 Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Fri, 27 Jan 2023 15:55:58 +0100 Subject: [PATCH 100/186] drm/vc4: crtc: Increase setup cost in core clock calculation to handle extreme reduced blanking The formula that determines the core clock requirement based on pixel clock and blanking has been determined experimentally to minimise the clock while supporting all modes we've seen. A new reduced blanking mode (4kp60 at 533MHz rather than the standard 594MHz) has been seen that doesn't produce a high enough clock and results in "flip_done timed out" error. Increase the setup cost in the formula to make this work. The result is a reduced blanking mode increases by up to 7MHz while leaving the standard timing mode untouched Link: https://github.com/raspberrypi/linux/issues/4446 Fixes: 16e101051f32 ("drm/vc4: Increase the core clock based on HVS load") Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230127145558.446123-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 0108613e79d5..7258975331ca 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -711,7 +711,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); if (vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0) { - vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 1000, + vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 8000, mode->clock * 9 / 10) * 1000; } else { vc4_state->hvs_load = mode->clock * 1000; From 190233164cd77115f8dea718cbac561f557092c6 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 8 Feb 2023 16:28:21 -0800 Subject: [PATCH 101/186] arm64: efi: Force the use of SetVirtualAddressMap() on eMAG and Altra Max machines Commit 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines") identifies the Altra family via the family field in the type#1 SMBIOS record. eMAG and Altra Max machines are similarly affected but not detected with the strict strcmp test. The type1_family smbios string is not an entirely reliable means of identifying systems with this issue as OEMs can, and do, use their own strings for these fields. However, until we have a better solution, capture the bulk of these systems by adding strcmp matching for "eMAG" and "Altra Max". Fixes: 550b33cfd445 ("arm64: efi: Force the use of SetVirtualAddressMap() on Altra machines") Cc: # 6.1.x Cc: Alexandru Elisei Signed-off-by: Darren Hart Tested-by: Justin He Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c index ff2d18c42ee7..4501652e11ab 100644 --- a/drivers/firmware/efi/libstub/arm64.c +++ b/drivers/firmware/efi/libstub/arm64.c @@ -19,10 +19,13 @@ static bool system_needs_vamap(void) const u8 *type1_family = efi_get_smbios_string(1, family); /* - * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap() - * has not been called prior. + * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if + * SetVirtualAddressMap() has not been called prior. */ - if (!type1_family || strcmp(type1_family, "Altra")) + if (!type1_family || ( + strcmp(type1_family, "eMAG") && + strcmp(type1_family, "Altra") && + strcmp(type1_family, "Altra Max"))) return false; efi_warn("Working around broken SetVirtualAddressMap()\n"); From bb07bd68fa0983e3915f83c471382868860389fe Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 9 Feb 2023 10:56:26 +0000 Subject: [PATCH 102/186] Revert "usb: gadget: u_ether: Do not make UDC parent of the net device" This reverts commit 321b59870f850a10dbb211ecd2bd87b41497ea6f. This commit broke USB networking on Ingenic SoCs and maybe elsewhere. The actual reason is unknown; and while a proper fix would be better, we're sitting at -rc7 now, so a revert is justified - and we can work on re-introducing this change for 6.3. Fixes: 321b59870f85 ("usb: gadget: u_ether: Do not make UDC parent of the net device") Signed-off-by: Paul Cercueil Acked-by: Sascha Hauer Link: https://lore.kernel.org/r/20230209105626.10597-1-paul@crapouillou.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 8f12f3f8f6ee..e06022873df1 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -798,6 +798,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; + SET_NETDEV_DEV(net, &g->dev); SET_NETDEV_DEVTYPE(net, &gadget_type); status = register_netdev(net); @@ -872,6 +873,8 @@ int gether_register_netdev(struct net_device *net) struct usb_gadget *g; int status; + if (!net->dev.parent) + return -EINVAL; dev = netdev_priv(net); g = dev->gadget; @@ -902,6 +905,7 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g) dev = netdev_priv(net); dev->gadget = g; + SET_NETDEV_DEV(net, &g->dev); } EXPORT_SYMBOL_GPL(gether_set_gadget); From 54e5c00a4eb0a4c663445b245f641bbfab142430 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 8 Feb 2023 20:53:19 +0000 Subject: [PATCH 103/186] usb: typec: altmodes/displayport: Fix probe pin assign check While checking Pin Assignments of the port and partner during probe, we don't take into account whether the peripheral is a plug or receptacle. This manifests itself in a mode entry failure on certain docks and dongles with captive cables. For instance, the Startech.com Type-C to DP dongle (Model #CDP2DP) advertises its DP VDO as 0x405. This would fail the Pin Assignment compatibility check, despite it supporting Pin Assignment C as a UFP. Update the check to use the correct DP Pin Assign macros that take the peripheral's receptacle bit into account. Fixes: c1e5c2f0cb8a ("usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles") Cc: stable@vger.kernel.org Reported-by: Diana Zigterman Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20230208205318.131385-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 9a6860285fbe..50b24096eb7f 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -535,10 +535,10 @@ int dp_altmode_probe(struct typec_altmode *alt) /* FIXME: Port can only be DFP_U. */ /* Make sure we have compatiple pin configurations */ - if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) & - DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) && - !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) & - DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo))) + if (!(DP_CAP_PIN_ASSIGN_DFP_D(port->vdo) & + DP_CAP_PIN_ASSIGN_UFP_D(alt->vdo)) && + !(DP_CAP_PIN_ASSIGN_UFP_D(port->vdo) & + DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) return -ENODEV; ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); From 303e724d7b1e1a0a93daf0b1ab5f7c4f53543b34 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Wed, 8 Feb 2023 13:12:23 -0500 Subject: [PATCH 104/186] usb: core: add quirk for Alcor Link AK9563 smartcard reader The Alcor Link AK9563 smartcard reader used on some Lenovo platforms doesn't work. If LPM is enabled the reader will provide an invalid usb config descriptor. Added quirk to disable LPM. Verified fix on Lenovo P16 G1 and T14 G3 Tested-by: Miroslav Zatko Tested-by: Dennis Wassenberg Cc: stable@vger.kernel.org Signed-off-by: Dennis Wassenberg Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230208181223.1092654-1-mpearson-lenovo@squebb.ca Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 079e183cf3bf..934b3d997702 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -526,6 +526,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ + { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, + /* DELL USB GEN2 */ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, From 4693e852f19a1338a49e540fb99fe3b2898d8594 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Feb 2023 09:00:02 -0500 Subject: [PATCH 105/186] drm/amdgpu: add S/G display parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some users have reported flickerng with S/G display. We've tried extensively to reproduce and debug the issue on a wide variety of platform configurations (DRAM bandwidth, etc.) and a variety of monitors, but so far have not been able to. We disabled S/G display on a number of platforms to address this but that leads to failure to pin framebuffers errors and blank displays when there is memory pressure or no displays at all on systems with limited carveout (e.g., Chromebooks). Add a option to disable this as a debugging option as a way for users to disable this, depending on their use case, and for us to help debug this further. v2: fix typo Reviewed-by: Harry Wentland Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e3e2e6e3b485..d148a1bd85e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -243,6 +243,7 @@ extern int amdgpu_num_kcq; #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024) extern int amdgpu_vcnfw_log; +extern int amdgpu_sg_display; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cd4caaa29528..3fe277bc233f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -186,6 +186,7 @@ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; +int amdgpu_sg_display = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -931,6 +932,16 @@ module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)"); module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444); +/** + * DOC: sg_display (int) + * Disable S/G (scatter/gather) display (i.e., display from system memory). + * This option is only relevant on APUs. Set this option to 0 to disable + * S/G display if you experience flickering or other issues under memory + * pressure and report the issue. + */ +MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)"); +module_param_named(sg_display, amdgpu_sg_display, int, 0444); + /** * DOC: smu_pptable_id (int) * Used to override pptable id. id = 0 use VBIOS pptable. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 78452856b2a3..c1da9ac02e7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1523,6 +1523,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } break; } + if (init_data.flags.gpu_vm_support && + (amdgpu_sg_display == 0)) + init_data.flags.gpu_vm_support = false; if (init_data.flags.gpu_vm_support) adev->mode_info.gpu_vm_support = true; From 9734a75cd99d448814e64feca133dc9a6e3f65f2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Feb 2023 09:07:42 -0500 Subject: [PATCH 106/186] Revert "drm/amd/display: disable S/G display on DCN 3.1.2/3" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f081cd4ca2658752a8c0e2353d50aec80d07c65f. Some users have reported flickerng with S/G display. We've tried extensively to reproduce and debug the issue on a wide variety of platform configurations (DRAM bandwidth, etc.) and a variety of monitors, but so far have not been able to. We disabled S/G display on a number of platforms to address this but that leads to failure to pin framebuffers errors and blank displays when there is memory pressure or no displays at all on systems with limited carveout (e.g., Chromebooks). We have a parameter to disable this as a debugging option as a way for users to disable this, depending on their use case, and for us to help debug this further. Having this enabled seems like the lesser of to evils. Reviewed-by: Harry Wentland Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c1da9ac02e7b..1a15c3a97491 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1514,6 +1514,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.gpu_vm_support = true; break; case IP_VERSION(3, 0, 1): + case IP_VERSION(3, 1, 2): + case IP_VERSION(3, 1, 3): case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; From 1b7ac7989ad82f8df6365cd6338df0d9937e0119 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Feb 2023 09:09:19 -0500 Subject: [PATCH 107/186] Revert "drm/amd/display: disable S/G display on DCN 2.1.0" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2404f9b0ea0153c3fddb0c4d7a43869dc8608f6f. Some users have reported flickerng with S/G display. We've tried extensively to reproduce and debug the issue on a wide variety of platform configurations (DRAM bandwidth, etc.) and a variety of monitors, but so far have not been able to. We disabled S/G display on a number of platforms to address this but that leads to failure to pin framebuffers errors and blank displays when there is memory pressure or no displays at all on systems with limited carveout (e.g., Chromebooks). We have a parameter to disable this as a debugging option as a way for users to disable this, depending on their use case, and for us to help debug this further. Having this enabled seems like the lesser of to evils. Reviewed-by: Harry Wentland Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1a15c3a97491..4065bfb009ca 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1513,6 +1513,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) (adev->apu_flags & AMD_APU_IS_PICASSO)) init_data.flags.gpu_vm_support = true; break; + case IP_VERSION(2, 1, 0): case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): From e7d636476ba73e61460619bd8822e16af3cba509 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Feb 2023 09:09:45 -0500 Subject: [PATCH 108/186] Revert "drm/amd/display: disable S/G display on DCN 3.1.5" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 3cc67fe1b3aa1ac4720e002f2aa2d08c9199a584. Some users have reported flickerng with S/G display. We've tried extensively to reproduce and debug the issue on a wide variety of platform configurations (DRAM bandwidth, etc.) and a variety of monitors, but so far have not been able to. We disabled S/G display on a number of platforms to address this but that leads to failure to pin framebuffers errors and blank displays when there is memory pressure or no displays at all on systems with limited carveout (e.g., Chromebooks). We have a parameter to disable this as a debugging option as a way for users to disable this, depending on their use case, and for us to help debug this further. Having this enabled seems like the lesser of to evils. Reviewed-by: Harry Wentland Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4065bfb009ca..93dee3d1a483 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1518,6 +1518,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): case IP_VERSION(3, 1, 4): + case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; break; From 519b7e13b5ae8dd38da1e52275705343be6bb508 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 23 Jan 2023 16:54:46 +0000 Subject: [PATCH 109/186] btrfs: lock the inode in shared mode before starting fiemap Currently fiemap does not take the inode's lock (VFS lock), it only locks a file range in the inode's io tree. This however can lead to a deadlock if we have a concurrent fsync on the file and fiemap code triggers a fault when accessing the user space buffer with fiemap_fill_next_extent(). The deadlock happens on the inode's i_mmap_lock semaphore, which is taken both by fsync and btrfs_page_mkwrite(). This deadlock was recently reported by syzbot and triggers a trace like the following: task:syz-executor361 state:D stack:20264 pid:5668 ppid:5119 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:5293 [inline] __schedule+0x995/0xe20 kernel/sched/core.c:6606 schedule+0xcb/0x190 kernel/sched/core.c:6682 wait_on_state fs/btrfs/extent-io-tree.c:707 [inline] wait_extent_bit+0x577/0x6f0 fs/btrfs/extent-io-tree.c:751 lock_extent+0x1c2/0x280 fs/btrfs/extent-io-tree.c:1742 find_lock_delalloc_range+0x4e6/0x9c0 fs/btrfs/extent_io.c:488 writepage_delalloc+0x1ef/0x540 fs/btrfs/extent_io.c:1863 __extent_writepage+0x736/0x14e0 fs/btrfs/extent_io.c:2174 extent_write_cache_pages+0x983/0x1220 fs/btrfs/extent_io.c:3091 extent_writepages+0x219/0x540 fs/btrfs/extent_io.c:3211 do_writepages+0x3c3/0x680 mm/page-writeback.c:2581 filemap_fdatawrite_wbc+0x11e/0x170 mm/filemap.c:388 __filemap_fdatawrite_range mm/filemap.c:421 [inline] filemap_fdatawrite_range+0x175/0x200 mm/filemap.c:439 btrfs_fdatawrite_range fs/btrfs/file.c:3850 [inline] start_ordered_ops fs/btrfs/file.c:1737 [inline] btrfs_sync_file+0x4ff/0x1190 fs/btrfs/file.c:1839 generic_write_sync include/linux/fs.h:2885 [inline] btrfs_do_write_iter+0xcd3/0x1280 fs/btrfs/file.c:1684 call_write_iter include/linux/fs.h:2189 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x7dc/0xc50 fs/read_write.c:584 ksys_write+0x177/0x2a0 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f7d4054e9b9 RSP: 002b:00007f7d404fa2f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f7d405d87a0 RCX: 00007f7d4054e9b9 RDX: 0000000000000090 RSI: 0000000020000000 RDI: 0000000000000006 RBP: 00007f7d405a51d0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 61635f65646f6e69 R13: 65646f7475616f6e R14: 7261637369646f6e R15: 00007f7d405d87a8 INFO: task syz-executor361:5697 blocked for more than 145 seconds. Not tainted 6.2.0-rc3-syzkaller-00376-g7c6984405241 #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor361 state:D stack:21216 pid:5697 ppid:5119 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:5293 [inline] __schedule+0x995/0xe20 kernel/sched/core.c:6606 schedule+0xcb/0x190 kernel/sched/core.c:6682 rwsem_down_read_slowpath+0x5f9/0x930 kernel/locking/rwsem.c:1095 __down_read_common+0x54/0x2a0 kernel/locking/rwsem.c:1260 btrfs_page_mkwrite+0x417/0xc80 fs/btrfs/inode.c:8526 do_page_mkwrite+0x19e/0x5e0 mm/memory.c:2947 wp_page_shared+0x15e/0x380 mm/memory.c:3295 handle_pte_fault mm/memory.c:4949 [inline] __handle_mm_fault mm/memory.c:5073 [inline] handle_mm_fault+0x1b79/0x26b0 mm/memory.c:5219 do_user_addr_fault+0x69b/0xcb0 arch/x86/mm/fault.c:1428 handle_page_fault arch/x86/mm/fault.c:1519 [inline] exc_page_fault+0x7a/0x110 arch/x86/mm/fault.c:1575 asm_exc_page_fault+0x22/0x30 arch/x86/include/asm/idtentry.h:570 RIP: 0010:copy_user_short_string+0xd/0x40 arch/x86/lib/copy_user_64.S:233 Code: 74 0a 89 (...) RSP: 0018:ffffc9000570f330 EFLAGS: 00050202 RAX: ffffffff843e6601 RBX: 00007fffffffefc8 RCX: 0000000000000007 RDX: 0000000000000000 RSI: ffffc9000570f3e0 RDI: 0000000020000120 RBP: ffffc9000570f490 R08: 0000000000000000 R09: fffff52000ae1e83 R10: fffff52000ae1e83 R11: 1ffff92000ae1e7c R12: 0000000000000038 R13: ffffc9000570f3e0 R14: 0000000020000120 R15: ffffc9000570f3e0 copy_user_generic arch/x86/include/asm/uaccess_64.h:37 [inline] raw_copy_to_user arch/x86/include/asm/uaccess_64.h:58 [inline] _copy_to_user+0xe9/0x130 lib/usercopy.c:34 copy_to_user include/linux/uaccess.h:169 [inline] fiemap_fill_next_extent+0x22e/0x410 fs/ioctl.c:144 emit_fiemap_extent+0x22d/0x3c0 fs/btrfs/extent_io.c:3458 fiemap_process_hole+0xa00/0xad0 fs/btrfs/extent_io.c:3716 extent_fiemap+0xe27/0x2100 fs/btrfs/extent_io.c:3922 btrfs_fiemap+0x172/0x1e0 fs/btrfs/inode.c:8209 ioctl_fiemap fs/ioctl.c:219 [inline] do_vfs_ioctl+0x185b/0x2980 fs/ioctl.c:810 __do_sys_ioctl fs/ioctl.c:868 [inline] __se_sys_ioctl+0x83/0x170 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f7d4054e9b9 RSP: 002b:00007f7d390d92f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f7d405d87b0 RCX: 00007f7d4054e9b9 RDX: 0000000020000100 RSI: 00000000c020660b RDI: 0000000000000005 RBP: 00007f7d405a51d0 R08: 00007f7d390d9700 R09: 0000000000000000 R10: 00007f7d390d9700 R11: 0000000000000246 R12: 61635f65646f6e69 R13: 65646f7475616f6e R14: 7261637369646f6e R15: 00007f7d405d87b8 What happens is the following: 1) Task A is doing an fsync, enters btrfs_sync_file() and flushes delalloc before locking the inode and the i_mmap_lock semaphore, that is, before calling btrfs_inode_lock(); 2) After task A flushes delalloc and before it calls btrfs_inode_lock(), another task dirties a page; 3) Task B starts a fiemap without FIEMAP_FLAG_SYNC, so the page dirtied at step 2 remains dirty and unflushed. Then when it enters extent_fiemap() and it locks a file range that includes the range of the page dirtied in step 2; 4) Task A calls btrfs_inode_lock() and locks the inode (VFS lock) and the inode's i_mmap_lock semaphore in write mode. Then it tries to flush delalloc by calling start_ordered_ops(), which will block, at find_lock_delalloc_range(), when trying to lock the range of the page dirtied at step 2, since this range was locked by the fiemap task (at step 3); 5) Task B generates a page fault when accessing the user space fiemap buffer with a call to fiemap_fill_next_extent(). The fault handler needs to call btrfs_page_mkwrite() for some other page of our inode, and there we deadlock when trying to lock the inode's i_mmap_lock semaphore in read mode, since the fsync task locked it in write mode (step 4) and the fsync task can not progress because it's waiting to lock a file range that is currently locked by us (the fiemap task, step 3). Fix this by taking the inode's lock (VFS lock) in shared mode when entering fiemap. This effectively serializes fiemap with fsync (except the most expensive part of fsync, the log sync), preventing this deadlock. Reported-by: syzbot+cc35f55c41e34c30dcb5@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000032dc7305f2a66f46@google.com/ CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9bd32daa9b9a..3bbf8703db2a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3826,6 +3826,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, lockend = round_up(start + len, inode->root->fs_info->sectorsize); prev_extent_end = lockstart; + btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); @@ -4019,6 +4020,7 @@ check_eof_delalloc: out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); From 5f58d783fd7823b2c2d5954d1126e702f94bfc4c Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 20 Jan 2023 21:47:16 +0800 Subject: [PATCH 110/186] btrfs: free device in btrfs_close_devices for a single device filesystem We have this check to make sure we don't accidentally add older devices that may have disappeared and re-appeared with an older generation from being added to an fs_devices (such as a replace source device). This makes sense, we don't want stale disks in our file system. However for single disks this doesn't really make sense. I've seen this in testing, but I was provided a reproducer from a project that builds btrfs images on loopback devices. The loopback device gets cached with the new generation, and then if it is re-used to generate a new file system we'll fail to mount it because the new fs is "older" than what we have in cache. Fix this by freeing the cache when closing the device for a single device filesystem. This will ensure that the mount command passed device path is scanned successfully during the next mount. CC: stable@vger.kernel.org # 5.10+ Reported-by: Daan De Meyer Signed-off-by: Josef Bacik Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4cdadf30163d..df43093b7a46 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -403,6 +403,7 @@ void btrfs_free_device(struct btrfs_device *device) static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; + WARN_ON(fs_devices->opened); while (!list_empty(&fs_devices->devices)) { device = list_entry(fs_devices->devices.next, @@ -1181,9 +1182,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) + if (!fs_devices->opened) { list_splice_init(&fs_devices->seed_list, &list); + /* + * If the struct btrfs_fs_devices is not assembled with any + * other device, it can be re-initialized during the next mount + * without the needing device-scan step. Therefore, it can be + * fully freed. + */ + if (fs_devices->num_devices == 1) { + list_del(&fs_devices->fs_list); + free_fs_devices(fs_devices); + } + } + + list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) { close_fs_devices(fs_devices); list_del(&fs_devices->seed_list); From eb7423273cc9922ee2d05bf660c034d7d515bb91 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 4 Feb 2023 01:35:31 -0500 Subject: [PATCH 111/186] riscv: kprobe: Fixup misaligned load text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current kprobe would cause a misaligned load for the probe point. This patch fixup it with two half-word loads instead. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/linux-riscv/878rhig9zj.fsf@all.your.base.are.belong.to.us/ Reported-by: Bjorn Topel Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230204063531.740220-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 41c7481afde3..2bedec37d092 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -65,16 +65,18 @@ static bool __kprobes arch_check_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { - unsigned long probe_addr = (unsigned long)p->addr; + u16 *insn = (u16 *)p->addr; - if (probe_addr & 0x1) + if ((unsigned long)insn & 0x1) return -EILSEQ; if (!arch_check_kprobe(p)) return -EILSEQ; /* copy instruction */ - p->opcode = *p->addr; + p->opcode = (kprobe_opcode_t)(*insn++); + if (GET_INSN_LENGTH(p->opcode) == 4) + p->opcode |= (kprobe_opcode_t)(*insn) << 16; /* decode instruction */ switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) { From 950b879b7f0251317d26bae0687e72592d607532 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 26 Jan 2023 22:53:06 -0500 Subject: [PATCH 112/186] riscv: Fixup race condition on PG_dcache_clean in flush_icache_pte In commit 588a513d3425 ("arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache()"), we found RISC-V has the same issue as the previous arm64. The previous implementation didn't guarantee the correct sequence of operations, which means flush_icache_all() hasn't been called when the PG_dcache_clean was set. That would cause a risk of page synchronization. Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230127035306.1819561-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 3cc07ed45aeb..fcd6145fbead 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -90,8 +90,10 @@ void flush_icache_pte(pte_t pte) if (PageHuge(page)) page = compound_head(page); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + if (!test_bit(PG_dcache_clean, &page->flags)) { flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); + } } #endif /* CONFIG_MMU */ From 55d77bae73426237b3c74c1757a894b056550dff Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 26 Jan 2023 08:04:47 +0100 Subject: [PATCH 113/186] kasan: fix Oops due to missing calls to kasan_arch_is_ready() On powerpc64, you can build a kernel with KASAN as soon as you build it with RADIX MMU support. However if the CPU doesn't have RADIX MMU, KASAN isn't enabled at init and the following Oops is encountered. [ 0.000000][ T0] KASAN not enabled as it requires radix! [ 4.484295][ T26] BUG: Unable to handle kernel data access at 0xc00e000000804a04 [ 4.485270][ T26] Faulting instruction address: 0xc00000000062ec6c [ 4.485748][ T26] Oops: Kernel access of bad area, sig: 11 [#1] [ 4.485920][ T26] BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries [ 4.486259][ T26] Modules linked in: [ 4.486637][ T26] CPU: 0 PID: 26 Comm: kworker/u2:2 Not tainted 6.2.0-rc3-02590-gf8a023b0a805 #249 [ 4.486907][ T26] Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1200 0xf000005 of:SLOF,HEAD pSeries [ 4.487445][ T26] Workqueue: eval_map_wq .tracer_init_tracefs_work_func [ 4.488744][ T26] NIP: c00000000062ec6c LR: c00000000062bb84 CTR: c0000000002ebcd0 [ 4.488867][ T26] REGS: c0000000049175c0 TRAP: 0380 Not tainted (6.2.0-rc3-02590-gf8a023b0a805) [ 4.489028][ T26] MSR: 8000000002009032 CR: 44002808 XER: 00000000 [ 4.489584][ T26] CFAR: c00000000062bb80 IRQMASK: 0 [ 4.489584][ T26] GPR00: c0000000005624d4 c000000004917860 c000000001cfc000 1800000000804a04 [ 4.489584][ T26] GPR04: c0000000003a2650 0000000000000cc0 c00000000000d3d8 c00000000000d3d8 [ 4.489584][ T26] GPR08: c0000000049175b0 a80e000000000000 0000000000000000 0000000017d78400 [ 4.489584][ T26] GPR12: 0000000044002204 c000000003790000 c00000000435003c c0000000043f1c40 [ 4.489584][ T26] GPR16: c0000000043f1c68 c0000000043501a0 c000000002106138 c0000000043f1c08 [ 4.489584][ T26] GPR20: c0000000043f1c10 c0000000043f1c20 c000000004146c40 c000000002fdb7f8 [ 4.489584][ T26] GPR24: c000000002fdb834 c000000003685e00 c000000004025030 c000000003522e90 [ 4.489584][ T26] GPR28: 0000000000000cc0 c0000000003a2650 c000000004025020 c000000004025020 [ 4.491201][ T26] NIP [c00000000062ec6c] .kasan_byte_accessible+0xc/0x20 [ 4.491430][ T26] LR [c00000000062bb84] .__kasan_check_byte+0x24/0x90 [ 4.491767][ T26] Call Trace: [ 4.491941][ T26] [c000000004917860] [c00000000062ae70] .__kasan_kmalloc+0xc0/0x110 (unreliable) [ 4.492270][ T26] [c0000000049178f0] [c0000000005624d4] .krealloc+0x54/0x1c0 [ 4.492453][ T26] [c000000004917990] [c0000000003a2650] .create_trace_option_files+0x280/0x530 [ 4.492613][ T26] [c000000004917a90] [c000000002050d90] .tracer_init_tracefs_work_func+0x274/0x2c0 [ 4.492771][ T26] [c000000004917b40] [c0000000001f9948] .process_one_work+0x578/0x9f0 [ 4.492927][ T26] [c000000004917c30] [c0000000001f9ebc] .worker_thread+0xfc/0x950 [ 4.493084][ T26] [c000000004917d60] [c00000000020be84] .kthread+0x1a4/0x1b0 [ 4.493232][ T26] [c000000004917e10] [c00000000000d3d8] .ret_from_kernel_thread+0x58/0x60 [ 4.495642][ T26] Code: 60000000 7cc802a6 38a00000 4bfffc78 60000000 7cc802a6 38a00001 4bfffc68 60000000 3d20a80e 7863e8c2 792907c6 <7c6348ae> 20630007 78630fe0 68630001 [ 4.496704][ T26] ---[ end trace 0000000000000000 ]--- The Oops is due to kasan_byte_accessible() not checking the readiness of KASAN. Add missing call to kasan_arch_is_ready() and bail out when not ready. The same problem is observed with ____kasan_kfree_large() so fix it the same. Also, as KASAN is not available and no shadow area is allocated for linear memory mapping, there is no point in allocating shadow mem for vmalloc memory as shown below in /sys/kernel/debug/kernel_page_tables ---[ kasan shadow mem start ]--- 0xc00f000000000000-0xc00f00000006ffff 0x00000000040f0000 448K r w pte valid present dirty accessed 0xc00f000000860000-0xc00f00000086ffff 0x000000000ac10000 64K r w pte valid present dirty accessed 0xc00f3ffffffe0000-0xc00f3fffffffffff 0x0000000004d10000 128K r w pte valid present dirty accessed ---[ kasan shadow mem end ]--- So, also verify KASAN readiness before allocating and poisoning shadow mem for VMAs. Link: https://lkml.kernel.org/r/150768c55722311699fdcf8f5379e8256749f47d.1674716617.git.christophe.leroy@csgroup.eu Fixes: 41b7a347bf14 ("powerpc: Book3S 64-bit outline-only KASAN support") Signed-off-by: Christophe Leroy Reported-by: Nathan Lynch Suggested-by: Michael Ellerman Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: [5.19+] Signed-off-by: Andrew Morton --- mm/kasan/common.c | 3 +++ mm/kasan/generic.c | 7 ++++++- mm/kasan/shadow.c | 12 ++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 833bf2cfd2a3..21e66d7f261d 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -246,6 +246,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip) { + if (!kasan_arch_is_ready()) + return false; + if (ptr != page_address(virt_to_head_page(ptr))) { kasan_report_invalid_free(ptr, ip, KASAN_REPORT_INVALID_FREE); return true; diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index b076f597a378..cb762982c8ba 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -191,7 +191,12 @@ bool kasan_check_range(unsigned long addr, size_t size, bool write, bool kasan_byte_accessible(const void *addr) { - s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); + s8 shadow_byte; + + if (!kasan_arch_is_ready()) + return true; + + shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); return shadow_byte >= 0 && shadow_byte < KASAN_GRANULE_SIZE; } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 2fba1f51f042..15cfb34d16a1 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -291,6 +291,9 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) unsigned long shadow_start, shadow_end; int ret; + if (!kasan_arch_is_ready()) + return 0; + if (!is_vmalloc_or_module_addr((void *)addr)) return 0; @@ -459,6 +462,9 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long region_start, region_end; unsigned long size; + if (!kasan_arch_is_ready()) + return; + region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE); region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE); @@ -502,6 +508,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, * with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored. */ + if (!kasan_arch_is_ready()) + return (void *)start; + if (!is_vmalloc_or_module_addr(start)) return (void *)start; @@ -524,6 +533,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, */ void __kasan_poison_vmalloc(const void *start, unsigned long size) { + if (!kasan_arch_is_ready()) + return; + if (!is_vmalloc_or_module_addr(start)) return; From 6b970599e807ea95c653926d41b095a92fd381e2 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 9 Dec 2022 15:28:01 +0800 Subject: [PATCH 114/186] mm: hwpoison: support recovery from ksm_might_need_to_copy() When the kernel copies a page from ksm_might_need_to_copy(), but runs into an uncorrectable error, it will crash since poisoned page is consumed by kernel, this is similar to the issue recently fixed by Copy-on-write poison recovery. When an error is detected during the page copy, return VM_FAULT_HWPOISON in do_swap_page(), and install a hwpoison entry in unuse_pte() when swapoff, which help us to avoid system crash. Note, memory failure on a KSM page will be skipped, but still call memory_failure_queue() to be consistent with general memory failure process, and we could support KSM page recovery in the feature. [wangkefeng.wang@huawei.com: enhance unuse_pte(), fix issue found by lkp] Link: https://lkml.kernel.org/r/20221213120523.141588-1-wangkefeng.wang@huawei.com [wangkefeng.wang@huawei.com: update changelog, alter ksm_might_need_to_copy(), restore unlikely() in unuse_pte()] Link: https://lkml.kernel.org/r/20230201074433.96641-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20221209072801.193221-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Naoya Horiguchi Cc: Miaohe Lin Cc: Tony Luck Signed-off-by: Andrew Morton --- mm/ksm.c | 7 +++++-- mm/memory.c | 3 +++ mm/swapfile.c | 20 ++++++++++++++------ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index dd02780c387f..addf490da146 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2629,8 +2629,11 @@ struct page *ksm_might_need_to_copy(struct page *page, new_page = NULL; } if (new_page) { - copy_user_highpage(new_page, page, address, vma); - + if (copy_mc_user_highpage(new_page, page, address, vma)) { + put_page(new_page); + memory_failure_queue(page_to_pfn(page), 0); + return ERR_PTR(-EHWPOISON); + } SetPageDirty(new_page); __SetPageUptodate(new_page); __SetPageLocked(new_page); diff --git a/mm/memory.c b/mm/memory.c index 3e836fecd035..f526b9152bef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (unlikely(!page)) { ret = VM_FAULT_OOM; goto out_page; + } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) { + ret = VM_FAULT_HWPOISON; + goto out_page; } folio = page_folio(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 4fa440e87cd6..eb9b0bf1fcdd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1764,12 +1764,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, struct page *swapcache; spinlock_t *ptl; pte_t *pte, new_pte; + bool hwposioned = false; int ret = 1; swapcache = page; page = ksm_might_need_to_copy(page, vma, addr); if (unlikely(!page)) return -ENOMEM; + else if (unlikely(PTR_ERR(page) == -EHWPOISON)) + hwposioned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) { @@ -1777,15 +1780,19 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } - if (unlikely(!PageUptodate(page))) { - pte_t pteval; + if (unlikely(hwposioned || !PageUptodate(page))) { + swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); - pteval = swp_entry_to_pte(make_swapin_error_entry()); - set_pte_at(vma->vm_mm, addr, pte, pteval); - swap_free(entry); + if (hwposioned) { + swp_entry = make_hwpoison_entry(swapcache); + page = swapcache; + } else { + swp_entry = make_swapin_error_entry(); + } + new_pte = swp_entry_to_pte(swp_entry); ret = 0; - goto out; + goto setpte; } /* See do_swap_page() */ @@ -1817,6 +1824,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, new_pte = pte_mksoft_dirty(new_pte); if (pte_swp_uffd_wp(*pte)) new_pte = pte_mkuffd_wp(new_pte); +setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: From badc28d4924bfed73efc93f716a0c3aa3afbdf6f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 2 Feb 2023 18:56:12 +0800 Subject: [PATCH 115/186] mm: shrinkers: fix deadlock in shrinker debugfs The debugfs_remove_recursive() is invoked by unregister_shrinker(), which is holding the write lock of shrinker_rwsem. It will waits for the handler of debugfs file complete. The handler also needs to hold the read lock of shrinker_rwsem to do something. So it may cause the following deadlock: CPU0 CPU1 debugfs_file_get() shrinker_debugfs_count_show()/shrinker_debugfs_scan_write() unregister_shrinker() --> down_write(&shrinker_rwsem); debugfs_remove_recursive() // wait for (A) --> wait_for_completion(); // wait for (B) --> down_read_killable(&shrinker_rwsem) debugfs_file_put() -- (A) up_write() -- (B) The down_read_killable() can be killed, so that the above deadlock can be recovered. But it still requires an extra kill action, otherwise it will block all subsequent shrinker-related operations, so it's better to fix it. [akpm@linux-foundation.org: fix CONFIG_SHRINKER_DEBUG=n stub] Link: https://lkml.kernel.org/r/20230202105612.64641-1-zhengqi.arch@bytedance.com Fixes: 5035ebc644ae ("mm: shrinkers: introduce debugfs interface for memory shrinkers") Signed-off-by: Qi Zheng Reviewed-by: Roman Gushchin Cc: Kent Overstreet Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 5 +++-- mm/shrinker_debug.c | 13 ++++++++----- mm/vmscan.c | 6 +++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 71310efe2fab..7bde8e1c228a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -107,7 +107,7 @@ extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern void shrinker_debugfs_remove(struct shrinker *shrinker); +extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker); extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ @@ -115,8 +115,9 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } -static inline void shrinker_debugfs_remove(struct shrinker *shrinker) +static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker) { + return NULL; } static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index b05295bab322..39c3491e28a3 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -246,18 +246,21 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) } EXPORT_SYMBOL(shrinker_debugfs_rename); -void shrinker_debugfs_remove(struct shrinker *shrinker) +struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker) { + struct dentry *entry = shrinker->debugfs_entry; + lockdep_assert_held(&shrinker_rwsem); kfree_const(shrinker->name); shrinker->name = NULL; - if (!shrinker->debugfs_entry) - return; + if (entry) { + ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id); + shrinker->debugfs_entry = NULL; + } - debugfs_remove_recursive(shrinker->debugfs_entry); - ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id); + return entry; } static int __init shrinker_debugfs_init(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index bf3eedf0209c..5b7b8d4f5297 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -741,6 +741,8 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + struct dentry *debugfs_entry; + if (!(shrinker->flags & SHRINKER_REGISTERED)) return; @@ -749,9 +751,11 @@ void unregister_shrinker(struct shrinker *shrinker) shrinker->flags &= ~SHRINKER_REGISTERED; if (shrinker->flags & SHRINKER_MEMCG_AWARE) unregister_memcg_shrinker(shrinker); - shrinker_debugfs_remove(shrinker); + debugfs_entry = shrinker_debugfs_remove(shrinker); up_write(&shrinker_rwsem); + debugfs_remove_recursive(debugfs_entry); + kfree(shrinker->nr_deferred); shrinker->nr_deferred = NULL; } From 67222c4ba8afe409d1e049a8f1e687c8a214fec7 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Fri, 20 Jan 2023 11:23:52 +0800 Subject: [PATCH 116/186] lib: parser: optimize match_NUMBER apis to use local array Memory will be allocated to store substring_t in match_strdup(), which means the caller of match_strdup() may need to be scheduled out to wait for reclaiming memory. smatch complains that this can cuase sleeping in an atoic context. Using local array to store substring_t to remove the restriction. Link: https://lkml.kernel.org/r/20230120032352.242767-1-lilingfeng3@huawei.com Link: https://lore.kernel.org/all/20221104023938.2346986-5-yukuai1@huaweicloud.com/ Link: https://lkml.kernel.org/r/20230120032352.242767-1-lilingfeng3@huawei.com Fixes: 2c0647988433 ("blk-iocost: don't release 'ioc->lock' while updating params") Signed-off-by: Li Lingfeng Reported-by: Yu Kuai Acked-by: Tejun Heo Cc: BingJing Chang Cc: Eric Biggers Cc: Hou Tao Cc: James Smart Cc: Jan Kara Cc: Jens Axboe Cc: yangerkun Cc: Zhang Yi Signed-off-by: Andrew Morton --- lib/parser.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/lib/parser.c b/lib/parser.c index bcb23484100e..2b5e2b480253 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -11,6 +11,15 @@ #include #include +/* + * max size needed by different bases to express U64 + * HEX: "0xFFFFFFFFFFFFFFFF" --> 18 + * DEC: "18446744073709551615" --> 20 + * OCT: "01777777777777777777777" --> 23 + * pick the max one to define NUMBER_BUF_LEN + */ +#define NUMBER_BUF_LEN 24 + /** * match_one - Determines if a string matches a simple pattern * @s: the string to examine for presence of the pattern @@ -129,14 +138,12 @@ EXPORT_SYMBOL(match_token); static int match_number(substring_t *s, int *result, int base) { char *endp; - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; long val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = 0; val = simple_strtol(buf, &endp, base); if (endp == buf) @@ -145,7 +152,6 @@ static int match_number(substring_t *s, int *result, int base) ret = -ERANGE; else *result = (int) val; - kfree(buf); return ret; } @@ -163,18 +169,15 @@ static int match_number(substring_t *s, int *result, int base) */ static int match_u64int(substring_t *s, u64 *result, int base) { - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; u64 val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = kstrtoull(buf, base, &val); if (!ret) *result = val; - kfree(buf); return ret; } @@ -206,14 +209,12 @@ EXPORT_SYMBOL(match_int); */ int match_uint(substring_t *s, unsigned int *result) { - int err = -ENOMEM; - char *buf = match_strdup(s); + char buf[NUMBER_BUF_LEN]; - if (buf) { - err = kstrtouint(buf, 10, result); - kfree(buf); - } - return err; + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; + + return kstrtouint(buf, 10, result); } EXPORT_SYMBOL(match_uint); From c16a3b11eaa88872d07f135df94dfa3fbcd05d10 Mon Sep 17 00:00:00 2001 From: Jeff Xie Date: Sat, 4 Feb 2023 17:01:39 +0800 Subject: [PATCH 117/186] scripts/gdb: fix 'lx-current' for x86 When printing the name of the current process, it will report an error: (gdb) p $lx_current().comm Python Exception No symbol "current_task" in current context.: Error occurred in Python: No symbol "current_task" in current context. Because e57ef2ed97c1 ("x86: Put hot per CPU variables into a struct") changed it. Link: https://lkml.kernel.org/r/20230204090139.1789264-1-xiehuan09@gmail.com Fixes: e57ef2ed97c1 ("x86: Put hot per CPU variables into a struct") Signed-off-by: Jeff Xie Cc: Jan Kiszka Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/cpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 15fc4626d236..9ee99f9fae8d 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -163,7 +163,7 @@ def get_current_task(cpu): task_ptr_type = task_type.get_type().pointer() if utils.is_target_arch("x86"): - var_ptr = gdb.parse_and_eval("¤t_task") + var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): current_task_addr = gdb.parse_and_eval("$SP_EL0") From ce4d9a1ea35ac5429e822c4106cb2859d5c71f3e Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 8 Feb 2023 15:20:00 -0800 Subject: [PATCH 118/186] of: reserved_mem: Have kmemleak ignore dynamically allocated reserved mem Patch series "Fix kmemleak crashes when scanning CMA regions", v2. When trying to boot a device with an ARM64 kernel with the following config options enabled: CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y CONFIG_DEBUG_KMEMLEAK=y a crash is encountered when kmemleak starts to scan the list of gray or allocated objects that it maintains. Upon closer inspection, it was observed that these page-faults always occurred when kmemleak attempted to scan a CMA region. At the moment, kmemleak is made aware of CMA regions that are specified through the devicetree to be dynamically allocated within a range of addresses. However, kmemleak should not need to scan CMA regions or any reserved memory region, as those regions can be used for DMA transfers between drivers and peripherals, and thus wouldn't contain anything useful for kmemleak. Additionally, since CMA regions are unmapped from the kernel's address space when they are freed to the buddy allocator at boot when CONFIG_DEBUG_PAGEALLOC is enabled, kmemleak shouldn't attempt to access those memory regions, as that will trigger a crash. Thus, kmemleak should ignore all dynamically allocated reserved memory regions. This patch (of 1): Currently, kmemleak ignores dynamically allocated reserved memory regions that don't have a kernel mapping. However, regions that do retain a kernel mapping (e.g. CMA regions) do get scanned by kmemleak. This is not ideal for two reasons: 1 kmemleak works by scanning memory regions for pointers to allocated objects to determine if those objects have been leaked or not. However, reserved memory regions can be used between drivers and peripherals for DMA transfers, and thus, would not contain pointers to allocated objects, making it unnecessary for kmemleak to scan these reserved memory regions. 2 When CONFIG_DEBUG_PAGEALLOC is enabled, along with kmemleak, the CMA reserved memory regions are unmapped from the kernel's address space when they are freed to buddy at boot. These CMA reserved regions are still tracked by kmemleak, however, and when kmemleak attempts to scan them, a crash will happen, as accessing the CMA region will result in a page-fault, since the regions are unmapped. Thus, use kmemleak_ignore_phys() for all dynamically allocated reserved memory regions, instead of those that do not have a kernel mapping associated with them. Link: https://lkml.kernel.org/r/20230208232001.2052777-1-isaacmanjarres@google.com Link: https://lkml.kernel.org/r/20230208232001.2052777-2-isaacmanjarres@google.com Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Signed-off-by: Isaac J. Manjarres Acked-by: Mike Rapoport (IBM) Acked-by: Catalin Marinas Cc: Frank Rowand Cc: Kirill A. Shutemov Cc: Nick Kossifidis Cc: Rafael J. Wysocki Cc: Rob Herring Cc: Russell King (Oracle) Cc: Saravana Kannan Cc: [5.15+] Signed-off-by: Andrew Morton --- drivers/of/of_reserved_mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 65f3b02a0e4e..f90975e00446 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, err = memblock_mark_nomap(base, size); if (err) memblock_phys_free(base, size); - kmemleak_ignore_phys(base); } + kmemleak_ignore_phys(base); + return err; } From d61615c366a489646a1bfe5b33455f916762d5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 8 Feb 2023 10:16:37 +0100 Subject: [PATCH 119/186] net: bgmac: fix BCM5358 support by setting correct flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code blocks handling BCMA_CHIP_ID_BCM5357 and BCMA_CHIP_ID_BCM53572 were incorrectly unified. Chip package values are not unique and cannot be checked independently. They are meaningful only in a context of a given chip. Packages BCM5358 and BCM47188 share the same value but then belong to different chips. Code unification resulted in treating BCM5358 as BCM47188 and broke its initialization. Link: https://github.com/openwrt/openwrt/issues/8278 Fixes: cb1b0f90acfe ("net: ethernet: bgmac: unify code of the same family") Cc: Jon Mason Signed-off-by: RafaƂ MiƂecki Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230208091637.16291-1-zajec5@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bgmac-bcma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 02bd3cf9a260..6e4f36aaf5db 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -240,12 +240,12 @@ static int bgmac_probe(struct bcma_device *core) bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1; bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY; - if (ci->pkg == BCMA_PKG_ID_BCM47188 || - ci->pkg == BCMA_PKG_ID_BCM47186) { + if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) || + (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) { bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII; bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; } - if (ci->pkg == BCMA_PKG_ID_BCM5358) + if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII; break; case BCMA_CHIP_ID_BCM53573: From 7a13a2eef645f2d2e3018d6ea518f121b35a87c8 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Wed, 8 Feb 2023 11:22:57 +0100 Subject: [PATCH 120/186] nfp: fix incorrect use of mbox in IPsec code The mailbox configuration mechanism requires writing several registers, which shouldn't be interrupted, so need lock to avoid race condition. The base offset of mailbox configuration registers is not fixed, it depends on TLV caps read from application firmware. Fixes: 859a497fe80c ("nfp: implement xfrm callbacks and expose ipsec offload feature to upper layer") Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/crypto/ipsec.c | 15 ++++++++++++--- drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 1 - 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index 4632268695cb..6d9d1c89ae6a 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -132,23 +132,32 @@ struct nfp_ipsec_cfg_mssg { static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx, struct nfp_ipsec_cfg_mssg *msg) { + unsigned int offset = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; int i, msg_size, ret; + ret = nfp_net_mbox_lock(nn, sizeof(*msg)); + if (ret) + return ret; + msg->cmd = type; msg->sa_idx = saidx; msg->rsp = 0; msg_size = ARRAY_SIZE(msg->raw); for (i = 0; i < msg_size; i++) - nn_writel(nn, NFP_NET_CFG_MBOX_VAL + 4 * i, msg->raw[i]); + nn_writel(nn, offset + 4 * i, msg->raw[i]); ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC); - if (ret < 0) + if (ret < 0) { + nn_ctrl_bar_unlock(nn); return ret; + } /* For now we always read the whole message response back */ for (i = 0; i < msg_size; i++) - msg->raw[i] = nn_readl(nn, NFP_NET_CFG_MBOX_VAL + 4 * i); + msg->raw[i] = nn_readl(nn, offset + 4 * i); + + nn_ctrl_bar_unlock(nn); switch (msg->rsp) { case NFP_IPSEC_CFG_MSSG_OK: diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 51124309ae1f..f03dcadff738 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -403,7 +403,6 @@ */ #define NFP_NET_CFG_MBOX_BASE 0x1800 #define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 -#define NFP_NET_CFG_MBOX_VAL 0x1808 #define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 #define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 #define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8 From 71f814cda659dca3db575ed67dfcdc4b93e8d33f Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Wed, 8 Feb 2023 11:22:58 +0100 Subject: [PATCH 121/186] nfp: fix schedule in atomic context when offloading sa IPsec offloading callbacks may be called in atomic context, sleep is not allowed in the implementation. Now use workqueue mechanism to avoid this issue. Extend existing workqueue mechanism for multicast configuration only to universal use, so that all configuring through mailbox asynchronously can utilize it. Fixes: 859a497fe80c ("nfp: implement xfrm callbacks and expose ipsec offload feature to upper layer") Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski --- .../net/ethernet/netronome/nfp/crypto/ipsec.c | 24 ++-- drivers/net/ethernet/netronome/nfp/nfp_net.h | 25 +++- .../ethernet/netronome/nfp/nfp_net_common.c | 110 +++++++++--------- 3 files changed, 88 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index 6d9d1c89ae6a..063cd371033a 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -129,25 +129,21 @@ struct nfp_ipsec_cfg_mssg { }; }; -static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx, - struct nfp_ipsec_cfg_mssg *msg) +static int nfp_net_ipsec_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) { unsigned int offset = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct nfp_ipsec_cfg_mssg *msg = (struct nfp_ipsec_cfg_mssg *)entry->msg; int i, msg_size, ret; ret = nfp_net_mbox_lock(nn, sizeof(*msg)); if (ret) return ret; - msg->cmd = type; - msg->sa_idx = saidx; - msg->rsp = 0; msg_size = ARRAY_SIZE(msg->raw); - for (i = 0; i < msg_size; i++) nn_writel(nn, offset + 4 * i, msg->raw[i]); - ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC); + ret = nfp_net_mbox_reconfig(nn, entry->cmd); if (ret < 0) { nn_ctrl_bar_unlock(nn); return ret; @@ -486,7 +482,10 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x) } /* Allocate saidx and commit the SA */ - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_ADD_SA, saidx, &msg); + msg.cmd = NFP_IPSEC_CFG_MSSG_ADD_SA; + msg.sa_idx = saidx; + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, + sizeof(msg), nfp_net_ipsec_cfg); if (err) { xa_erase(&nn->xa_ipsec, saidx); nn_err(nn, "Failed to issue IPsec command err ret=%d\n", err); @@ -500,14 +499,17 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x) static void nfp_net_xfrm_del_state(struct xfrm_state *x) { + struct nfp_ipsec_cfg_mssg msg = { + .cmd = NFP_IPSEC_CFG_MSSG_INV_SA, + .sa_idx = x->xso.offload_handle - 1, + }; struct net_device *netdev = x->xso.dev; - struct nfp_ipsec_cfg_mssg msg; struct nfp_net *nn; int err; nn = netdev_priv(netdev); - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_INV_SA, - x->xso.offload_handle - 1, &msg); + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, + sizeof(msg), nfp_net_ipsec_cfg); if (err) nn_warn(nn, "Failed to invalidate SA in hardware\n"); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 432d79d691c2..939cfce15830 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -617,9 +617,10 @@ struct nfp_net_dp { * @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return * -EOPNOTSUPP to keep backwards compatibility (set by app) * @port: Pointer to nfp_port structure if vNIC is a port - * @mc_lock: Protect mc_addrs list - * @mc_addrs: List of mc addrs to add/del to HW - * @mc_work: Work to update mc addrs + * @mbox_amsg: Asynchronously processed message via mailbox + * @mbox_amsg.lock: Protect message list + * @mbox_amsg.list: List of message to process + * @mbox_amsg.work: Work to process message asynchronously * @app_priv: APP private data for this vNIC */ struct nfp_net { @@ -721,13 +722,25 @@ struct nfp_net { struct nfp_port *port; - spinlock_t mc_lock; - struct list_head mc_addrs; - struct work_struct mc_work; + struct { + spinlock_t lock; + struct list_head list; + struct work_struct work; + } mbox_amsg; void *app_priv; }; +struct nfp_mbox_amsg_entry { + struct list_head list; + int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry); + u32 cmd; + char msg[]; +}; + +int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)); + /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18fc9971f1c8..70d7484c82af 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1334,14 +1334,54 @@ err_unlock: return err; } -struct nfp_mc_addr_entry { - u8 addr[ETH_ALEN]; - u32 cmd; - struct list_head list; -}; - -static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) +int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)) { + struct nfp_mbox_amsg_entry *entry; + + entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + memcpy(entry->msg, data, len); + entry->cmd = cmd; + entry->cfg = cb; + + spin_lock_bh(&nn->mbox_amsg.lock); + list_add_tail(&entry->list, &nn->mbox_amsg.list); + spin_unlock_bh(&nn->mbox_amsg.lock); + + schedule_work(&nn->mbox_amsg.work); + + return 0; +} + +static void nfp_net_mbox_amsg_work(struct work_struct *work) +{ + struct nfp_net *nn = container_of(work, struct nfp_net, mbox_amsg.work); + struct nfp_mbox_amsg_entry *entry, *tmp; + struct list_head tmp_list; + + INIT_LIST_HEAD(&tmp_list); + + spin_lock_bh(&nn->mbox_amsg.lock); + list_splice_init(&nn->mbox_amsg.list, &tmp_list); + spin_unlock_bh(&nn->mbox_amsg.lock); + + list_for_each_entry_safe(entry, tmp, &tmp_list, list) { + int err = entry->cfg(nn, entry); + + if (err) + nn_err(nn, "Config cmd %d to HW failed %d.\n", entry->cmd, err); + + list_del(&entry->list); + kfree(entry); + } +} + +static int nfp_net_mc_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) +{ + unsigned char *addr = entry->msg; int ret; ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ); @@ -1353,26 +1393,7 @@ static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO, get_unaligned_be16(addr + 4)); - return nfp_net_mbox_reconfig_and_unlock(nn, cmd); -} - -static int nfp_net_mc_prep(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) -{ - struct nfp_mc_addr_entry *entry; - - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; - - ether_addr_copy(entry->addr, addr); - entry->cmd = cmd; - spin_lock_bh(&nn->mc_lock); - list_add_tail(&entry->list, &nn->mc_addrs); - spin_unlock_bh(&nn->mc_lock); - - schedule_work(&nn->mc_work); - - return 0; + return nfp_net_mbox_reconfig_and_unlock(nn, entry->cmd); } static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) @@ -1385,35 +1406,16 @@ static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) return -EINVAL; } - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); + return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD, addr, + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); } static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr) { struct nfp_net *nn = netdev_priv(netdev); - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); -} - -static void nfp_net_mc_addr_config(struct work_struct *work) -{ - struct nfp_net *nn = container_of(work, struct nfp_net, mc_work); - struct nfp_mc_addr_entry *entry, *tmp; - struct list_head tmp_list; - - INIT_LIST_HEAD(&tmp_list); - - spin_lock_bh(&nn->mc_lock); - list_splice_init(&nn->mc_addrs, &tmp_list); - spin_unlock_bh(&nn->mc_lock); - - list_for_each_entry_safe(entry, tmp, &tmp_list, list) { - if (nfp_net_mc_cfg(nn, entry->addr, entry->cmd)) - nn_err(nn, "Config mc address to HW failed.\n"); - - list_del(&entry->list); - kfree(entry); - } + return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL, addr, + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); } static void nfp_net_set_rx_mode(struct net_device *netdev) @@ -2681,9 +2683,9 @@ int nfp_net_init(struct nfp_net *nn) if (!nn->dp.netdev) return 0; - spin_lock_init(&nn->mc_lock); - INIT_LIST_HEAD(&nn->mc_addrs); - INIT_WORK(&nn->mc_work, nfp_net_mc_addr_config); + spin_lock_init(&nn->mbox_amsg.lock); + INIT_LIST_HEAD(&nn->mbox_amsg.list); + INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work); return register_netdev(nn->dp.netdev); @@ -2704,6 +2706,6 @@ void nfp_net_clean(struct nfp_net *nn) unregister_netdev(nn->dp.netdev); nfp_net_ipsec_clean(nn); nfp_ccm_mbox_clean(nn); - flush_work(&nn->mc_work); + flush_work(&nn->mbox_amsg.work); nfp_net_reconfig_wait_posted(nn); } From e010ae08c71fda8be3d6bda256837795a0b3ea41 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:13:59 +0100 Subject: [PATCH 122/186] ipv6: Fix datagram socket connection with DSCP. Take into account the IPV6_TCLASS socket option (DSCP) in ip6_datagram_flow_key_init(). Otherwise fib6_rule_match() can't properly match the DSCP value, resulting in invalid route lookup. For example: ip route add unreachable table main 2001:db8::10/124 ip route add table 100 2001:db8::10/124 dev eth0 ip -6 rule add dsfield 0x04 table 100 echo test | socat - UDP6:[2001:db8::11]:54321,ipv6-tclass=0x04 Without this patch, socat fails at connect() time ("No route to host") because the fib-rule doesn't jump to table 100 and the lookup ends up being done in the main table. Fixes: 2cc67cc731d9 ("[IPV6] ROUTE: Routing by Traffic Class.") Signed-off-by: Guillaume Nault Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e624497fa992..9b6818453afe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->flowi6_mark = sk->sk_mark; fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; - fl6->flowlabel = np->flow_label; + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6->flowi6_uid = sk->sk_uid; if (!oif) From 8230680f36fd1525303d1117768c8852314c488c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:14:03 +0100 Subject: [PATCH 123/186] ipv6: Fix tcp socket connection with DSCP. Take into account the IPV6_TCLASS socket option (DSCP) in tcp_v6_connect(). Otherwise fib6_rule_match() can't properly match the DSCP value, resulting in invalid route lookup. For example: ip route add unreachable table main 2001:db8::10/124 ip route add table 100 2001:db8::10/124 dev eth0 ip -6 rule add dsfield 0x04 table 100 echo test | socat - TCP6:[2001:db8::11]:54321,ipv6-tclass=0x04 Without this patch, socat fails at connect() time ("No route to host") because the fib-rule doesn't jump to table 100 and the lookup ends up being done in the main table. Fixes: 2cc67cc731d9 ("[IPV6] ROUTE: Routing by Traffic Class.") Signed-off-by: Guillaume Nault Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 11b736a76bd7..0d25e813288d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; + fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; From c21a20d9d102ab809a992a6b056abbec4cd4c1cd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:14:07 +0100 Subject: [PATCH 124/186] selftests: fib_rule_tests: Test UDP and TCP connections with DSCP rules. Add the fib_rule6_send and fib_rule4_send tests to verify that DSCP values are properly taken into account when UDP or TCP sockets try to connect(). Tests are done with nettest, which needs a new option to specify the DS Field value of the socket being tested. This new option is named '-Q', in reference to the similar option used by ping. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 128 +++++++++++++++++- tools/testing/selftests/net/nettest.c | 51 ++++++- 2 files changed, 177 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index c245476fa29d..63c3eaec8d30 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -10,8 +10,10 @@ ret=0 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} IP="ip -netns testns" +IP_PEER="ip -netns peerns" RTABLE=100 +RTABLE_PEER=101 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 -TESTS="fib_rule6 fib_rule4" +TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" + +SELFTEST_PATH="" log_test() { @@ -52,6 +56,31 @@ log_section() echo "######################################################################" } +check_nettest() +{ + if which nettest > /dev/null 2>&1; then + return 0 + fi + + # Add the selftest directory to PATH if not already done + if [ "${SELFTEST_PATH}" = "" ]; then + SELFTEST_PATH="$(dirname $0)" + PATH="${PATH}:${SELFTEST_PATH}" + + # Now retry with the new path + if which nettest > /dev/null 2>&1; then + return 0 + fi + + if [ "${ret}" -eq 0 ]; then + ret="${ksft_skip}" + fi + echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" + fi + + return 1 +} + setup() { set -e @@ -72,6 +101,39 @@ cleanup() ip netns del testns } +setup_peer() +{ + set -e + + ip netns add peerns + $IP_PEER link set dev lo up + + ip link add name veth0 netns testns type veth \ + peer name veth1 netns peerns + $IP link set dev veth0 up + $IP_PEER link set dev veth1 up + + $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0 + $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1 + + $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad + $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad + + $IP_PEER address add 198.51.100.11/32 dev lo + $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11 + + $IP_PEER address add 2001:db8::1:11/128 dev lo + $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11 + + set +e +} + +cleanup_peer() +{ + $IP link del dev veth0 + ip netns del peerns +} + fib_check_iproute_support() { ip rule help 2>&1 | grep -q $1 @@ -190,6 +252,37 @@ fib_rule6_test() fi } +# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly +# taken into account when connecting the socket and when sending packets. +fib_rule6_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -6 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + fib_rule4_del() { $IP rule del $1 @@ -296,6 +389,37 @@ fib_rule4_test() fi } +# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken +# into account when connecting the socket and when sending packets. +fib_rule4_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -4 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + run_fibrule_tests() { log_section "IPv4 fib rule" @@ -345,6 +469,8 @@ do case $t in fib_rule6_test|fib_rule6) fib_rule6_test;; fib_rule4_test|fib_rule4) fib_rule4_test;; + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 7900fa98eccb..ee9a72982705 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -87,6 +87,7 @@ struct sock_args { int use_setsockopt; int use_freebind; int use_cmsg; + uint8_t dsfield; const char *dev; const char *server_dev; int ifindex; @@ -580,6 +581,36 @@ static int set_reuseaddr(int sd) return rc; } +static int set_dsfield(int sd, int version, int dsfield) +{ + if (!dsfield) + return 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IP_TOS)"); + return -1; + } + break; + + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IPV6_TCLASS)"); + return -1; + } + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server) (char *)&one, sizeof(one)) < 0) log_err_errno("Setting SO_BROADCAST error"); + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) + goto out_err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" +#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 @@ -1893,6 +1933,8 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" + " IPV6_TCLASS socket option)\n" " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" @@ -1971,6 +2013,13 @@ int main(int argc, char *argv[]) args.has_local_ip = 1; args.client_local_addr_str = optarg; break; + case 'Q': + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { + fprintf(stderr, "Invalid DS Field\n"); + return 1; + } + args.dsfield = tmp; + break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { fprintf(stderr, "Invalid port\n"); From 6e77a5a4af05d5e7391c841a4a4f3e4cadf72c25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Feb 2023 18:21:23 +0000 Subject: [PATCH 125/186] net: initialize net->notrefcnt_tracker earlier syzbot was able to trigger a warning [1] from net_free() calling ref_tracker_dir_exit(&net->notrefcnt_tracker) while the corresponding ref_tracker_dir_init() has not been done yet. copy_net_ns() can indeed bypass the call to setup_net() in some error conditions. Note: We might factorize/move more code in preinit_net() in the future. [1] INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 0 PID: 5817 Comm: syz-executor.3 Not tainted 6.2.0-rc7-next-20230208-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 assign_lock_key kernel/locking/lockdep.c:982 [inline] register_lock_class+0xdb6/0x1120 kernel/locking/lockdep.c:1295 __lock_acquire+0x10a/0x5df0 kernel/locking/lockdep.c:4951 lock_acquire.part.0+0x11c/0x370 kernel/locking/lockdep.c:5691 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162 ref_tracker_dir_exit+0x52/0x600 lib/ref_tracker.c:24 net_free net/core/net_namespace.c:442 [inline] net_free+0x98/0xd0 net/core/net_namespace.c:436 copy_net_ns+0x4f3/0x6b0 net/core/net_namespace.c:493 create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc1/0x1f0 kernel/nsproxy.c:228 ksys_unshare+0x449/0x920 kernel/fork.c:3205 __do_sys_unshare kernel/fork.c:3276 [inline] __se_sys_unshare kernel/fork.c:3274 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3274 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 Fixes: 0cafd77dcd03 ("net: add a refcount tracker for kernel sockets") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230208182123.3821604-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 078a0a420c8a..7b69cf882b8e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id) } EXPORT_SYMBOL_GPL(get_net_ns_by_id); +/* init code that must occur even if setup_net() is not called. */ +static __net_init void preinit_net(struct net *net) +{ + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags, rv = -ENOMEM; goto dec_ucounts; } + + preinit_net(net); refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); @@ -1118,6 +1125,7 @@ void __init net_ns_init(void) init_net.key_domain = &init_net_key_domain; #endif down_write(&pernet_ops_rwsem); + preinit_net(&init_net); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); From ec76d0c2da5c6dfb6a33f1545cc15997013923da Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 8 Feb 2023 14:38:59 -0800 Subject: [PATCH 126/186] vmxnet3: move rss code block under eop descriptor Commit b3973bb40041 ("vmxnet3: set correct hash type based on rss information") added hashType information into skb. However, rssType field is populated for eop descriptor. This can lead to incorrectly reporting of hashType for packets which use multiple rx descriptors. Multiple rx descriptors are used for Jumbo frame or LRO packets, which can hit this issue. This patch moves the RSS codeblock under eop descritor. Cc: stable@vger.kernel.org Fixes: b3973bb40041 ("vmxnet3: set correct hash type based on rss information") Signed-off-by: Ronak Doshi Acked-by: Peng Li Acked-by: Guolin Yang Link: https://lore.kernel.org/r/20230208223900.5794-1-doshir@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 56267c327f0b..682987040ea8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1546,31 +1546,6 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, rxd->len = rbi->len; } -#ifdef VMXNET3_RSS - if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && - (adapter->netdev->features & NETIF_F_RXHASH)) { - enum pkt_hash_types hash_type; - - switch (rcd->rssType) { - case VMXNET3_RCD_RSS_TYPE_IPV4: - case VMXNET3_RCD_RSS_TYPE_IPV6: - hash_type = PKT_HASH_TYPE_L3; - break; - case VMXNET3_RCD_RSS_TYPE_TCPIPV4: - case VMXNET3_RCD_RSS_TYPE_TCPIPV6: - case VMXNET3_RCD_RSS_TYPE_UDPIPV4: - case VMXNET3_RCD_RSS_TYPE_UDPIPV6: - hash_type = PKT_HASH_TYPE_L4; - break; - default: - hash_type = PKT_HASH_TYPE_L3; - break; - } - skb_set_hash(ctx->skb, - le32_to_cpu(rcd->rssHash), - hash_type); - } -#endif skb_record_rx_queue(ctx->skb, rq->qid); skb_put(ctx->skb, rcd->len); @@ -1653,6 +1628,31 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, u32 mtu = adapter->netdev->mtu; skb->len += skb->data_len; +#ifdef VMXNET3_RSS + if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && + (adapter->netdev->features & NETIF_F_RXHASH)) { + enum pkt_hash_types hash_type; + + switch (rcd->rssType) { + case VMXNET3_RCD_RSS_TYPE_IPV4: + case VMXNET3_RCD_RSS_TYPE_IPV6: + hash_type = PKT_HASH_TYPE_L3; + break; + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: + hash_type = PKT_HASH_TYPE_L4; + break; + default: + hash_type = PKT_HASH_TYPE_L3; + break; + } + skb_set_hash(skb, + le32_to_cpu(rcd->rssHash), + hash_type); + } +#endif vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); From d182bcf300772d8b2e5f43e47fa0ebda2b767cc4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:10:31 +0100 Subject: [PATCH 127/186] arm64: dts: meson-axg: Make mmc host controller interrupts level-sensitive The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: 221cf34bac54 ("ARM64: dts: meson-axg: enable the eMMC controller") Reported-by: Peter Suti Tested-by: Vyacheslav Bocharov Tested-by: Peter Suti Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/c00655d3-02f8-6f5f-4239-ca2412420cad@gmail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index 1648e67afbb6..417523dc4cc0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -1886,7 +1886,7 @@ sd_emmc_b: sd@5000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0x5000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_B>, <&clkc CLKID_SD_EMMC_B_CLK0>, @@ -1898,7 +1898,7 @@ sd_emmc_c: mmc@7000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0x7000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_C>, <&clkc CLKID_SD_EMMC_C_CLK0>, From ac8db4cceed218cca21c84f9d75ce88182d8b04f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:11:10 +0100 Subject: [PATCH 128/186] arm64: dts: meson-g12-common: Make mmc host controller interrupts level-sensitive The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: 4759fd87b928 ("arm64: dts: meson: g12a: add mmc nodes") Tested-by: FUKAUMI Naoki Tested-by: Martin Blumenstingl Tested-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/27d89baa-b8fa-baca-541b-ef17a97cde3c@gmail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 9dbd50820b1c..7f55d97f6c28 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2324,7 +2324,7 @@ sd_emmc_a: sd@ffe03000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe03000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_A>, <&clkc CLKID_SD_EMMC_A_CLK0>, @@ -2336,7 +2336,7 @@ sd_emmc_b: sd@ffe05000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe05000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_B>, <&clkc CLKID_SD_EMMC_B_CLK0>, @@ -2348,7 +2348,7 @@ sd_emmc_c: mmc@ffe07000 { compatible = "amlogic,meson-axg-mmc"; reg = <0x0 0xffe07000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_C>, <&clkc CLKID_SD_EMMC_C_CLK0>, From 66e45351f7d6798751f98001d1fcd572024d87f0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 9 Feb 2023 21:11:47 +0100 Subject: [PATCH 129/186] arm64: dts: meson-gx: Make mmc host controller interrupts level-sensitive The usage of edge-triggered interrupts lead to lost interrupts under load, see [0]. This was confirmed to be fixed by using level-triggered interrupts. The report was about SDIO. However, as the host controller is the same for SD and MMC, apply the change to all mmc controller instances. [0] https://www.spinics.net/lists/linux-mmc/msg73991.html Fixes: ef8d2ffedf18 ("ARM64: dts: meson-gxbb: add MMC support") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/76e042e0-a610-5ed5-209f-c4d7f879df44@gmail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index e3c12e0be99d..5eed15035b67 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -603,21 +603,21 @@ sd_emmc_a: mmc@70000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x70000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; sd_emmc_b: mmc@72000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x72000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; sd_emmc_c: mmc@74000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; reg = <0x0 0x74000 0x0 0x800>; - interrupts = ; + interrupts = ; status = "disabled"; }; }; From 3efc61d95259956db25347e2a9562c3e54546e20 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 29 Jan 2023 09:28:56 +0100 Subject: [PATCH 130/186] fbdev: Fix invalid page access after closing deferred I/O devices When a fbdev with deferred I/O is once opened and closed, the dirty pages still remain queued in the pageref list, and eventually later those may be processed in the delayed work. This may lead to a corruption of pages, hitting an Oops. This patch makes sure to cancel the delayed work and clean up the pageref list at closing the device for addressing the bug. A part of the cleanup code is factored out as a new helper function that is called from the common fb_release(). Reviewed-by: Patrik Jakobsson Cc: Signed-off-by: Takashi Iwai Tested-by: Miko Larsson Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230129082856.22113-1-tiwai@suse.de --- drivers/video/fbdev/core/fb_defio.c | 10 +++++++++- drivers/video/fbdev/core/fbmem.c | 4 ++++ include/linux/fb.h | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index c730253ab85c..583cbcf09446 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -313,7 +313,7 @@ void fb_deferred_io_open(struct fb_info *info, } EXPORT_SYMBOL_GPL(fb_deferred_io_open); -void fb_deferred_io_cleanup(struct fb_info *info) +void fb_deferred_io_release(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; struct page *page; @@ -327,6 +327,14 @@ void fb_deferred_io_cleanup(struct fb_info *info) page = fb_deferred_io_page(info, i); page->mapping = NULL; } +} +EXPORT_SYMBOL_GPL(fb_deferred_io_release); + +void fb_deferred_io_cleanup(struct fb_info *info) +{ + struct fb_deferred_io *fbdefio = info->fbdefio; + + fb_deferred_io_release(info); kvfree(info->pagerefs); mutex_destroy(&fbdefio->lock); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 3a6c8458eb8d..ab3545a00abc 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1454,6 +1454,10 @@ __releases(&info->lock) struct fb_info * const info = file->private_data; lock_fb_info(info); +#if IS_ENABLED(CONFIG_FB_DEFERRED_IO) + if (info->fbdefio) + fb_deferred_io_release(info); +#endif if (info->fbops->fb_release) info->fbops->fb_release(info,1); module_put(info->fbops->owner); diff --git a/include/linux/fb.h b/include/linux/fb.h index 96b96323e9cb..73eb1f85ea8e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -662,6 +662,7 @@ extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); +extern void fb_deferred_io_release(struct fb_info *info); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); From be8de49bea505e7777a69ef63d60e02ac1712683 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:24 -0600 Subject: [PATCH 131/186] x86/speculation: Identify processors vulnerable to SMT RSB predictions Certain AMD processors are vulnerable to a cross-thread return address predictions bug. When running in SMT mode and one of the sibling threads transitions out of C0 state, the other sibling thread could use return target predictions from the sibling thread that transitioned out of C0. The Spectre v2 mitigations cover the Linux kernel, as it fills the RSB when context switching to the idle thread. However, KVM allows a VMM to prevent exiting guest mode when transitioning out of C0. A guest could act maliciously in this situation, so create a new x86 BUG that can be used to detect if the processor is vulnerable. Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Tom Lendacky Message-Id: <91cec885656ca1fcd4f0185ce403a53dd9edecb7.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 61012476d66e..8f39c46197b8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -466,5 +466,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9cfca3d7d0e2..f3cc7699e1e1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1256,6 +1256,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1287,8 +1289,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED), - VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), {} }; @@ -1406,6 +1408,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) + setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From 6f0f2d5ef895d66a3f2b32dd05189ec34afa5a55 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:25 -0600 Subject: [PATCH 132/186] KVM: x86: Mitigate the cross-thread return address predictions bug By default, KVM/SVM will intercept attempts by the guest to transition out of C0. However, the KVM_CAP_X86_DISABLE_EXITS capability can be used by a VMM to change this behavior. To mitigate the cross-thread return address predictions bug (X86_BUG_SMT_RSB), a VMM must not be allowed to override the default behavior to intercept C0 transitions. Use a module parameter to control the mitigation on processors that are vulnerable to X86_BUG_SMT_RSB. If the processor is vulnerable to the X86_BUG_SMT_RSB bug and the module parameter is set to mitigate the bug, KVM will not allow the disabling of the HLT, MWAIT and CSTATE exits. Signed-off-by: Tom Lendacky Message-Id: <4019348b5e07148eb4d593380a5f6713b93c9a16.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index da4bbd043a7b..f0fa3de2ceb8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -191,6 +191,10 @@ module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; module_param(eager_page_split, bool, 0644); +/* Enable/disable SMT_RSB bug mitigation */ +bool __read_mostly mitigate_smt_rsb; +module_param(mitigate_smt_rsb, bool, 0444); + /* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU @@ -4448,10 +4452,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | - KVM_X86_DISABLE_EXITS_CSTATE; - if(kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; + r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) @@ -6227,15 +6236,26 @@ split_irqchip_unlock: if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) break; - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; + +#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ + "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." + + if (!mitigate_smt_rsb) { + if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; + } + r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: @@ -13456,6 +13476,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); static int __init kvm_x86_init(void) { kvm_mmu_x86_module_init(); + mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; } module_init(kvm_x86_init); From 493a2c2d23ca91afba96ac32b6cbafb54382c2a3 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 9 Feb 2023 09:22:26 -0600 Subject: [PATCH 133/186] Documentation/hw-vuln: Add documentation for Cross-Thread Return Predictions Add the admin guide for the Cross-Thread Return Predictions vulnerability. Signed-off-by: Tom Lendacky Message-Id: <60f9c0b4396956ce70499ae180cb548720b25c7e.1675956146.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- .../admin-guide/hw-vuln/cross-thread-rsb.rst | 92 +++++++++++++++++++ Documentation/admin-guide/hw-vuln/index.rst | 1 + 2 files changed, 93 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst new file mode 100644 index 000000000000..ec6e9f5bcf9e --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst @@ -0,0 +1,92 @@ + +.. SPDX-License-Identifier: GPL-2.0 + +Cross-Thread Return Address Predictions +======================================= + +Certain AMD and Hygon processors are subject to a cross-thread return address +predictions vulnerability. When running in SMT mode and one sibling thread +transitions out of C0 state, the other sibling thread could use return target +predictions from the sibling thread that transitioned out of C0. + +The Spectre v2 mitigations protect the Linux kernel, as it fills the return +address prediction entries with safe targets when context switching to the idle +thread. However, KVM does allow a VMM to prevent exiting guest mode when +transitioning out of C0. This could result in a guest-controlled return target +being consumed by the sibling thread. + +Affected processors +------------------- + +The following CPUs are vulnerable: + + - AMD Family 17h processors + - Hygon Family 18h processors + +Related CVEs +------------ + +The following CVE entry is related to this issue: + + ============== ======================================= + CVE-2022-27672 Cross-Thread Return Address Predictions + ============== ======================================= + +Problem +------- + +Affected SMT-capable processors support 1T and 2T modes of execution when SMT +is enabled. In 2T mode, both threads in a core are executing code. For the +processor core to enter 1T mode, it is required that one of the threads +requests to transition out of the C0 state. This can be communicated with the +HLT instruction or with an MWAIT instruction that requests non-C0. +When the thread re-enters the C0 state, the processor transitions back +to 2T mode, assuming the other thread is also still in C0 state. + +In affected processors, the return address predictor (RAP) is partitioned +depending on the SMT mode. For instance, in 2T mode each thread uses a private +16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon +transition between 1T/2T mode, the RAP contents are not modified but the RAP +pointers (which control the next return target to use for predictions) may +change. This behavior may result in return targets from one SMT thread being +used by RET predictions in the sibling thread following a 1T/2T switch. In +particular, a RET instruction executed immediately after a transition to 1T may +use a return target from the thread that just became idle. In theory, this +could lead to information disclosure if the return targets used do not come +from trustworthy code. + +Attack scenarios +---------------- + +An attack can be mounted on affected processors by performing a series of CALL +instructions with targeted return locations and then transitioning out of C0 +state. + +Mitigation mechanism +-------------------- + +Before entering idle state, the kernel context switches to the idle thread. The +context switch fills the RAP entries (referred to as the RSB in Linux) with safe +targets by performing a sequence of CALL instructions. + +Prevent a guest VM from directly putting the processor into an idle state by +intercepting HLT and MWAIT instructions. + +Both mitigations are required to fully address this issue. + +Mitigation control on the kernel command line +--------------------------------------------- + +Use existing Spectre v2 mitigations that will fill the RSB on context switch. + +Mitigation control for KVM - module parameter +--------------------------------------------- + +By default, the KVM hypervisor mitigates this issue by intercepting guest +attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS +capability to override those interceptions, but since this is not common, the +mitigation that covers this path is not enabled by default. + +The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on +using the boolean module parameter mitigate_smt_rsb, e.g.: + kvm.mitigate_smt_rsb=1 diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 4df436e7c417..e0614760a99e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -18,3 +18,4 @@ are configurable at compile, boot or run time. core-scheduling.rst l1d_flush.rst processor_mmio_stale_data.rst + cross-thread-rsb.rst From 7484a5bc153e81a1740c06ce037fd55b7638335c Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Feb 2023 10:44:17 +0100 Subject: [PATCH 134/186] drm/ast: Fix start address computation During the driver conversion to shmem, the start address for the scanout buffer was set to the base PCI address. In most cases it works because only the lower 24bits are used, and due to alignment it was almost always 0. But on some unlucky hardware, it's not the case, and some uninitialized memory is displayed on the BMC. With shmem, the primary plane is always at offset 0 in GPU memory. * v2: rewrite the patch to set the offset to 0. (Thomas Zimmermann) * v3: move the change to plane_init() and also fix the cursor plane. (Jammy Huang) Tested on a sr645 affected by this bug. Fixes: f2fa5a99ca81 ("drm/ast: Convert ast to SHMEM") Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Reviewed-by: Jammy Huang Link: https://patchwork.freedesktop.org/patch/msgid/20230209094417.21630-1-jfalempe@redhat.com --- drivers/gpu/drm/ast/ast_mode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c7443317c747..66a4a41c3fe9 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -714,7 +714,7 @@ static int ast_primary_plane_init(struct ast_private *ast) struct ast_plane *ast_primary_plane = &ast->primary_plane; struct drm_plane *primary_plane = &ast_primary_plane->base; void __iomem *vaddr = ast->vram; - u64 offset = ast->vram_base; + u64 offset = 0; /* with shmem, the primary plane is always at offset 0 */ unsigned long cursor_size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE); unsigned long size = ast->vram_fb_available - cursor_size; int ret; @@ -972,7 +972,7 @@ static int ast_cursor_plane_init(struct ast_private *ast) return -ENOMEM; vaddr = ast->vram + ast->vram_fb_available - size; - offset = ast->vram_base + ast->vram_fb_available - size; + offset = ast->vram_fb_available - size; ret = ast_plane_init(dev, ast_cursor_plane, vaddr, offset, size, 0x01, &ast_cursor_plane_funcs, From a7152be79b627428c628da2a887ca4b2512a78fd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 3 Feb 2023 13:57:29 -0600 Subject: [PATCH 135/186] Revert "PCI/ASPM: Save L1 PM Substates Capability for suspend/resume" This reverts commit 4ff116d0d5fd8a025604b0802d93a2d5f4e465d1. Tasev Nikola and Mark Enriquez reported that resume from suspend was broken in v6.1-rc1. Tasev bisected to a47126ec29f5 ("PCI/PTM: Cache PTM Capability offset"), but we can't figure out how that could be related. Mark saw the same symptoms and bisected to 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume"), which does have a connection: it restores L1 Substates configuration while ASPM L1 may be enabled: pci_restore_state pci_restore_aspm_l1ss_state aspm_program_l1ss pci_write_config_dword(PCI_L1SS_CTL1, ctl1) # L1SS restore pci_restore_pcie_state pcie_capability_write_word(PCI_EXP_LNKCTL, cap[i++]) # L1 restore which is a problem because PCIe r6.0, sec 5.5.4, requires that: If setting either or both of the enable bits for ASPM L1 PM Substates, both ports must be configured as described in this section while ASPM L1 is disabled. Separately, Thomas Witt reported that 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") broke suspend/resume, and it depends on 4ff116d0d5fd. Revert 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume") to fix the resume issue and enable revert of 5e85eba6f50d to fix the issue Thomas reported. Note that reverting 4ff116d0d5fd means L1 Substates config may be lost on suspend/resume. As far as we know the system will use more power but will still *work* correctly. Fixes: 4ff116d0d5fd ("PCI/ASPM: Save L1 PM Substates Capability for suspend/resume") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216782 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216877 Reported-by: Tasev Nikola Reported-by: Mark Enriquez Reported-by: Thomas Witt Tested-by: Mark Enriquez Tested-by: Thomas Witt Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v6.1+ Cc: Vidya Sagar --- drivers/pci/pci.c | 7 ------- drivers/pci/pci.h | 4 ---- drivers/pci/pcie/aspm.c | 37 ------------------------------------- 3 files changed, 48 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index fba95486caaf..5641786bd020 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1665,7 +1665,6 @@ int pci_save_state(struct pci_dev *dev) return i; pci_save_ltr_state(dev); - pci_save_aspm_l1ss_state(dev); pci_save_dpc_state(dev); pci_save_aer_state(dev); pci_save_ptm_state(dev); @@ -1772,7 +1771,6 @@ void pci_restore_state(struct pci_dev *dev) * LTR itself (in the PCIe capability). */ pci_restore_ltr_state(dev); - pci_restore_aspm_l1ss_state(dev); pci_restore_pcie_state(dev); pci_restore_pasid_state(dev); @@ -3465,11 +3463,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) if (error) pci_err(dev, "unable to allocate suspend buffer for LTR\n"); - error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_L1SS, - 2 * sizeof(u32)); - if (error) - pci_err(dev, "unable to allocate suspend buffer for ASPM-L1SS\n"); - pci_allocate_vc_save_buffers(dev); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9ed3b5550043..9049d07d3aae 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -566,14 +566,10 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active); void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); -void pci_save_aspm_l1ss_state(struct pci_dev *dev); -void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } -static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { } -static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { } #endif #ifdef CONFIG_PCIE_ECRC diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 53a1fa306e1e..915cbd939dd9 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -757,43 +757,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) PCI_L1SS_CTL1_L1SS_MASK, val); } -void pci_save_aspm_l1ss_state(struct pci_dev *dev) -{ - struct pci_cap_saved_state *save_state; - u16 l1ss = dev->l1ss; - u32 *cap; - - if (!l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL2, cap++); - pci_read_config_dword(dev, l1ss + PCI_L1SS_CTL1, cap++); -} - -void pci_restore_aspm_l1ss_state(struct pci_dev *dev) -{ - struct pci_cap_saved_state *save_state; - u32 *cap, ctl1, ctl2; - u16 l1ss = dev->l1ss; - - if (!l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - ctl2 = *cap++; - ctl1 = *cap; - aspm_program_l1ss(dev, ctl1, ctl2); -} - static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, From ff209ecc376a2ea8dd106a1f594427a5d94b7dd3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 3 Feb 2023 13:57:39 -0600 Subject: [PATCH 136/186] Revert "PCI/ASPM: Refactor L1 PM Substates Control Register programming" This reverts commit 5e85eba6f50dc288c22083a7e213152bcc4b8208. Thomas Witt reported that 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") broke suspend/resume on a Tuxedo Infinitybook S 14 v5, which seems to use a Clevo L140CU Mainboard. The main symptom is: iwlwifi 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible nvme 0000:03:00.0: Unable to change power state from D3hot to D0, device inaccessible and the machine is only partially usable after resume. It can't run dmesg and can't do a clean reboot. This happens on every suspend/resume cycle. Revert 5e85eba6f50d until we can figure out the root cause. Fixes: 5e85eba6f50d ("PCI/ASPM: Refactor L1 PM Substates Control Register programming") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216877 Reported-by: Thomas Witt Tested-by: Thomas Witt Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v6.1+ Cc: Vidya Sagar --- drivers/pci/pcie/aspm.c | 72 +++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 915cbd939dd9..4b4184563a92 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -470,31 +470,6 @@ static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos, pci_write_config_dword(pdev, pos, val); } -static void aspm_program_l1ss(struct pci_dev *dev, u32 ctl1, u32 ctl2) -{ - u16 l1ss = dev->l1ss; - u32 l1_2_enable; - - /* - * Per PCIe r6.0, sec 5.5.4, T_POWER_ON in PCI_L1SS_CTL2 must be - * programmed prior to setting the L1.2 enable bits in PCI_L1SS_CTL1. - */ - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL2, ctl2); - - /* - * In addition, Common_Mode_Restore_Time and LTR_L1.2_THRESHOLD in - * PCI_L1SS_CTL1 must be programmed *before* setting the L1.2 - * enable bits, even though they're all in PCI_L1SS_CTL1. - */ - l1_2_enable = ctl1 & PCI_L1SS_CTL1_L1_2_MASK; - ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK; - - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, ctl1); - if (l1_2_enable) - pci_write_config_dword(dev, l1ss + PCI_L1SS_CTL1, - ctl1 | l1_2_enable); -} - /* Calculate L1.2 PM substate timing parameters */ static void aspm_calc_l1ss_info(struct pcie_link_state *link, u32 parent_l1ss_cap, u32 child_l1ss_cap) @@ -504,6 +479,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, u32 t_common_mode, t_power_on, l1_2_threshold, scale, value; u32 ctl1 = 0, ctl2 = 0; u32 pctl1, pctl2, cctl1, cctl2; + u32 pl1_2_enables, cl1_2_enables; if (!(link->aspm_support & ASPM_STATE_L1_2_MASK)) return; @@ -552,21 +528,39 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, ctl2 == pctl2 && ctl2 == cctl2) return; - pctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE); - pctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); - aspm_program_l1ss(parent, pctl1, ctl2); + /* Disable L1.2 while updating. See PCIe r5.0, sec 5.5.4, 7.8.3.3 */ + pl1_2_enables = pctl1 & PCI_L1SS_CTL1_L1_2_MASK; + cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK; - cctl1 &= ~(PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE); - cctl1 |= (ctl1 & (PCI_L1SS_CTL1_CM_RESTORE_TIME | - PCI_L1SS_CTL1_LTR_L12_TH_VALUE | - PCI_L1SS_CTL1_LTR_L12_TH_SCALE)); - aspm_program_l1ss(child, cctl1, ctl2); + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + } + + /* Program T_POWER_ON times in both ports */ + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, ctl2); + pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2); + + /* Program Common_Mode_Restore_Time in upstream device */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1); + + /* Program LTR_L1.2_THRESHOLD time in both ports */ + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_LTR_L12_TH_VALUE | + PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1); + + if (pl1_2_enables || cl1_2_enables) { + pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0, + pl1_2_enables); + pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0, + cl1_2_enables); + } } static void aspm_l1ss_init(struct pcie_link_state *link) From 0ed577e7e8e508c24e22ba07713ecc4903e147c3 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Thu, 9 Feb 2023 14:14:32 +0530 Subject: [PATCH 137/186] net: ethernet: ti: am65-cpsw: Add RX DMA Channel Teardown Quirk In TI's AM62x/AM64x SoCs, successful teardown of RX DMA Channel raises an interrupt. The process of servicing this interrupt involves flushing all pending RX DMA descriptors and clearing the teardown completion marker (TDCM). The am65_cpsw_nuss_rx_packets() function invoked from the RX NAPI callback services the interrupt. Thus, it is necessary to wait for this handler to run, drain all packets and clear TDCM, before calling napi_disable() in am65_cpsw_nuss_common_stop() function post channel teardown. If napi_disable() executes before ensuring that TDCM is cleared, the TDCM remains set when the interfaces are down, resulting in an interrupt storm when the interfaces are brought up again. Since the interrupt raised to indicate the RX DMA Channel teardown is specific to the AM62x and AM64x SoCs, add a quirk for it. Fixes: 4f7cce272403 ("net: ethernet: ti: am65-cpsw: add support for am64x cpsw3g") Co-developed-by: Vignesh Raghavendra Signed-off-by: Vignesh Raghavendra Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230209084432.189222-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 12 +++++++++++- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecbde83b5243..6cda4b7c10cb 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -501,7 +501,15 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn); } + reinit_completion(&common->tdown_complete); k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true); + + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) { + i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); + if (!i) + dev_err(common->dev, "rx teardown timeout\n"); + } + napi_disable(&common->napi_rx); for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) @@ -721,6 +729,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, if (cppi5_desc_is_tdcm(desc_dma)) { dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx); + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) + complete(&common->tdown_complete); return 0; } @@ -2672,7 +2682,7 @@ static const struct am65_cpsw_pdata j721e_pdata = { }; static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { - .quirks = 0, + .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ, .ale_dev_id = "am64-cpswxg", .fdqring_mode = K3_RINGACC_RING_MODE_RING, }; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 4b75620f8d28..e5f1c44788c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -90,6 +90,7 @@ struct am65_cpsw_rx_chn { }; #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0) +#define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1) struct am65_cpsw_pdata { u32 quirks; From a1221703a0f75a9d81748c516457e0fc76951496 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Thu, 9 Feb 2023 12:13:05 +0000 Subject: [PATCH 138/186] sctp: sctp_sock_filter(): avoid list_entry() on possibly empty list Use list_is_first() to check whether tsp->asoc matches the first element of ep->asocs, as the list is not guaranteed to have an entry. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Pietro Borrello Acked-by: Xin Long Link: https://lore.kernel.org/r/20230208-sctp-filter-v2-1-6e1f4017f326@diag.uniroma1.it Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index a557009e9832..c3d6b92dd386 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) + if (!list_is_first(&tsp->asoc->asocs, &ep->asocs)) return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) From ee059170b1f7e94e55fa6cadee544e176a6e59c2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 9 Feb 2023 11:37:39 -0300 Subject: [PATCH 139/186] net/sched: tcindex: update imperfect hash filters respecting rcu The imperfect hash area can be updated while packets are traversing, which will cause a use-after-free when 'tcf_exts_exec()' is called with the destroyed tcf_ext. CPU 0: CPU 1: tcindex_set_parms tcindex_classify tcindex_lookup tcindex_lookup tcf_exts_change tcf_exts_exec [UAF] Stop operating on the shared area directly, by using a local copy, and update the filter with 'rcu_replace_pointer()'. Delete the old filter version only after a rcu grace period elapsed. Fixes: 9b0d4446b569 ("net: sched: avoid atomic swap in tcf_exts_change") Reported-by: valis Suggested-by: valis Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Link: https://lore.kernel.org/r/20230209143739.279867-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/cls_tcindex.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ee2a050c887b..ba7f22a49397 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -339,6 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; + bool update_h = false; err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) @@ -456,10 +458,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } } - if (cp->perfect) + if (cp->perfect) { r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? : &new_filter_result; + } else { + /* imperfect area is updated in-place using rcu */ + update_h = !!tcindex_lookup(cp, handle); + r = &new_filter_result; + } if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -485,7 +490,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, rcu_assign_pointer(tp->root, cp); - if (r == &new_filter_result) { + if (update_h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *cf; + + f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + /* imperfect area bucket */ + fp = cp->h + (handle % cp->hash); + + /* lookup the filter, guaranteed to exist */ + for (cf = rcu_dereference_bh_rtnl(*fp); cf; + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) + if (cf->key == handle) + break; + + f->next = cf->next; + + cf = rcu_replace_pointer(*fp, f, 1); + tcf_exts_get_net(&cf->result.exts); + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); + } else if (r == &new_filter_result) { struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; From 1f090494170ea298530cf1285fb8d078e355b4c0 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 9 Feb 2023 17:01:30 +0100 Subject: [PATCH 140/186] ice: xsk: Fix cleaning of XDP_TX frames Incrementation of xsk_frames inside the for-loop produces infinite loop, if we have both normal AF_XDP-TX and XDP_TXed buffers to complete. Split xsk_frames into 2 variables (xsk_frames and completed_frames) to eliminate this bug. Fixes: 29322791bc8b ("ice: xsk: change batched Tx descriptor cleaning") Acked-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Reviewed-by: Alexander Duyck Acked-by: Tony Nguyen Link: https://lore.kernel.org/r/20230209160130.1779890-1-larysa.zaremba@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_xsk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 7105de6fb344..374b7f10b549 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -800,6 +800,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) struct ice_tx_desc *tx_desc; u16 cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u16 completed_frames = 0; u16 xsk_frames = 0; u16 last_rs; int i; @@ -809,19 +810,21 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) if ((tx_desc->cmd_type_offset_bsz & cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { if (last_rs >= ntc) - xsk_frames = last_rs - ntc + 1; + completed_frames = last_rs - ntc + 1; else - xsk_frames = last_rs + cnt - ntc + 1; + completed_frames = last_rs + cnt - ntc + 1; } - if (!xsk_frames) + if (!completed_frames) return; - if (likely(!xdp_ring->xdp_tx_active)) + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = completed_frames; goto skip; + } ntc = xdp_ring->next_to_clean; - for (i = 0; i < xsk_frames; i++) { + for (i = 0; i < completed_frames; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; if (tx_buf->raw_buf) { @@ -837,7 +840,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) } skip: tx_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_to_clean += xsk_frames; + xdp_ring->next_to_clean += completed_frames; if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) From 7fa0b526f865cb42aa33917fd02a92cb03746f4d Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Thu, 9 Feb 2023 09:28:33 -0800 Subject: [PATCH 141/186] i40e: Add checking for null for nlmsg_find_attr() The result of nlmsg_find_attr() 'br_spec' is dereferenced in nla_for_each_nested(), but it can take NULL value in nla_find() function, which will result in an error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops") Signed-off-by: Natalia Petrova Reviewed-by: Jesse Brandeburg Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230209172833.3596034-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53d0083e35da..4626d2a1af91 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13167,6 +13167,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; From ca43ccf41224b023fc290073d5603a755fd12eed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 9 Feb 2023 16:22:01 -0800 Subject: [PATCH 142/186] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions. Eric Dumazet pointed out [0] that when we call skb_set_owner_r() for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called, resulting in a negative sk_forward_alloc. We add a new helper which clones a skb and sets its owner only when sk_rmem_schedule() succeeds. Note that we move skb_set_owner_r() forward in (dccp|tcp)_v6_do_rcv() because tcp_send_synack() can make sk_forward_alloc negative before ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases. [0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/ Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()") Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sock.h | 13 +++++++++++++ net/dccp/ipv6.c | 7 ++----- net/ipv6/tcp_ipv6.c | 10 +++------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index dcd72e6285b2..556209727633 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2434,6 +2434,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) +{ + skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + if (skb) { + if (sk_rmem_schedule(sk, skb, skb->truesize)) { + skb_set_owner_r(skb, sk); + return skb; + } + __kfree_skb(skb); + } + return NULL; +} + static inline void skb_prepare_for_gro(struct sk_buff *skb) { if (skb->destructor != sock_wfree) { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4260fe466993..b9d7c3dd1cb3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; @@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) @@ -679,7 +677,6 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0d25e813288d..a52a4f12f146 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1388,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_mask(sk, GFP_ATOMIC)); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) { + if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); - skb_set_owner_r(newnp->pktoptions, newsk); - } } } else { if (!req_unhash && found_dup_sk) { @@ -1467,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + opt_skb = skb_clone_and_charge_r(skb, sk); reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1553,7 +1550,6 @@ ipv6_pktoptions: if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { From 62ec33b44e0f7168ff2886520fec6fb62d03b5a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 9 Feb 2023 16:22:02 -0800 Subject: [PATCH 143/186] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues(). Christoph Paasch reported that commit b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") started triggering WARN_ON_ONCE(sk->sk_forward_alloc) in sk_stream_kill_queues(). [0 - 2] Also, we can reproduce it by a program in [3]. In the commit, we delay freeing ipv6_pinfo.pktoptions from sk->destroy() to sk->sk_destruct(), so sk->sk_forward_alloc is no longer zero in inet_csk_destroy_sock(). The same check has been in inet_sock_destruct() from at least v2.6, we can just remove the WARN_ON_ONCE(). However, among the users of sk_stream_kill_queues(), only CAIF is not calling inet_sock_destruct(). Thus, we add the same WARN_ON_ONCE() to caif_sock_destructor(). [0]: https://lore.kernel.org/netdev/39725AB4-88F1-41B3-B07F-949C5CAEFF4F@icloud.com/ [1]: https://github.com/multipath-tcp/mptcp_net-next/issues/341 [2]: WARNING: CPU: 0 PID: 3232 at net/core/stream.c:212 sk_stream_kill_queues+0x2f9/0x3e0 Modules linked in: CPU: 0 PID: 3232 Comm: syz-executor.0 Not tainted 6.2.0-rc5ab24eb4698afbe147b424149c529e2a43ec24eb5 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_stream_kill_queues+0x2f9/0x3e0 Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ec 00 00 00 8b ab 08 01 00 00 e9 60 ff ff ff e8 d0 5f b6 fe 0f 0b eb 97 e8 c7 5f b6 fe <0f> 0b eb a0 e8 be 5f b6 fe 0f 0b e9 6a fe ff ff e8 02 07 e3 fe e9 RSP: 0018:ffff88810570fc68 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888101f38f40 RSI: ffffffff8285e529 RDI: 0000000000000005 RBP: 0000000000000ce0 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000ce0 R11: 0000000000000001 R12: ffff8881009e9488 R13: ffffffff84af2cc0 R14: 0000000000000000 R15: ffff8881009e9458 FS: 00007f7fdfbd5800(0000) GS:ffff88811b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32923000 CR3: 00000001062fc006 CR4: 0000000000170ef0 Call Trace: inet_csk_destroy_sock+0x1a1/0x320 __tcp_close+0xab6/0xe90 tcp_close+0x30/0xc0 inet_release+0xe9/0x1f0 inet6_release+0x4c/0x70 __sock_release+0xd2/0x280 sock_close+0x15/0x20 __fput+0x252/0xa20 task_work_run+0x169/0x250 exit_to_user_mode_prepare+0x113/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f7fdf7ae28d Code: c1 20 00 00 75 10 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee fb ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 37 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00000000007dfbb0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7fdf7ae28d RDX: 0000000000000000 RSI: ffffffffffffffff RDI: 0000000000000003 RBP: 0000000000000000 R08: 000000007f338e0f R09: 0000000000000e0f R10: 000000007f338e13 R11: 0000000000000293 R12: 00007f7fdefff000 R13: 00007f7fdefffcd8 R14: 00007f7fdefffce0 R15: 00007f7fdefffcd8 [3]: https://lore.kernel.org/netdev/20230208004245.83497-1-kuniyu@amazon.com/ Fixes: b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().") Reported-by: syzbot Reported-by: Christoph Paasch Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/caif/caif_socket.c | 1 + net/core/stream.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 748be7253248..78c9729a6057 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk) return; } sk_stream_kill_queues(&cf_sk->sk); + WARN_ON_ONCE(sk->sk_forward_alloc); caif_free_client(&cf_sk->layer); } diff --git a/net/core/stream.c b/net/core/stream.c index cd06750dd329..434446ab14c5 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk) sk_mem_reclaim_final(sk); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket From 4102db175b5d884d133270fdbd0e59111ce688fc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Feb 2023 07:50:08 -0500 Subject: [PATCH 144/186] nfsd: don't destroy global nfs4_file table in per-net shutdown The nfs4_file table is global, so shutting it down when a containerized nfsd is shut down is wrong and can lead to double-frees. Tear down the nfs4_file_rhltable in nfs4_state_shutdown instead of nfs4_state_shutdown_net. Fixes: d47b295e8d76 ("NFSD: Use rhashtable for managing nfs4_file objects") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2169017 Reported-by: JianHong Yin Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f194d029c3d6..0fb09e37e12f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8182,7 +8182,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8192,6 +8191,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void From b6c7abd1c28a63ad633433d037ee15a1bc3023ba Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 12 Feb 2023 15:13:03 +0000 Subject: [PATCH 145/186] tracing: Fix TASK_COMM_LEN in trace event format file After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"), the content of the format file under /sys/kernel/tracing/events/task/task_newtask was changed from field:char comm[16]; offset:12; size:16; signed:0; to field:char comm[TASK_COMM_LEN]; offset:12; size:16; signed:0; John reported that this change breaks older versions of perfetto. Then Mathieu pointed out that this behavioral change was caused by the use of __stringify(_len), which happens to work on macros, but not on enum labels. And he also gave the suggestion on how to fix it: :One possible solution to make this more robust would be to extend :struct trace_event_fields with one more field that indicates the length :of an array as an actual integer, without storing it in its stringified :form in the type, and do the formatting in f_show where it belongs. The result as follows after this change, $ cat /sys/kernel/tracing/events/task/task_newtask/format field:char comm[16]; offset:12; size:16; signed:0; Link: https://lore.kernel.org/lkml/Y+QaZtz55LIirsUO@google.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230210155921.4610-1-laoar.shao@gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230212151303.12353-1-laoar.shao@gmail.com Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Kajetan Puchalski CC: Qais Yousef Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN") Reported-by: John Stultz Debugged-by: Mathieu Desnoyers Suggested-by: Mathieu Desnoyers Suggested-by: Steven Rostedt Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 + include/trace/stages/stage4_event_fields.h | 3 +- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 39 +++++++++++++++++----- kernel/trace/trace_export.c | 3 +- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 4342e996bcdb..0e373222a6df 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -270,6 +270,7 @@ struct trace_event_fields { const int align; const int is_signed; const int filter_type; + const int len; }; int (*define_fields)(struct trace_event_call *); }; diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index affd541fd25e..b6f679ae21aa 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -26,7 +26,8 @@ #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER,\ + .len = _len }, #undef __dynamic_array #define __dynamic_array(_type, _item, _len) { \ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4eb6d6b97a9f..085a31b978a5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1282,6 +1282,7 @@ struct ftrace_event_field { int offset; int size; int is_signed; + int len; }; struct prog_entry; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 33e0b4f8ebe6..6a4696719297 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name) static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, - int is_signed, int filter_type) + int is_signed, int filter_type, int len) { struct ftrace_event_field *field; @@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field->offset = offset; field->size = size; field->is_signed = is_signed; + field->len = len; list_add(&field->link, head); @@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type, head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type); + is_signed, filter_type, 0); } EXPORT_SYMBOL_GPL(trace_define_field); +int trace_define_field_ext(struct trace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, + int filter_type, int len) +{ + struct list_head *head; + + if (WARN_ON(!call->class)) + return 0; + + head = trace_get_fields(call); + return __trace_define_field(head, type, name, offset, size, + is_signed, filter_type, len); +} + #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ - filter_type); \ + filter_type, 0); \ if (ret) \ return ret; @@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), FILTER_OTHER, 0); \ if (ret) \ return ret; @@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); - else - seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + else if (field->len) + seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, - array_descriptor, field->offset, + field->len, field->offset, field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + field->offset, field->size, !!field->is_signed); return 0; } @@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call) } offset = ALIGN(offset, field->align); - ret = trace_define_field(call, field->type, field->name, + ret = trace_define_field_ext(call, field->type, field->name, offset, field->size, - field->is_signed, field->filter_type); + field->is_signed, field->filter_type, + field->len); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d960f6b11b5e..58f3946081e2 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ .size = sizeof(_type[_len]), .align = __alignof__(_type), \ - is_signed_type(_type), .filter_type = FILTER_OTHER }, + is_signed_type(_type), .filter_type = FILTER_OTHER, \ + .len = _len }, #undef __array_desc #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) From 462a8e08e0e6287e5ce13187257edbf24213ed03 Mon Sep 17 00:00:00 2001 From: David Chen Date: Thu, 9 Feb 2023 17:48:28 +0000 Subject: [PATCH 146/186] Fix page corruption caused by racy check in __free_pages When we upgraded our kernel, we started seeing some page corruption like the following consistently: BUG: Bad page state in process ganesha.nfsd pfn:1304ca page:0000000022261c55 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x1304ca flags: 0x17ffffc0000000() raw: 0017ffffc0000000 ffff8a513ffd4c98 ffffeee24b35ec08 0000000000000000 raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000 page dumped because: nonzero mapcount CPU: 0 PID: 15567 Comm: ganesha.nfsd Kdump: loaded Tainted: P B O 5.10.158-1.nutanix.20221209.el7.x86_64 #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 Call Trace: dump_stack+0x74/0x96 bad_page.cold+0x63/0x94 check_new_page_bad+0x6d/0x80 rmqueue+0x46e/0x970 get_page_from_freelist+0xcb/0x3f0 ? _cond_resched+0x19/0x40 __alloc_pages_nodemask+0x164/0x300 alloc_pages_current+0x87/0xf0 skb_page_frag_refill+0x84/0x110 ... Sometimes, it would also show up as corruption in the free list pointer and cause crashes. After bisecting the issue, we found the issue started from commit e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages"): if (put_page_testzero(page)) free_the_page(page, order); else if (!PageHead(page)) while (order-- > 0) free_the_page(page + (1 << order), order); So the problem is the check PageHead is racy because at this point we already dropped our reference to the page. So even if we came in with compound page, the page can already be freed and PageHead can return false and we will end up freeing all the tail pages causing double free. Fixes: e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages") Link: https://lore.kernel.org/lkml/BYAPR02MB448855960A9656EEA81141FC94D99@BYAPR02MB4488.namprd02.prod.outlook.com/ Cc: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Chunwei Chen Reviewed-by: Vlastimil Babka Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0745aedebb37..3bb3484563ed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5631,9 +5631,12 @@ EXPORT_SYMBOL(get_zeroed_page); */ void __free_pages(struct page *page, unsigned int order) { + /* get PageHead before we drop reference */ + int head = PageHead(page); + if (put_page_testzero(page)) free_the_page(page, order); - else if (!PageHead(page)) + else if (!head) while (order-- > 0) free_the_page(page + (1 << order), order); } From 80510b63f7b6bdd30e07b3a42115d0a324e20cd6 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Tue, 7 Feb 2023 17:57:15 +0100 Subject: [PATCH 147/186] MAINTAINERS: Add myself as maintainer for arch/sh (SUPERH) Both Rich Felker and Yoshinori Sato haven't done any work on arch/sh for a while. As I have been maintaining Debian's sh4 port since 2014, I am interested to keep the architecture alive. Signed-off-by: John Paul Adrian Glaubitz Acked-by: Yoshinori Sato Acked-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 513045878fe7..39ff1a717625 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20090,6 +20090,7 @@ F: drivers/watchdog/sunplus_wdt.c SUPERH M: Yoshinori Sato M: Rich Felker +M: John Paul Adrian Glaubitz L: linux-sh@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-sh/list/ From ceaa837f96adb69c0df0397937cd74991d5d821a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Feb 2023 14:10:17 -0800 Subject: [PATCH 148/186] Linux 6.2-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 80afd9fb9f75..716a975730a6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 70b5339caf847b8b6097b6dfab0c5a99b40713c8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 12 Feb 2023 20:11:42 -0500 Subject: [PATCH 149/186] tracing: Make trace_define_field_ext() static trace_define_field_ext() is not used outside of trace_events.c, it should be static. Link: https://lore.kernel.org/oe-kbuild-all/202302130750.679RaRog-lkp@intel.com/ Fixes: b6c7abd1c28a ("tracing: Fix TASK_COMM_LEN in trace event format file") Reported-by: Reported-by: kernel test robot Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6a4696719297..6a942fa275c7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -155,7 +155,7 @@ int trace_define_field(struct trace_event_call *call, const char *type, } EXPORT_SYMBOL_GPL(trace_define_field); -int trace_define_field_ext(struct trace_event_call *call, const char *type, +static int trace_define_field_ext(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type, int len) { From 5f69f009b7c445ddd219609c0085825c6270415a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 8 Feb 2023 17:14:35 +0100 Subject: [PATCH 150/186] nvme-pci: add bogus ID quirk for ADATA SX6000PNP Yet another device which needs a quirk: nvme nvme1: globally duplicate IDs for nsid 1 nvme nvme1: VID:DID 10ec:5763 model:ADATA SX6000PNP firmware:V9002s94 Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1207827 Reported-by: Gustavo Freitas Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c734934c407c..d0ef4b549640 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3423,6 +3423,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x10ec, 0x5763), /* ADATA SX6000PNP */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, From 2f4796518315ab246638db8feebfcb494212e7ee Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Thu, 9 Feb 2023 01:16:48 -0800 Subject: [PATCH 151/186] af_key: Fix heap information leak Since x->encap of pfkey_msg2xfrm_state() is not initialized to 0, kernel heap data can be leaked. Fix with kzalloc() to prevent this. Signed-off-by: Hyunwoo Kim Acked-by: Herbert Xu Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 2bdbcec781cd..a815f5ab4c49 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; - x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); + x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; From 2fa28f5c6fcbfc794340684f36d2581b4f2d20b5 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 10 Feb 2023 10:05:51 +0800 Subject: [PATCH 152/186] net: openvswitch: fix possible memory leak in ovs_meter_cmd_set() old_meter needs to be free after it is detached regardless of whether the new meter is successfully attached. Fixes: c7c4c44c9a95 ("net: openvswitch: expand the meters supported number") Signed-off-by: Hangyu Hua Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 6e38f68f88c2..f2698d2316df 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) err = attach_meter(meter_tbl, meter); if (err) - goto exit_unlock; + goto exit_free_old_meter; ovs_unlock(); @@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_free_old_meter: + ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); From c68f345b7c425b38656e1791a0486769a8797016 Mon Sep 17 00:00:00 2001 From: Miko Larsson Date: Fri, 10 Feb 2023 09:13:44 +0100 Subject: [PATCH 153/186] net/usb: kalmia: Don't pass act_len in usb_bulk_msg error path syzbot reported that act_len in kalmia_send_init_packet() is uninitialized when passing it to the first usb_bulk_msg error path. Jiri Pirko noted that it's pointless to pass it in the error path, and that the value that would be printed in the second error path would be the value of act_len from the first call to usb_bulk_msg.[1] With this in mind, let's just not pass act_len to the usb_bulk_msg error paths. 1: https://lore.kernel.org/lkml/Y9pY61y1nwTuzMOa@nanopsycho/ Fixes: d40261236e8e ("net/usb: Add Samsung Kalmia driver for Samsung GT-B3730") Reported-and-tested-by: syzbot+cd80c5ef5121bfe85b55@syzkaller.appspotmail.com Signed-off-by: Miko Larsson Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/usb/kalmia.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 9f2b70ef39aa..613fc6910f14 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -65,8 +65,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT); if (status != 0) { netdev_err(dev->net, - "Error sending init packet. Status %i, length %i\n", - status, act_len); + "Error sending init packet. Status %i\n", + status); return status; } else if (act_len != init_msg_len) { @@ -83,8 +83,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, if (status != 0) netdev_err(dev->net, - "Error receiving init result. Status %i, length %i\n", - status, act_len); + "Error receiving init result. Status %i\n", + status); else if (act_len != expected_len) netdev_err(dev->net, "Unexpected init result length: %i\n", act_len); From 9b55d3f0a69af649c62cbc2633e6d695bb3cc583 Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Fri, 10 Feb 2023 13:36:44 +0100 Subject: [PATCH 154/186] net: Fix unwanted sign extension in netdev_stats_to_stats64() When converting net_device_stats to rtnl_link_stats64 sign extension is triggered on ILP32 machines as 6c1c509778 changed the previous "ulong -> u64" conversion to "long -> u64" by accessing the net_device_stats fields through a (signed) atomic_long_t. This causes for example the received bytes counter to jump to 16EiB after having received 2^31 bytes. Casting the atomic value to "unsigned long" beforehand converting it into u64 avoids this. Fixes: 6c1c5097781f ("net: add atomic_long_t to net_device_stats fields") Signed-off-by: Felix Riemann Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..ea0a7bac1e5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10375,7 +10375,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = atomic_long_read(&src[i]); + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); From 2038cc592811209de20c4e094ca08bfb1e6fbc6c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 10 Feb 2023 12:31:55 -0500 Subject: [PATCH 155/186] bnxt_en: Fix mqprio and XDP ring checking logic In bnxt_reserve_rings(), there is logic to check that the number of TX rings reserved is enough to cover all the mqprio TCs, but it fails to account for the TX XDP rings. So the check will always fail if there are mqprio TCs and TX XDP rings. As a result, the driver always fails to initialize after the XDP program is attached and the device will be brought down. A subsequent ifconfig up will also fail because the number of TX rings is set to an inconsistent number. Fix the check to properly account for TX XDP rings. If the check fails, set the number of TX rings back to a consistent number after calling netdev_reset_tc(). Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Reviewed-by: Hongguang Gao Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 240a7e8a7652..6c32f5c427b5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9274,10 +9274,14 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc); return rc; } - if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) { + if (tcs && (bp->tx_nr_rings_per_tc * tcs != + bp->tx_nr_rings - bp->tx_nr_rings_xdp)) { netdev_err(bp->dev, "tx ring reservation failure\n"); netdev_reset_tc(bp->dev); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + if (bp->tx_nr_rings_xdp) + bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp; + else + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; return -ENOMEM; } return 0; From bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2023 16:33:37 +0200 Subject: [PATCH 156/186] drm: Disable dynamic debug as broken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_DRM_USE_DYNAMIC_DEBUG breaks debug prints for (at least modular) drm drivers. The debug prints can be reinstated by manually frobbing /sys/module/drm/parameters/debug after the fact, but at that point the damage is done and all debugs from driver probe are lost. This makes drivers totally undebuggable. There's a more complete fix in progress [1], with further details, but we need this fixed in stable kernels. Mark the feature as broken and disable it by default, with hopes distros follow suit and disable it as well. [1] https://lore.kernel.org/r/20230125203743.564009-1-jim.cromie@gmail.com Fixes: 84ec67288c10 ("drm_print: wrap drm_*_dbg in dyndbg descriptor factory macro") Cc: Jim Cromie Cc: Greg Kroah-Hartman Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v6.1+ Signed-off-by: Ville SyrjĂ€lĂ€ Acked-by: Greg Kroah-Hartman Acked-by: Jim Cromie Acked-by: Maxime Ripard Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230207143337.2126678-1-jani.nikula@intel.com --- drivers/gpu/drm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 315cbdf61979..9abfb482b615 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -53,7 +53,8 @@ config DRM_DEBUG_MM config DRM_USE_DYNAMIC_DEBUG bool "use dynamic debug to implement drm.debug" - default y + default n + depends on BROKEN depends on DRM depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE depends on JUMP_LABEL From 3e6dc119a37bceb06e1d595b1d41b52f3e99132d Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Wed, 25 Jan 2023 12:37:40 +0100 Subject: [PATCH 157/186] ice: Fix check for weight and priority of a scheduling node Currently checks for weight and priority ranges don't check incoming value from the devlink. Instead it checks node current weight or priority. This makes those checks useless. Change range checks in ice_set_object_tx_priority() and ice_set_object_tx_weight() to check against incoming priority an weight. Fixes: 42c2eb6b1f43 ("ice: Implement devlink-rate API") Signed-off-by: Michal Wilczynski Acked-by: Jesse Brandeburg Reviewed-by: Paul Menzel Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 8286e47b4bae..0fae0186bd85 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -899,7 +899,7 @@ static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched { int status; - if (node->tx_priority >= 8) { + if (priority >= 8) { NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); return -EINVAL; } @@ -929,7 +929,7 @@ static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_n { int status; - if (node->tx_weight > 200 || node->tx_weight < 1) { + if (weight > 200 || weight < 1) { NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); return -EINVAL; } From 43fbca02c2ddc39ff5879b6f3a4a097b1ba02098 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 6 Feb 2023 15:54:36 -0800 Subject: [PATCH 158/186] ice: fix lost multicast packets in promisc mode There was a problem reported to us where the addition of a VF with an IPv6 address ending with a particular sequence would cause the parent device on the PF to no longer be able to respond to neighbor discovery packets. In this case, we had an ovs-bridge device living on top of a VLAN, which was on top of a PF, and it would not be able to talk anymore (the neighbor entry would expire and couldn't be restored). The root cause of the issue is that if the PF is asked to be in IFF_PROMISC mode (promiscuous mode) and it had an ipv6 address that needed the 33:33:ff:00:00:04 multicast address to work, then when the VF was added with the need for the same multicast address, the VF would steal all the traffic destined for that address. The ice driver didn't auto-subscribe a request of IFF_PROMISC to the "multicast replication from other port's traffic" meaning that it won't get for instance, packets with an exact destination in the VF, as above. The VF's IPv6 address, which adds a "perfect filter" for 33:33:ff:00:00:04, results in no packets for that multicast address making it to the PF (which is in promisc but NOT "multicast replication"). The fix is to enable "multicast promiscuous" whenever the driver is asked to enable IFF_PROMISC, and make sure to disable it when appropriate. Fixes: e94d44786693 ("ice: Implement filter sync, NDO operations and bump version") Signed-off-by: Jesse Brandeburg Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b288a01a321a..8ec24f6cf6be 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -275,6 +275,8 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) if (status && status != -EEXIST) return status; + netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n", + vsi->vsi_num, promisc_m); return 0; } @@ -300,6 +302,8 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) promisc_m, 0); } + netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n", + vsi->vsi_num, promisc_m); return status; } @@ -414,6 +418,16 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } err = 0; vlan_ops->dis_rx_filtering(vsi); + + /* promiscuous mode implies allmulticast so + * that VSIs that are in promiscuous mode are + * subscribed to multicast packets coming to + * the port + */ + err = ice_set_promisc(vsi, + ICE_MCAST_PROMISC_BITS); + if (err) + goto out_promisc; } } else { /* Clear Rx filter to remove traffic from wire */ @@ -430,6 +444,18 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) NETIF_F_HW_VLAN_CTAG_FILTER) vlan_ops->ena_rx_filtering(vsi); } + + /* disable allmulti here, but only if allmulti is not + * still enabled for the netdev + */ + if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) { + err = ice_clear_promisc(vsi, + ICE_MCAST_PROMISC_BITS); + if (err) { + netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n", + err, vsi->vsi_num); + } + } } } goto exit; From 4562c65ec852067c6196abdcf2d925f08841dcbc Mon Sep 17 00:00:00 2001 From: Johannes Zink Date: Fri, 10 Feb 2023 15:39:37 +0100 Subject: [PATCH 159/186] net: stmmac: fix order of dwmac5 FlexPPS parametrization sequence So far changing the period by just setting new period values while running did not work. The order as indicated by the publicly available reference manual of the i.MX8MP [1] indicates a sequence: * initiate the programming sequence * set the values for PPS period and start time * start the pulse train generation. This is currently not used in dwmac5_flex_pps_config(), which instead does: * initiate the programming sequence and immediately start the pulse train generation * set the values for PPS period and start time This caused the period values written not to take effect until the FlexPPS output was disabled and re-enabled again. This patch fix the order and allows the period to be set immediately. [1] https://www.nxp.com/webapp/Download?colCode=IMX8MPRM Fixes: 9a8a02c9d46d ("net: stmmac: Add Flexible PPS support") Signed-off-by: Johannes Zink Link: https://lore.kernel.org/r/20230210143937.3427483-1-j.zink@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 413f66017219..e95d35f1e5a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -541,9 +541,9 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, return 0; } - val |= PPSCMDx(index, 0x2); val |= TRGTMODSELx(index, 0x2); val |= PPSEN0; + writel(val, ioaddr + MAC_PPS_CONTROL); writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index)); @@ -568,6 +568,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index)); /* Finally, activate it */ + val |= PPSCMDx(index, 0x2); writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } From 21c167aa0ba943a7cac2f6969814f83bb701666b Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 10 Feb 2023 17:08:25 -0300 Subject: [PATCH 160/186] net/sched: act_ctinfo: use percpu stats The tc action act_ctinfo was using shared stats, fix it to use percpu stats since bstats_update() must be called with locks or with a percpu pointer argument. tdc results: 1..12 ok 1 c826 - Add ctinfo action with default setting ok 2 0286 - Add ctinfo action with dscp ok 3 4938 - Add ctinfo action with valid cpmark and zone ok 4 7593 - Add ctinfo action with drop control ok 5 2961 - Replace ctinfo action zone and action control ok 6 e567 - Delete ctinfo action with valid index ok 7 6a91 - Delete ctinfo action with invalid index ok 8 5232 - List ctinfo actions ok 9 7702 - Flush ctinfo actions ok 10 3201 - Add ctinfo action with duplicate index ok 11 8295 - Add ctinfo action with invalid index ok 12 3964 - Replace ctinfo action with invalid goto_chain control Fixes: 24ec483cec98 ("net: sched: Introduce act_ctinfo action") Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Larysa Zaremba Link: https://lore.kernel.org/r/20230210200824.444856-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_ctinfo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4b1b59da5c0b..4d15b6a6169c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, cp = rcu_dereference_bh(ca->params); tcf_lastuse_update(&ca->tcf_tm); - bstats_update(&ca->tcf_bstats, skb); + tcf_action_update_bstats(&ca->common, skb); action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); @@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, index = actparm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, index, est, a, - &act_ctinfo_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, + &act_ctinfo_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; From 924bd96ea27d53706147a94b8e576080535785df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Feb 2023 06:58:33 +0100 Subject: [PATCH 161/186] nvme-pci: set the DMA mask earlier Set the DMA mask before calling dma_addressing_limited, which depends on it. Note that this stop checking the return value of dma_set_mask_and_coherent as this function can only fail for masks < 32-bit. Fixes: 3f30a79c2e2c ("nvme-pci: set constant paramters in nvme_pci_alloc_ctrl") Reported-by: Michael Kelley Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Sagi Grimberg Tested-by: Michael Kelley --- drivers/nvme/host/pci.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d0ef4b549640..b1ceb1032784 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2509,18 +2509,12 @@ static int nvme_pci_enable(struct nvme_dev *dev) { int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); - int dma_address_bits = 64; if (pci_enable_device_mem(pdev)) return result; pci_set_master(pdev); - if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48) - dma_address_bits = 48; - if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(dma_address_bits))) - goto disable; - if (readl(dev->bar + NVME_REG_CSTS) == -1) { result = -ENODEV; goto disable; @@ -2998,7 +2992,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks); if (ret) goto out_put_device; - + + if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + else + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1); dma_set_max_seg_size(&pdev->dev, 0xffffffff); From dc785d69d753a3894c93afc23b91404652382ead Mon Sep 17 00:00:00 2001 From: Irvin Cote Date: Thu, 9 Feb 2023 17:43:57 -0300 Subject: [PATCH 162/186] nvme-pci: always return an ERR_PTR from nvme_pci_alloc_dev Don't mix NULL and ERR_PTR returns. Fixes: 2e87570be9d2 ("nvme-pci: factor out a nvme_pci_alloc_dev helper") Signed-off-by: Irvin Cote Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b1ceb1032784..6d88ddd35565 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2964,7 +2964,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) - return NULL; + return ERR_PTR(-ENOMEM); INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); mutex_init(&dev->shutdown_lock); @@ -3029,8 +3029,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int result = -ENOMEM; dev = nvme_pci_alloc_dev(pdev, id); - if (!dev) - return -ENOMEM; + if (IS_ERR(dev)) + return PTR_ERR(dev); result = nvme_dev_map(dev); if (result) From 05d7623a892a9da62da0e714428e38f09e4a64d8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 10 Feb 2023 22:21:26 +0200 Subject: [PATCH 163/186] net: stmmac: Restrict warning on disabling DMA store and fwd mode When setting 'snps,force_thresh_dma_mode' DT property, the following warning is always emitted, regardless the status of force_sf_dma_mode: dwmac-starfive 10020000.ethernet: force_sf_dma_mode is ignored if force_thresh_dma_mode is set. Do not print the rather misleading message when DMA store and forward mode is already disabled. Fixes: e2a240c7d3bc ("driver:net:stmmac: Disable DMA store and forward mode if platform data force_thresh_dma_mode is set.") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20230210202126.877548-1-cristian.ciocaltea@collabora.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index eb6d9cd8e93f..0046a4ee6e64 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -559,7 +559,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); - if (plat->force_thresh_dma_mode) { + if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) { plat->force_sf_dma_mode = 0; dev_warn(&pdev->dev, "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n"); From 6f098cde9df2c86d60db727d6270272751943674 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 14 Feb 2023 15:11:15 +0530 Subject: [PATCH 164/186] cpufreq: qcom-hw: Add missing null pointer check of_device_get_match_data() may return NULL, so add a check to prevent potential null pointer dereference. Issue reported by Qualcomm's internal static analysis tool. Fixes: 4f7961706c63 ("cpufreq: qcom-hw: Move soc_data to struct qcom_cpufreq") Signed-off-by: Manivannan Sadhasivam Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qcom-cpufreq-hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 957cf6bb8c05..d3f55ca06ed3 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -706,6 +706,8 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return -ENOMEM; qcom_cpufreq.soc_data = of_device_get_match_data(dev); + if (!qcom_cpufreq.soc_data) + return -ENODEV; clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL); if (!clk_data) From f9cd6a4418bac6a046ee78382423b1ae7565fb24 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 8 Feb 2023 10:43:32 +0800 Subject: [PATCH 165/186] ixgbe: allow to increase MTU to 3K with XDP enabled Recently I encountered one case where I cannot increase the MTU size directly from 1500 to a much bigger value with XDP enabled if the server is equipped with IXGBE card, which happened on thousands of servers in production environment. After applying the current patch, we can set the maximum MTU size to 3K. This patch follows the behavior of changing MTU as i40e/ice does. References: [1] commit 23b44513c3e6 ("ice: allow 3k MTU for XDP") [2] commit 0c8493d90b6b ("i40e: add XDP support for pass and drop actions") Fixes: fabf1bce103a ("ixgbe: Prevent unsupported configurations with XDP") Signed-off-by: Jason Xing Reviewed-by: Alexander Duyck Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ab8370c413f3..25ca329f7d3c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6777,6 +6777,18 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) ixgbe_free_rx_resources(adapter->rx_ring[i]); } +/** + * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @adapter: device handle, pointer to adapter + */ +static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) +{ + if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + return IXGBE_RXBUFFER_2K; + else + return IXGBE_RXBUFFER_3K; +} + /** * ixgbe_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure @@ -6788,18 +6800,13 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - if (adapter->xdp_prog) { + if (ixgbe_enabled_xdp_adapter(adapter)) { int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - int i; - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; - - if (new_frame_size > ixgbe_rx_bufsz(ring)) { - e_warn(probe, "Requested MTU size is not supported with XDP\n"); - return -EINVAL; - } + if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { + e_warn(probe, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; } } From ce45ffb815e8e238f05de1630be3969b6bb15e4e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 8 Feb 2023 10:43:33 +0800 Subject: [PATCH 166/186] i40e: add double of VLAN header when computing the max MTU Include the second VLAN HLEN into account when computing the maximum MTU size as other drivers do. Fixes: 0c8493d90b6b ("i40e: add XDP support for pass and drop actions") Signed-off-by: Jason Xing Reviewed-by: Alexander Duyck Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4626d2a1af91..52eec0a50492 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2921,7 +2921,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) struct i40e_pf *pf = vsi->back; if (i40e_enabled_xdp_vsi(vsi)) { - int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int frame_size = new_mtu + I40E_PACKET_HDR_PAD; if (frame_size > i40e_max_xdp_frame_size(vsi)) return -EINVAL; From 0967bf837784a11c65d66060623a74e65211af0b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 9 Feb 2023 10:41:28 +0800 Subject: [PATCH 167/186] ixgbe: add double of VLAN header when computing the max MTU Include the second VLAN HLEN into account when computing the maximum MTU size as other drivers do. Fixes: fabf1bce103a ("ixgbe: Prevent unsupported configurations with XDP") Signed-off-by: Jason Xing Reviewed-by: Alexander Duyck Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index bc68b8f2176d..8736ca4b2628 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -73,6 +73,8 @@ #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ +#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + /* Attempt to maximize the headroom available for incoming frames. We * use a 2K buffer for receives and need 1536/1534 to store the data for * the frame. This leaves us with 512 bytes of room. From that we need diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 25ca329f7d3c..4507fba8747a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6801,8 +6801,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) struct ixgbe_adapter *adapter = netdev_priv(netdev); if (ixgbe_enabled_xdp_adapter(adapter)) { - int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; + int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { e_warn(probe, "Requested MTU size is not supported with XDP\n"); From d5a1224aa68c8b124a4c5c390186e571815ed390 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Feb 2023 14:28:29 -0800 Subject: [PATCH 168/186] drm/i915/gen11: Wa_1408615072/Wa_1407596294 should be on GT list The UNSLICE_UNIT_LEVEL_CLKGATE register programmed by this workaround has 'BUS' style reset, indicating that it does not lose its value on engine resets. Furthermore, this register is part of the GT forcewake domain rather than the RENDER domain, so it should not be impacted by RCS engine resets. As such, we should implement this on the GT workaround list rather than an engine list. Bspec: 19219 Fixes: 3551ff928744 ("drm/i915/gen11: Moving WAs to rcs_engine_wa_init()") Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20230201222831.608281-2-matthew.d.roper@intel.com (cherry picked from commit 5f21dc07b52eb54a908e66f5d6e05a87bcb5b049) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 949c19339015..a0740308555d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1355,6 +1355,13 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + /* + * Wa_1408615072:icl,ehl (vsunit) + * Wa_1407596294:icl,ehl (hsunit) + */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); + /* Wa_1407352427:icl,ehl */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, PSDUNIT_CLKGATE_DIS); @@ -2539,13 +2546,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, GEN11_ENABLE_32_PLANE_MODE); - /* - * Wa_1408615072:icl,ehl (vsunit) - * Wa_1407596294:icl,ehl (hsunit) - */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); - /* * Wa_1408767742:icl[a2..forever],ehl[all] * Wa_1605460711:icl[a0..c0] From 1a6897921f52ceb2c8665ef826e405bd96385159 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 8 Feb 2023 13:00:50 -0500 Subject: [PATCH 169/186] drm/vmwgfx: Stop accessing buffer objects which failed init ttm_bo_init_reserved on failure puts the buffer object back which causes it to be deleted, but kfree was still being called on the same buffer in vmw_bo_create leading to a double free. After the double free the vmw_gem_object_create_with_handle was setting the gem function objects before checking the return status of vmw_bo_create leading to null pointer access. Fix the entire path by relaying on ttm_bo_init_reserved to delete the buffer objects on failure and making sure the return status is checked before setting the gem function objects on the buffer object. Signed-off-by: Zack Rusin Fixes: 8afa13a0583f ("drm/vmwgfx: Implement DRIVER_GEM") Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20230208180050.2093426-1-zack@kde.org (cherry picked from commit 36d421e632e9a0e8375eaed0143551a34d81a7e3) Cc: # v5.17+ --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 4 +++- drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index aa1cd5126a32..53da183e2bfe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -462,6 +462,9 @@ int vmw_bo_create(struct vmw_private *vmw, return -ENOMEM; } + /* + * vmw_bo_init will delete the *p_bo object if it fails + */ ret = vmw_bo_init(vmw, *p_bo, size, placement, interruptible, pin, bo_free); @@ -470,7 +473,6 @@ int vmw_bo_create(struct vmw_private *vmw, return ret; out_error: - kfree(*p_bo); *p_bo = NULL; return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index ce609e7d758f..83d8f18cc16f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -146,11 +146,11 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, &vmw_sys_placement : &vmw_vram_sys_placement, true, false, &vmw_gem_destroy, p_vbo); - - (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; if (ret != 0) goto out_no_bo; + (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; + ret = drm_gem_handle_create(filp, &(*p_vbo)->base.base, handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_put(&(*p_vbo)->base.base); From a950b989ea29ab3b38ea7f6e3d2540700a3c54e8 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 11 Feb 2023 00:05:14 -0500 Subject: [PATCH 170/186] drm/vmwgfx: Do not drop the reference to the handle too soon v3: Fix vmw_user_bo_lookup which was also dropping the gem reference before the kernel was done with buffer depending on userspace doing the right thing. Same bug, different spot. It is possible for userspace to predict the next buffer handle and to destroy the buffer while it's still used by the kernel. Delay dropping the internal reference on the buffers until kernel is done with them. Instead of immediately dropping the gem reference in vmw_user_bo_lookup and vmw_gem_object_create_with_handle let the callers decide when they're ready give the control back to userspace. Also fixes the second usage of vmw_gem_object_create_with_handle in vmwgfx_surface.c which wasn't grabbing an explicit reference to the gem object which could have been destroyed by the userspace on the owning surface at any point. Signed-off-by: Zack Rusin Fixes: 8afa13a0583f ("drm/vmwgfx: Implement DRIVER_GEM") Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20230211050514.2431155-1-zack@kde.org (cherry picked from commit 9ef8d83e8e25d5f1811b3a38eb1484f85f64296c) Cc: # v5.17+ --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 8 +++++--- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 ++ drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 +++- drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 10 ++++++---- 7 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 53da183e2bfe..4dcf2eb7aa80 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -598,6 +598,7 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp, ttm_bo_put(&vmw_bo->base); } + drm_gem_object_put(&vmw_bo->base.base); return ret; } @@ -638,6 +639,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); vmw_bo_unreference(&vbo); + drm_gem_object_put(&vbo->base.base); if (unlikely(ret != 0)) { if (ret == -ERESTARTSYS || ret == -EBUSY) return -EBUSY; @@ -695,7 +697,7 @@ int vmw_bo_unref_ioctl(struct drm_device *dev, void *data, * struct vmw_buffer_object should be placed. * Return: Zero on success, Negative error code on error. * - * The vmw buffer object pointer will be refcounted. + * The vmw buffer object pointer will be refcounted (both ttm and gem) */ int vmw_user_bo_lookup(struct drm_file *filp, uint32_t handle, @@ -712,7 +714,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, *out = gem_to_vmw_bo(gobj); ttm_bo_get(&(*out)->base); - drm_gem_object_put(gobj); return 0; } @@ -793,7 +794,8 @@ int vmw_dumb_create(struct drm_file *file_priv, ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, args->size, &args->handle, &vbo); - + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->base.base); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index a44d53e33cdb..c0686283ffd1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1160,6 +1160,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); ttm_bo_put(&vmw_bo->base); + drm_gem_object_put(&vmw_bo->base.base); if (unlikely(ret != 0)) return ret; @@ -1214,6 +1215,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); ttm_bo_put(&vmw_bo->base); + drm_gem_object_put(&vmw_bo->base.base); if (unlikely(ret != 0)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 83d8f18cc16f..4d2c28e39f4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -152,8 +152,6 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; ret = drm_gem_handle_create(filp, &(*p_vbo)->base.base, handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&(*p_vbo)->base.base); out_no_bo: return ret; } @@ -180,6 +178,8 @@ int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, rep->map_handle = drm_vma_node_offset_addr(&vbo->base.base.vma_node); rep->cur_gmr_id = handle; rep->cur_gmr_offset = 0; + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->base.base); out_no_bo: return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 257f090071f1..445d619e1fdc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1815,8 +1815,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, err_out: /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) + if (bo) { vmw_bo_unreference(&bo); + drm_gem_object_put(&bo->base.base); + } if (surface) vmw_surface_unreference(&surface); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index e9f5c89b4ca6..b5b311f2a91a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -458,6 +458,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); vmw_bo_unreference(&buf); + drm_gem_object_put(&buf->base.base); out_unlock: mutex_unlock(&overlay->mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 108a496b5d18..51e83dfa1cac 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -807,6 +807,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, num_output_sig, tfile, shader_handle); out_bad_arg: vmw_bo_unreference(&buffer); + drm_gem_object_put(&buffer->base.base); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 3bc63ae768f3..dcfb003841b3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -683,7 +683,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; - if (base->shareable && res && res->backup) + if (res && res->backup) drm_gem_object_put(&res->backup->base.base); *p_base = NULL; @@ -864,7 +864,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_unlock; } vmw_bo_reference(res->backup); - drm_gem_object_get(&res->backup->base.base); + /* + * We don't expose the handle to the userspace and surface + * already holds a gem reference + */ + drm_gem_handle_delete(file_priv, backup_handle); } tmp = vmw_resource_reference(&srf->res); @@ -1568,8 +1572,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, drm_vma_node_offset_addr(&res->backup->base.base.vma_node); rep->buffer_size = res->backup->base.base.size; rep->buffer_handle = backup_handle; - if (user_srf->prime.base.shareable) - drm_gem_object_get(&res->backup->base.base); } else { rep->buffer_map_handle = 0; rep->buffer_size = 0; From 2558b8039d059342197610498c8749ad294adee5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Feb 2023 16:00:59 +0000 Subject: [PATCH 171/186] net: use a bounce buffer for copying skb->mark syzbot found arm64 builds would crash in sock_recv_mark() when CONFIG_HARDENED_USERCOPY=y x86 and powerpc are not detecting the issue because they define user_access_begin. This will be handled in a different patch, because a check_object_size() is missing. Only data from skb->cb[] can be copied directly to/from user space, as explained in commit 79a8a642bf05 ("net: Whitelist the skbuff_head_cache "cb" field") syzbot report was: usercopy: Kernel memory exposure attempt detected from SLUB object 'skbuff_head_cache' (offset 168, size 4)! ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:102 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 4410 Comm: syz-executor533 Not tainted 6.2.0-rc7-syzkaller-17907-g2d3827b3f393 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : usercopy_abort+0x90/0x94 mm/usercopy.c:90 lr : usercopy_abort+0x90/0x94 mm/usercopy.c:90 sp : ffff80000fb9b9a0 x29: ffff80000fb9b9b0 x28: ffff0000c6073400 x27: 0000000020001a00 x26: 0000000000000014 x25: ffff80000cf52000 x24: fffffc0000000000 x23: 05ffc00000000200 x22: fffffc000324bf80 x21: ffff0000c92fe1a8 x20: 0000000000000001 x19: 0000000000000004 x18: 0000000000000000 x17: 656a626f2042554c x16: ffff0000c6073dd0 x15: ffff80000dbd2118 x14: ffff0000c6073400 x13: 00000000ffffffff x12: ffff0000c6073400 x11: ff808000081bbb4c x10: 0000000000000000 x9 : 7b0572d7cc0ccf00 x8 : 7b0572d7cc0ccf00 x7 : ffff80000bf650d4 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : 0000000000000000 x2 : ffff0001fefbff08 x1 : 0000000100000000 x0 : 000000000000006c Call trace: usercopy_abort+0x90/0x94 mm/usercopy.c:90 __check_heap_object+0xa8/0x100 mm/slub.c:4761 check_heap_object mm/usercopy.c:196 [inline] __check_object_size+0x208/0x6b8 mm/usercopy.c:251 check_object_size include/linux/thread_info.h:199 [inline] __copy_to_user include/linux/uaccess.h:115 [inline] put_cmsg+0x408/0x464 net/core/scm.c:238 sock_recv_mark net/socket.c:975 [inline] __sock_recv_cmsgs+0x1fc/0x248 net/socket.c:984 sock_recv_cmsgs include/net/sock.h:2728 [inline] packet_recvmsg+0x2d8/0x678 net/packet/af_packet.c:3482 ____sys_recvmsg+0x110/0x3a0 ___sys_recvmsg net/socket.c:2737 [inline] __sys_recvmsg+0x194/0x210 net/socket.c:2767 __do_sys_recvmsg net/socket.c:2777 [inline] __se_sys_recvmsg net/socket.c:2774 [inline] __arm64_sys_recvmsg+0x2c/0x3c net/socket.c:2774 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x64/0x178 arch/arm64/kernel/syscall.c:52 el0_svc_common+0xbc/0x180 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x110 arch/arm64/kernel/syscall.c:193 el0_svc+0x58/0x14c arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Code: 91388800 aa0903e1 f90003e8 94e6d752 (d4210000) Fixes: 6fd1d51cfa25 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Erin MacNeil Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230213160059.3829741-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/socket.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index 888cd618a968..c12af3c84d3a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -971,9 +971,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, static void sock_recv_mark(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RCVMARK) && skb) - put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), - &skb->mark); + if (sock_flag(sk, SOCK_RCVMARK) && skb) { + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + __u32 mark = skb->mark; + + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark); + } } void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, From 207ce626add80ddd941f62fc2fe5d77586e0801b Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 13 Feb 2023 10:58:22 -0800 Subject: [PATCH 172/186] igb: Fix PPS input and output using 3rd and 4th SDP Fix handling of the tsync interrupt to compare the pin number with IGB_N_SDP instead of IGB_N_EXTTS/IGB_N_PEROUT and fix the indexing to the perout array. Fixes: cf99c1dd7b77 ("igb: move PEROUT and EXTTS isr logic to separate functions") Reported-by: Matt Corallo Signed-off-by: Miroslav Lichvar Reviewed-by: Jacob Keller Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230213185822.3960072-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0c35ecea10..d8e3048b93dd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6794,7 +6794,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) struct timespec64 ts; u32 tsauxc; - if (pin < 0 || pin >= IGB_N_PEROUT) + if (pin < 0 || pin >= IGB_N_SDP) return; spin_lock(&adapter->tmreg_lock); @@ -6802,7 +6802,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) if (hw->mac.type == e1000_82580 || hw->mac.type == e1000_i354 || hw->mac.type == e1000_i350) { - s64 ns = timespec64_to_ns(&adapter->perout[pin].period); + s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period); u32 systiml, systimh, level_mask, level, rem; u64 systim, now; @@ -6850,8 +6850,8 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) ts.tv_nsec = (u32)systim; ts.tv_sec = ((u32)(systim >> 32)) & 0xFF; } else { - ts = timespec64_add(adapter->perout[pin].start, - adapter->perout[pin].period); + ts = timespec64_add(adapter->perout[tsintr_tt].start, + adapter->perout[tsintr_tt].period); } /* u32 conversion of tv_sec is safe until y2106 */ @@ -6860,7 +6860,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) tsauxc = rd32(E1000_TSAUXC); tsauxc |= TSAUXC_EN_TT0; wr32(E1000_TSAUXC, tsauxc); - adapter->perout[pin].start = ts; + adapter->perout[tsintr_tt].start = ts; spin_unlock(&adapter->tmreg_lock); } @@ -6874,7 +6874,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) struct ptp_clock_event event; struct timespec64 ts; - if (pin < 0 || pin >= IGB_N_EXTTS) + if (pin < 0 || pin >= IGB_N_SDP) return; if (hw->mac.type == e1000_82580 || From 11a4d6f67cf55883dc78e31c247d1903ed7feccc Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 14 Feb 2023 01:26:06 +0000 Subject: [PATCH 173/186] tipc: fix kernel warning when sending SYN message When sending a SYN message, this kernel stack trace is observed: ... [ 13.396352] RIP: 0010:_copy_from_iter+0xb4/0x550 ... [ 13.398494] Call Trace: [ 13.398630] [ 13.398630] ? __alloc_skb+0xed/0x1a0 [ 13.398630] tipc_msg_build+0x12c/0x670 [tipc] [ 13.398630] ? shmem_add_to_page_cache.isra.71+0x151/0x290 [ 13.398630] __tipc_sendmsg+0x2d1/0x710 [tipc] [ 13.398630] ? tipc_connect+0x1d9/0x230 [tipc] [ 13.398630] ? __local_bh_enable_ip+0x37/0x80 [ 13.398630] tipc_connect+0x1d9/0x230 [tipc] [ 13.398630] ? __sys_connect+0x9f/0xd0 [ 13.398630] __sys_connect+0x9f/0xd0 [ 13.398630] ? preempt_count_add+0x4d/0xa0 [ 13.398630] ? fpregs_assert_state_consistent+0x22/0x50 [ 13.398630] __x64_sys_connect+0x16/0x20 [ 13.398630] do_syscall_64+0x42/0x90 [ 13.398630] entry_SYSCALL_64_after_hwframe+0x63/0xcd It is because commit a41dad905e5a ("iov_iter: saner checks for attempt to copy to/from iterator") has introduced sanity check for copying from/to iov iterator. Lacking of copy direction from the iterator viewpoint would lead to kernel stack trace like above. This commit fixes this issue by initializing the iov iterator with the correct copy direction when sending SYN or ACK without data. Fixes: f25dcc7687d4 ("tipc: tipc ->sendmsg() conversion") Reported-by: syzbot+d43608d061e8847ec9f3@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Link: https://lore.kernel.org/r/20230214012606.5804-1-tung.q.nguyen@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b35c8701876a..a38733f2197a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2614,6 +2614,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); /* If connect is in non-blocking case, set MSG_DONTWAIT to * indicate send_msg() is never blocked. @@ -2776,6 +2777,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: From 42018a322bd453e38b3ffee294982243e50a484f Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 13 Feb 2023 22:47:29 -0300 Subject: [PATCH 174/186] net/sched: tcindex: search key must be 16 bits Syzkaller found an issue where a handle greater than 16 bits would trigger a null-ptr-deref in the imperfect hash area update. general protection fault, probably for non-canonical address 0xdffffc0000000015: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000000a8-0x00000000000000af] CPU: 0 PID: 5070 Comm: syz-executor456 Not tainted 6.2.0-rc7-syzkaller-00112-gc68f345b7c42 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 RIP: 0010:tcindex_set_parms+0x1a6a/0x2990 net/sched/cls_tcindex.c:509 Code: 01 e9 e9 fe ff ff 4c 8b bd 28 fe ff ff e8 0e 57 7d f9 48 8d bb a8 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 94 0c 00 00 48 8b 85 f8 fd ff ff 48 8b 9b a8 00 RSP: 0018:ffffc90003d3ef88 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000015 RSI: ffffffff8803a102 RDI: 00000000000000a8 RBP: ffffc90003d3f1d8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88801e2b10a8 R13: dffffc0000000000 R14: 0000000000030000 R15: ffff888017b3be00 FS: 00005555569af300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056041c6d2000 CR3: 000000002bfca000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcindex_change+0x1ea/0x320 net/sched/cls_tcindex.c:572 tc_new_tfilter+0x96e/0x2220 net/sched/cls_api.c:2155 rtnetlink_rcv_msg+0x959/0xca0 net/core/rtnetlink.c:6132 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2574 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x91b/0xe10 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 ____sys_sendmsg+0x334/0x8c0 net/socket.c:2476 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2530 __sys_sendmmsg+0x18f/0x460 net/socket.c:2616 __do_sys_sendmmsg net/socket.c:2645 [inline] __se_sys_sendmmsg net/socket.c:2642 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2642 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 Fixes: ee059170b1f7 ("net/sched: tcindex: update imperfect hash filters respecting rcu") Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reported-by: syzbot Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ba7f22a49397..6640e75eaa02 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -503,7 +503,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, /* lookup the filter, guaranteed to exist */ for (cf = rcu_dereference_bh_rtnl(*fp); cf; fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) - if (cf->key == handle) + if (cf->key == (u16)handle) break; f->next = cf->next; From fda6c89fe3d9aca073495a664e1d5aea28cd4377 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 13 Feb 2023 22:53:55 -0800 Subject: [PATCH 175/186] net: mpls: fix stale pointer if allocation fails during device rename lianhui reports that when MPLS fails to register the sysctl table under new location (during device rename) the old pointers won't get overwritten and may be freed again (double free). Handle this gracefully. The best option would be unregistering the MPLS from the device completely on failure, but unfortunately mpls_ifdown() can fail. So failing fully is also unreliable. Another option is to register the new table first then only remove old one if the new one succeeds. That requires more code, changes order of notifications and two tables may be visible at the same time. sysctl point is not used in the rest of the code - set to NULL on failures and skip unregister if already NULL. Reported-by: lianhui tang Fixes: 0fae3bf018d9 ("mpls: handle device renames for per-device sysctls") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35b5f806fdda..dc5165d3eec4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); From b61494d0f82e7298ddef7b1ced0946ccdc1c7d88 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Wed, 15 Feb 2023 17:30:04 +0530 Subject: [PATCH 176/186] gpiolib: acpi: remove redundant declaration Remove acpi_device declaration, as it is no longer needed. Signed-off-by: Raag Jadav Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 9475f99a9694..5a08693b8fb1 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -14,7 +14,6 @@ #include -struct acpi_device; struct device; struct fwnode_handle; From e6ef4f8ede09f4af7cde000717b349b50bc62576 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 15 Feb 2023 10:52:49 +0100 Subject: [PATCH 177/186] gpio: vf610: make irq_chip immutable Since recently, the kernel is nagging about mutable irq_chips: "not an immutable chip, please consider fixing it!" Drop the unneeded copy, flag it as IRQCHIP_IMMUTABLE, add the new helper functions and call the appropriate gpiolib functions. Signed-off-by: Alexander Stein Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-vf610.c | 41 ++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 9db42f6a2043..9033db00c360 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -30,7 +30,6 @@ struct fsl_gpio_soc_data { struct vf610_gpio_port { struct gpio_chip gc; - struct irq_chip ic; void __iomem *base; void __iomem *gpio_base; const struct fsl_gpio_soc_data *sdata; @@ -207,20 +206,24 @@ static int vf610_gpio_irq_set_type(struct irq_data *d, u32 type) static void vf610_gpio_irq_mask(struct irq_data *d) { - struct vf610_gpio_port *port = - gpiochip_get_data(irq_data_get_irq_chip_data(d)); - void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct vf610_gpio_port *port = gpiochip_get_data(gc); + irq_hw_number_t gpio_num = irqd_to_hwirq(d); + void __iomem *pcr_base = port->base + PORT_PCR(gpio_num); vf610_gpio_writel(0, pcr_base); + gpiochip_disable_irq(gc, gpio_num); } static void vf610_gpio_irq_unmask(struct irq_data *d) { - struct vf610_gpio_port *port = - gpiochip_get_data(irq_data_get_irq_chip_data(d)); - void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct vf610_gpio_port *port = gpiochip_get_data(gc); + irq_hw_number_t gpio_num = irqd_to_hwirq(d); + void __iomem *pcr_base = port->base + PORT_PCR(gpio_num); - vf610_gpio_writel(port->irqc[d->hwirq] << PORT_PCR_IRQC_OFFSET, + gpiochip_enable_irq(gc, gpio_num); + vf610_gpio_writel(port->irqc[gpio_num] << PORT_PCR_IRQC_OFFSET, pcr_base); } @@ -237,6 +240,17 @@ static int vf610_gpio_irq_set_wake(struct irq_data *d, u32 enable) return 0; } +static const struct irq_chip vf610_irqchip = { + .name = "gpio-vf610", + .irq_ack = vf610_gpio_irq_ack, + .irq_mask = vf610_gpio_irq_mask, + .irq_unmask = vf610_gpio_irq_unmask, + .irq_set_type = vf610_gpio_irq_set_type, + .irq_set_wake = vf610_gpio_irq_set_wake, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + static void vf610_gpio_disable_clk(void *data) { clk_disable_unprepare(data); @@ -249,7 +263,6 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct gpio_chip *gc; struct gpio_irq_chip *girq; - struct irq_chip *ic; int i; int ret; @@ -315,14 +328,6 @@ static int vf610_gpio_probe(struct platform_device *pdev) gc->direction_output = vf610_gpio_direction_output; gc->set = vf610_gpio_set; - ic = &port->ic; - ic->name = "gpio-vf610"; - ic->irq_ack = vf610_gpio_irq_ack; - ic->irq_mask = vf610_gpio_irq_mask; - ic->irq_unmask = vf610_gpio_irq_unmask; - ic->irq_set_type = vf610_gpio_irq_set_type; - ic->irq_set_wake = vf610_gpio_irq_set_wake; - /* Mask all GPIO interrupts */ for (i = 0; i < gc->ngpio; i++) vf610_gpio_writel(0, port->base + PORT_PCR(i)); @@ -331,7 +336,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) vf610_gpio_writel(~0, port->base + PORT_ISFR); girq = &gc->irq; - girq->chip = ic; + gpio_irq_chip_set_chip(girq, &vf610_irqchip); girq->parent_handler = vf610_gpio_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, 1, From a69982c37cd0586e6832268155349301b87f2e35 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 15 Feb 2023 15:39:41 +0100 Subject: [PATCH 178/186] gpiolib: acpi: Add a ignore wakeup quirk for Clevo NH5xAx The commit 1796f808e4bb ("HID: i2c-hid: acpi: Stop setting wakeup_capable") changed the policy such that I2C touchpads may be able to wake up the system by default if the system is configured as such. However for some devices there is a bug, that is causing the touchpad to instantly wake up the device again once it gets deactivated. The root cause is still under investigation (see Link tag). To workaround this problem for the time being, introduce a quirk for this model that will prevent the wakeup capability for being set for GPIO 16. Fixes: 1796f808e4bb ("HID: i2c-hid: acpi: Stop setting wakeup_capable") Link: https://lore.kernel.org/linux-acpi/20230210164636.628462-1-wse@tuxedocomputers.com/ Signed-off-by: Werner Sembach Cc: # v6.1+ Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 17c53f484280..34ff048e70d0 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1637,6 +1637,18 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "ELAN0415:00@9", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 1.7.7 + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "SYNA1202:00@16", + }, + }, {} /* Terminating entry */ }; From 2f43f6020cde9f5024d26f17e9fd9f9b5581c2f9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 15 Feb 2023 15:52:06 +0100 Subject: [PATCH 179/186] gpio: mlxbf2: select GPIOLIB_IRQCHIP This driver uncondictionally uses the GPIOLIB_IRQCHIP so select it. Signed-off-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ec7cfd4f52b1..e9917a45b005 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1531,6 +1531,7 @@ config GPIO_MLXBF2 tristate "Mellanox BlueField 2 SoC GPIO" depends on (MELLANOX_PLATFORM && ARM64 && ACPI) || (64BIT && COMPILE_TEST) select GPIO_GENERIC + select GPIOLIB_IRQCHIP help Say Y here if you want GPIO support on Mellanox BlueField 2 SoC. From cbb13e12a5d3ecef400716ea7d12a9268b0f37ca Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 14 Feb 2023 20:21:17 -0800 Subject: [PATCH 180/186] apparmor: Fix regression in compat permissions for getattr This fixes a regression in mediation of getattr when old policy built under an older ABI is loaded and mapped to internal permissions. The regression does not occur for all getattr permission requests, only appearing if state zero is the final state in the permission lookup. This is because despite the first state (index 0) being guaranteed to not have permissions in both newer and older permission formats, it may have to carry permissions that were not mediated as part of an older policy. These backward compat permissions are mapped here to avoid special casing the mediation code paths. Since the mapping code already takes into account backwards compat permission from older formats it can be applied to state 0 to fix the regression. Fixes: 408d53e923bd ("apparmor: compute file permissions on profile load") Reported-by: Philip Meulengracht Signed-off-by: John Johansen --- security/apparmor/policy_compat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c index 9e52e218bf30..cc89d1e88fb7 100644 --- a/security/apparmor/policy_compat.c +++ b/security/apparmor/policy_compat.c @@ -160,8 +160,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa) if (!table) return NULL; - /* zero init so skip the trap state (state == 0) */ - for (state = 1; state < state_count; state++) { + for (state = 0; state < state_count; state++) { table[state * 2] = compute_fperms_user(dfa, state); table[state * 2 + 1] = compute_fperms_other(dfa, state); } From 8f32378986218812083b127da5ba42d48297d7c4 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Fri, 10 Feb 2023 10:31:32 +0800 Subject: [PATCH 181/186] drm/amd/amdgpu: fix warning during suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freeing memory was warned during suspend. Move the self test out of suspend. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2151825 Cc: jfalempe@redhat.com Signed-off-by: Jack Xiao Reviewed-by: Christian König Reviewed-by: Feifei Xu Reviewed-and-tested-by: Evan Quan Tested-by: Jocelyn Falempe Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f28a8c02f64..fbf2f24169eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4268,6 +4268,9 @@ exit: } adev->in_suspend = false; + if (adev->enable_mes) + amdgpu_mes_self_test(adev); + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5dff79e8f301..1c4787000a5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1344,7 +1344,7 @@ static int mes_v11_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && + if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); From 2a00299e7447395d0898e7c6214817c06a61a8e8 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 9 Feb 2023 12:15:21 -0500 Subject: [PATCH 182/186] drm/amd/display: Fail atomic_check early on normalize_zpos error [Why] drm_atomic_normalize_zpos() can return an error code when there's modeset lock contention. This was being ignored. [How] Bail out of atomic check if normalize_zpos() returns an error. Fixes: b261509952bc ("drm/amd/display: Fix double cursor on non-video RGB MPO") Signed-off-by: Leo Li Tested-by: Mikhail Gavrilov Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93dee3d1a483..9c7b69d377bd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9658,7 +9658,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * `dcn10_can_pipe_disable_cursor`). By now, all modified planes are in * atomic state, so call drm helper to normalize zpos. */ - drm_atomic_normalize_zpos(dev, state); + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) { + drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n"); + goto fail; + } /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { From 5d54cb1767e06025819daa6769e0f18dcbc60936 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 14 Feb 2023 10:55:48 -0800 Subject: [PATCH 183/186] igb: conditionalize I2C bit banging on external thermal sensor support Commit a97f8783a937 ("igb: unbreak I2C bit-banging on i350") introduced code to change I2C settings to bit banging unconditionally. However, this patch introduced a regression: On an Intel S2600CWR Server Board with three NICs: - 1x dual-port copper Intel I350 Gigabit Network Connection [8086:1521] (rev 01) fw 1.63, 0x80000dda - 2x quad-port SFP+ with copper SFP Avago ABCU-5700RZ Intel I350 Gigabit Fiber Network Connection [8086:1522] (rev 01) fw 1.52.0 the SFP NICs no longer get link at all. Reverting commit a97f8783a937 or switching to the Intel out-of-tree driver both fix the problem. Per the igb out-of-tree driver, I2C bit banging on i350 depends on support for an external thermal sensor (ETS). However, commit a97f8783a937 added bit banging unconditionally. Additionally, the out-of-tree driver always calls init_thermal_sensor_thresh on probe, while our driver only calls init_thermal_sensor_thresh only in igb_reset(), and only if an ETS is present, ignoring the internal thermal sensor. The affected SFPs don't provide an ETS. Per Intel, the behaviour is a result of i350 firmware requirements. This patch fixes the problem by aligning the behaviour to the out-of-tree driver: - split igb_init_i2c() into two functions: - igb_init_i2c() only performs the basic I2C initialization. - igb_set_i2c_bb() makes sure that E1000_CTRL_I2C_ENA is set and enables bit-banging. - igb_probe() only calls igb_set_i2c_bb() if an ETS is present. - igb_probe() calls init_thermal_sensor_thresh() unconditionally. - igb_reset() aligns its behaviour to igb_probe(), i. e., call igb_set_i2c_bb() if an ETS is present and call init_thermal_sensor_thresh() unconditionally. Fixes: a97f8783a937 ("igb: unbreak I2C bit-banging on i350") Tested-by: Mateusz Palczewski Co-developed-by: Jamie Bainbridge Signed-off-by: Jamie Bainbridge Signed-off-by: Corinna Vinschen Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230214185549.1306522-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 42 +++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d8e3048b93dd..b5b443883da9 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2256,6 +2256,30 @@ static void igb_enable_mas(struct igb_adapter *adapter) } } +#ifdef CONFIG_IGB_HWMON +/** + * igb_set_i2c_bb - Init I2C interface + * @hw: pointer to hardware structure + **/ +static void igb_set_i2c_bb(struct e1000_hw *hw) +{ + u32 ctrl_ext; + s32 i2cctl; + + ctrl_ext = rd32(E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_I2C_ENA; + wr32(E1000_CTRL_EXT, ctrl_ext); + wrfl(); + + i2cctl = rd32(E1000_I2CPARAMS); + i2cctl |= E1000_I2CBB_EN + | E1000_I2C_CLK_OE_N + | E1000_I2C_DATA_OE_N; + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); +} +#endif + void igb_reset(struct igb_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -2400,7 +2424,8 @@ void igb_reset(struct igb_adapter *adapter) * interface. */ if (adapter->ets) - mac->ops.init_thermal_sensor_thresh(hw); + igb_set_i2c_bb(hw); + mac->ops.init_thermal_sensor_thresh(hw); } } #endif @@ -3117,21 +3142,12 @@ static void igb_init_mas(struct igb_adapter *adapter) **/ static s32 igb_init_i2c(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; s32 status = 0; - s32 i2cctl; /* I2C interface supported on i350 devices */ if (adapter->hw.mac.type != e1000_i350) return 0; - i2cctl = rd32(E1000_I2CPARAMS); - i2cctl |= E1000_I2CBB_EN - | E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N - | E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; - wr32(E1000_I2CPARAMS, i2cctl); - wrfl(); - /* Initialize the i2c bus which is controlled by the registers. * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. @@ -3521,6 +3537,12 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->ets = true; else adapter->ets = false; + /* Only enable I2C bit banging if an external thermal + * sensor is supported. + */ + if (adapter->ets) + igb_set_i2c_bb(hw); + hw->mac.ops.init_thermal_sensor_thresh(hw); if (igb_sysfs_init(adapter)) dev_err(&pdev->dev, "failed to allocate sysfs resources\n"); From b20b8aec6ffc07bb547966b356780cd344f20f5b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Feb 2023 09:31:39 +0200 Subject: [PATCH 184/186] devlink: Fix netdev notifier chain corruption Cited commit changed devlink to register its netdev notifier block on the global netdev notifier chain instead of on the per network namespace one. However, when changing the network namespace of the devlink instance, devlink still tries to unregister its notifier block from the chain of the old namespace and register it on the chain of the new namespace. This results in corruption of the notifier chains, as the same notifier block is registered on two different chains: The global one and the per network namespace one. In turn, this causes other problems such as the inability to dismantle namespaces due to netdev reference count issues. Fix by preventing devlink from moving its notifier block between namespaces. Reproducer: # echo "10 1" > /sys/bus/netdevsim/new_device # ip netns add test123 # devlink dev reload netdevsim/netdevsim10 netns test123 # ip netns del test123 [ 71.935619] unregister_netdevice: waiting for lo to become free. Usage count = 2 [ 71.938348] leaked reference. Fixes: 565b4824c39f ("devlink: change port event netdev notifier from per-net to global") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Jacob Keller Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230215073139.1360108-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 -- net/core/dev.c | 8 -------- net/core/devlink.c | 5 +---- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..e6e02184c25a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2839,8 +2839,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); diff --git a/net/core/dev.c b/net/core/dev.c index ea0a7bac1e5c..f23e287602b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb) -{ - rtnl_lock(); - __move_netdevice_notifier_net(src_net, dst_net, nb); - rtnl_unlock(); -} - int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) diff --git a/net/core/devlink.c b/net/core/devlink.c index 909a10e4b0dd..0bfc144df8b9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, if (err) return err; - if (dest_net && !net_eq(dest_net, curr_net)) { - move_netdevice_notifier_net(curr_net, dest_net, - &devlink->netdevice_nb); + if (dest_net && !net_eq(dest_net, curr_net)) write_pnet(&devlink->_net, dest_net); - } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); From a8cd2990b694ed2c0ef0e8fc80686c664b4ebbe5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Feb 2023 07:29:22 +0100 Subject: [PATCH 185/186] orphan sysvfs This code has been stale for years and I have no way to test it. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 39ff1a717625..225c3ce347a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20323,8 +20323,7 @@ S: Maintained F: drivers/platform/x86/system76_acpi.c SYSV FILESYSTEM -M: Christoph Hellwig -S: Maintained +S: Orphan F: Documentation/filesystems/sysv-fs.rst F: fs/sysv/ F: include/linux/sysv_fs.h From 88d355832e09f5dd30d2df4c22c4e8129c3cce3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Feb 2023 07:31:10 +0100 Subject: [PATCH 186/186] stop mainaining UUID The uuid code is very low maintainance now that the major overhaul has completed, and doesn't need it's own tree. All the recent work has been done by Andy who'd like to stay on as a reviewer without an explicit tree. Signed-off-by: Christoph Hellwig Acked-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 225c3ce347a2..6a47510d1592 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21818,11 +21818,9 @@ W: http://en.wikipedia.org/wiki/Util-linux T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git UUID HELPERS -M: Christoph Hellwig R: Andy Shevchenko L: linux-kernel@vger.kernel.org S: Maintained -T: git git://git.infradead.org/users/hch/uuid.git F: include/linux/uuid.h F: include/uapi/linux/uuid.h F: lib/test_uuid.c